Source code for src.processing.JH_HIT
import pandas as pd
from countrycode.countrycode import countrycode
# hot fix for sys.path issues in test environment
try:
from processing import utils
from processing import check
except Exception as e:
from src.processing import utils
from src.processing import check
def transform(record: dict, key_ref: dict, country_ref: pd.DataFrame, who_coding: pd.DataFrame, prov_measure_filter: pd.DataFrame):
    """
    Run a single JH_HIT record through the transformation pipeline.

    The steps are applied in a fixed order; each one receives the record
    returned by the previous step.

    Parameters
    ----------
    record : dict
        Input record. Must contain `locality` and `usa_county`.
    key_ref : dict
        Reference for key mapping, passed to `utils.apply_key_map`.
    country_ref : pd.DataFrame
        Reference for WHO accepted country names.
    who_coding : pd.DataFrame
        Reference for WHO coding.
    prov_measure_filter : pd.DataFrame
        Reference for filtering by `prov_measure` / `prov_category` values.

    Returns
    -------
    dict or None
        Record with transformations applied, or None when the record is
        skipped (both `locality` and `usa_county` are null, or the record
        is rejected by `apply_prov_measure_filter`).
    """
    # 1. Skip records where both `locality` and `usa_county` are null.
    if pd.isnull(record['locality']) and pd.isnull(record['usa_county']):
        return None

    # 2. + 3. Start from a blank record and copy values across using the
    # key reference.
    blank = utils.generate_blank_record()
    record = utils.apply_key_map(blank, record, key_ref)

    # 4. Keep only records accepted by the prov_measure filter.
    record = apply_prov_measure_filter(record, prov_measure_filter)
    if record is None:
        return None

    # 5. Parse the date.
    record = utils.parse_date(record)

    # 6. ID assignment (`utils.assign_id`) is not called here.
    # 7. No non-ascii replacement call is made here.

    # 8. Replace sensitive country names.
    record = utils.replace_sensitive_regions(record)

    # 9. Derive the ISO3 code from the country name.
    record['iso'] = countrycode(
        codes=record['country_territory_area'],
        origin='country_name',
        target='iso3c',
    )

    # 10. Check for missing ISO codes.
    check.check_missing_iso(record)

    # 11. Join WHO accepted country names.
    record = utils.assign_who_country_name(record, country_ref)

    # 12. Join WHO coding from the lookup.
    record = utils.assign_who_coding(record, who_coding)

    # 13. Check for missing WHO codes.
    check.check_missing_who_code(record)

    # 14. Value replacements, applied in order as (key, old value, new value).
    # NOTE(review): 'subpobulation' is reproduced verbatim from the original
    # call - confirm it matches the spelling used in the source data.
    replacements = (
        ('admin_level', '', 'unknown'),
        ('admin_level', 'Yes', 'national'),
        ('admin_level', 'No', 'state'),
        ('enforcement', 'unknown', None),
        ('targeted', 'geographic subpobulation', None),
        ('targeted', 'entire population', None),
    )
    for field, old_value, new_value in replacements:
        record = utils.replace_conditional(record, field, old_value, new_value)

    # 15. Label records with empty comments as not enough to code.
    record = fill_not_enough_to_code(record)

    # 16. Replace unknown non_compliance_penalty.
    record = utils.replace_conditional(record, 'non_compliance_penalty', 'unknown', 'Not Known')

    record = utils.remove_tags(record)

    # Final pass: recode records that have no comments and no links.
    record = blank_record_and_url(record)

    return record
def blank_record_and_url(record: dict):
    """
    Recode records that have no `comments`, no `link` and no `alt_link`.

    When all three fields are missing (as judged by `pd.isna`), `who_code`
    is set to '11' and `who_category`, `who_subcategory` and `who_measure`
    are set to 'Not enough to code'. Otherwise the record is left untouched.

    Parameters
    ----------
    record : dict
        Input record. Modified in place.

    Returns
    -------
    dict
        The same record object, with coding altered when applicable.
    """
    # Any single populated field is enough to leave the coding alone.
    for field in ('comments', 'link', 'alt_link'):
        if not pd.isna(record[field]):
            return record

    label = 'Not enough to code'
    record['who_code'] = '11'
    for column in ('who_category', 'who_subcategory', 'who_measure'):
        record[column] = label

    return record
def apply_prov_measure_filter(record: dict, prov_measure_filter: pd.DataFrame):
    """
    Keep a record only if its JH_HIT coding appears in `prov_measure_filter`.

    A record is kept when its `prov_category` is found in the filter's
    `prov_category` column and its `prov_measure` is found in the filter's
    `prov_measure` column. The two columns are tested independently, not as
    a row-wise pair.

    Parameters
    ----------
    record : dict
        Input record.
    prov_measure_filter : pd.DataFrame
        Table with `prov_category` and `prov_measure` columns listing the
        values to test against.

    Returns
    -------
    dict or None
        The record when both values are found in the filter, else None.
    """
    category = record['prov_category']
    # Materialise the column as a list so `in` tests the values, not the index.
    known_categories = list(prov_measure_filter['prov_category'])
    if category not in known_categories:
        return None

    measure = record['prov_measure']
    known_measures = list(prov_measure_filter['prov_measure'])
    if measure not in known_measures:
        return None

    return record
def fill_not_enough_to_code(record: dict):
    """
    Label a record as "not enough to code" when its comments are empty.

    When `comments` equals the empty string and `prov_category` is not
    'school_closed', both `prov_measure` and `prov_category` are set to
    'not_enough_to_code'. Otherwise the record is left untouched.

    Parameters
    ----------
    record : dict
        Input record. Modified in place.

    Returns
    -------
    dict
        The same record object, with `prov_measure` and `prov_category`
        altered conditionally.
    """
    # Only an exact empty string counts as blank here; None / NaN comments
    # are left as they are.
    if record['comments'] != '' or record['prov_category'] == 'school_closed':
        return record

    for column in ('prov_measure', 'prov_category'):
        record[column] = 'not_enough_to_code'

    return record