Source code for src.processing.CDC_ITF

import pandas as pd
from countrycode.countrycode import countrycode

# hot fix for sys.path issues in test environment
try:

    from processing import utils
    from processing import check

except Exception as e:

    from src.processing import utils
    from src.processing import check


def transform(record: dict, key_ref: dict, country_ref: pd.DataFrame, who_coding: pd.DataFrame):
    """
    Run the full sequence of CDC_ITF transformations on a single record.

    The steps are order-dependent: each one receives the record produced by
    the previous step.

    Parameters
    ----------
    record : dict
        Input record.
    key_ref : dict
        Reference for key mapping.
    country_ref : pd.DataFrame
        Reference for WHO accepted country names.
    who_coding : pd.DataFrame
        Reference for WHO coding.

    Returns
    -------
    dict
        Record with transformations applied.

    """
    # Ordered (value to match, replacement) pairs for each field. They are
    # applied one at a time, top to bottom, via ``utils.replace_conditional``.
    country_renames = (
        ('Saint Martin', 'French Saint Martin'),
        ('Réunion', 'Reunion'),
        # NOTE(review): the next three entries look identical here; they may
        # originally have differed in Unicode normalization - confirm before
        # removing any of them.
        ('Curaçao', 'Curacao'),
        ('Curaçao', 'Curacao'),
        ('Curaçao', 'Curacao'),
        ('St. Barts', 'Saint Barthelemy'),
        ('Czechia', 'Czech Republic'),
        ('D. P. R. of Korea', 'North Korea'),
        ('Eswatini', 'Swaziland'),
        ('South Korea', 'Korea'),
        ('Bonaire, Saint Eustatius and Saba', 'Carribean Netherlands'),
    )
    measure_stage_renames = (
        ('Impose', 'new'),
        ('Lift', 'phase-out'),
        ('Pause', 'modification'),
        ('Ease', 'modification'),
        ('Strengthen', 'modification'),
    )
    penalty_renames = (
        ('Yes', 'not known'),
        ('Yes ', 'not known'),
        ('yes ', 'not known'),
        ('yes', 'not known'),
        ('No', None),
        ("No'", None),
    )
    admin_level_renames = (
        ('Subnational/regional only', 'other'),
        ('subnational/regional only', 'other'),
        ('National', 'national'),
    )

    # 1. Merge the ``Concise Notes`` and ``Notes`` columns while the record
    #    still carries its original keys
    merged_comments = join_comments(record)

    # 2. Build a blank record and fill it from the input record via key_ref
    blank = utils.generate_blank_record()
    record = utils.apply_key_map(blank, record, key_ref)

    # 3. Attach the merged comments to the remapped record
    record['comments'] = merged_comments

    # NOTE: the shared ``utils.assign_id`` step is disabled in this function.

    # 4. Blank out ``area_covered`` when it is "national"
    record = area_covered_national(record)

    # 5. Handle date formatting
    record = utils.parse_date(record)

    # 6. Set ``date_end`` for lifted measures. This has to run before the
    #    measure_stage renames below, which rewrite "Lift" to "phase-out".
    record = add_date_end(record)

    # 7. Manual country name changes
    for old_name, new_name in country_renames:
        record = utils.replace_conditional(record, 'country_territory_area', old_name, new_name)

    # 8. Manual measure_stage name changes
    for old_stage, new_stage in measure_stage_renames:
        record = utils.replace_conditional(record, 'measure_stage', old_stage, new_stage)

    # 9. Manual non_compliance_penalty name changes
    for old_penalty, new_penalty in penalty_renames:
        record = utils.replace_conditional(record, 'non_compliance_penalty', old_penalty, new_penalty)

    # 10. Replace sensitive country names
    record = utils.replace_sensitive_regions(record)

    # 11. Assign ISO code from the country name
    record['iso'] = countrycode(
        codes=record['country_territory_area'],
        origin='country_name',
        target='iso3c',
    )

    # 12. Check for a missing ISO code
    check.check_missing_iso(record)

    # 13. Join WHO accepted country names (shared)
    record = utils.assign_who_country_name(record, country_ref)

    # 14. Join WHO coding from lookup (shared)
    record = utils.assign_who_coding(record, who_coding)

    # 15. Check for missing WHO codes (shared)
    check.check_missing_who_code(record)

    # 16. Manual admin_level name changes
    for old_level, new_level in admin_level_renames:
        record = utils.replace_conditional(record, 'admin_level', old_level, new_level)

    # 17. Replace measure_stage extension
    record = utils.replace_conditional(record, 'measure_stage', 'Extend with same stringency', 'extension')

    # 18. Add WHO PHSM admin_level values
    record = utils.add_admin_level(record)

    # 19. Remove tags from the free-text and link fields
    record = utils.remove_tags(record, ['comments', 'link', 'alt_link'])

    return record
def area_covered_national(record: dict):
    """
    Blank out ``area_covered`` when its value is exactly "national".

    The record is modified in place and the same object is returned. Any
    other value of ``area_covered`` is left untouched.

    Parameters
    ----------
    record : dict
        Input record. Must contain the key ``area_covered``.

    Returns
    -------
    dict
        The input record, with ``area_covered`` set to None if it was
        "national".

    """
    # Guard clause: nothing to do unless the value is exactly "national"
    if not record['area_covered'] == 'national':
        return record

    record['area_covered'] = None
    return record
def add_date_end(record: dict):
    """
    Copy ``date_start`` into ``date_end`` when ``measure_stage`` is "Lift".

    The record is modified in place and the same object is returned. Records
    with any other ``measure_stage`` are returned unchanged.

    Parameters
    ----------
    record : dict
        Input record. Must contain ``measure_stage``; ``date_start`` is read
        only when the stage is "Lift".

    Returns
    -------
    dict
        The input record, with ``date_end`` set conditionally.

    """
    is_lift = record['measure_stage'] == 'Lift'

    # Guard clause: only lifted measures get an end date
    if not is_lift:
        return record

    record['date_end'] = record['date_start']
    return record
def join_comments(record: dict) -> str:
    """
    Combine comments from the "Concise Notes" and "Notes" fields.

    The result is intended for the `comments` column of the output dataset.

    Any value in either field that is not a string (for example ``None`` or
    a float NaN) is replaced by an empty string **in the input record
    itself** before joining. The two fields are always joined with ``'. '``,
    so two empty fields produce ``'. '``.

    Parameters
    ----------
    record : dict
        Input record. Must contain the keys ``Concise Notes`` and ``Notes``.

    Returns
    -------
    str
        "Concise Notes" and "Notes" joined by ``'. '``.

    Raises
    ------
    KeyError
        If either field is missing from `record`.

    """
    for field in ('Concise Notes', 'Notes'):
        # isinstance (rather than an exact type comparison) keeps str
        # subclasses, which are valid text and must not be blanked.
        if not isinstance(record[field], str):
            record[field] = ''

    return record['Concise Notes'] + '. ' + record['Notes']