From c7ca5671f521934a035f0db0be389911c372a548 Mon Sep 17 00:00:00 2001
From: Yevgeny Bulochnik <yevgeny.bulochnik@gmail.com>
Date: Sun, 2 May 2021 10:16:13 -0500
Subject: [PATCH 01/46] Initial mdt package structure

---
 src/mdt/__init__.py        | 0
 src/mdt/meps/__init__.py   | 0
 src/mdt/rxnorm/__init__.py | 0
 3 files changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 src/mdt/__init__.py
 create mode 100644 src/mdt/meps/__init__.py
 create mode 100644 src/mdt/rxnorm/__init__.py

diff --git a/src/mdt/__init__.py b/src/mdt/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/mdt/meps/__init__.py b/src/mdt/meps/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/mdt/rxnorm/__init__.py b/src/mdt/rxnorm/__init__.py
new file mode 100644
index 0000000..e69de29

From b03342fe99f78a352266f65b3bd069043986c136 Mon Sep 17 00:00:00 2001
From: Yevgeny Bulochnik <yevgeny.bulochnik@gmail.com>
Date: Sun, 2 May 2021 10:29:21 -0500
Subject: [PATCH 02/46] Initial database module within mdt package

---
 src/mdt/database.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 src/mdt/database.py

diff --git a/src/mdt/database.py b/src/mdt/database.py
new file mode 100644
index 0000000..e69de29

From d723c173a19c44d3af06f33a64a032910d274ce4 Mon Sep 17 00:00:00 2001
From: Yevgeny Bulochnik <yevgeny.bulochnik@gmail.com>
Date: Sun, 2 May 2021 10:29:53 -0500
Subject: [PATCH 03/46] Initial synthea module

---
 src/mdt/synthea.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 src/mdt/synthea.py

diff --git a/src/mdt/synthea.py b/src/mdt/synthea.py
new file mode 100644
index 0000000..e69de29

From 19a0a28e7fa62cea92a34f224ffb1830b9e33481 Mon Sep 17 00:00:00 2001
From: Yevgeny Bulochnik <yevgeny.bulochnik@gmail.com>
Date: Sun, 2 May 2021 10:53:30 -0500
Subject: [PATCH 04/46] Moved mdt_functions methods into rxclass.py

---
 src/mdt/rxnorm/rxclass.py | 67 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 67 insertions(+)
 create mode 100644 src/mdt/rxnorm/rxclass.py

diff --git a/src/mdt/rxnorm/rxclass.py b/src/mdt/rxnorm/rxclass.py
new file mode 100644
index 0000000..70f422e
--- /dev/null
+++ b/src/mdt/rxnorm/rxclass.py
@@ -0,0 +1,67 @@
+
+
+def rxclass_findclassesbyid_payload(class_id):
+    """Generates and returns URLs as strings for hitting the RxClass API function FindClassesById."""
+
+    param_dict = {'classId':class_id}
+
+    payload = payload_constructor('https://rxnav.nlm.nih.gov/REST/rxclass/class/byId.json?', param_dict)
+
+    return payload
+
+
+def rxclass_getclassmember_payload(class_id, relation, ttys = ['IN','MIN']):
+    """Build the payload for the RxClass API function GetClassMembers.
+
+    relation must be a key of relation_dict, otherwise ValueError is raised.
+    The ttys list passed in (or the shared default) is never modified."""
+
+    relation_dict = {
+        'ATC':"ATC",
+        'has_EPC':"FDASPL",
+        'has_Chemical_Structure':"FDASPL",
+        'has_MoA':"DailyMed",
+        'has_PE':"DailyMed",
+        #'has_MoA':"FDASPL",
+        #'has_PE':"FDASPL",
+        'has_TC': "FMTSME",
+        'CI_with': "MEDRT",
+        'induces': "MEDRT",
+        'may_diagnose': "MEDRT",
+        'may_prevent': "MEDRT",
+        'may_treat': "MEDRT",
+        'CI_ChemClass': "MEDRT",
+        'has_active_metabolites': "MEDRT",
+        'has_Ingredient': "MEDRT",
+        'CI_MoA': "MEDRT",
+        #'has_MoA': "MEDRT",
+        'has_PK': "MEDRT",
+        'site_of_metabolism': "MEDRT",
+        'CI_PE': "MEDRT",
+        #'has_PE': "MEDRT",
+        'has_schedule':'RXNORM',
+        'MESH': "MESH",
+        'isa_disposition': "SNOMEDCT",
+        'isa_structure': "SNOMEDCT",
+        'has_VAClass': "VA",
+        'has_VAClass_extended': "VA",
+    }
+
+    if relation not in relation_dict:
+        raise ValueError("results: relation must be one of %r." % list(relation_dict.keys()))
+
+    #If relaSource is VA or RXNORM, specify ttys as one or more of: SCD, SBD, GPCK, BPCK. The default TTYs do not intersect VA or RXNORM classes.
+    if relation_dict.get(relation) in ['VA','RXNORM']:
+        ttys = list(ttys) + ['SCD','SBD','GPCK','BPCK']
+
+    param_dict = {'classId':class_id,
+                  'relaSource':relation_dict.get(relation),
+                  'ttys':'+'.join(ttys)}
+
+    #Does not send rela parameter on data sources with single rela, see RxClass API documentation
+    if relation not in ['MESH','ATC']:
+        param_dict['rela'] = relation
+
+    payload = payload_constructor('https://rxnav.nlm.nih.gov/REST/rxclass/classMembers.json?', param_dict)
+
+    return payload

From 8336a3e06c812132b6f332476a7b5328bac59e47 Mon Sep 17 00:00:00 2001
From: Yevgeny Bulochnik <yevgeny.bulochnik@gmail.com>
Date: Sun, 2 May 2021 10:54:15 -0500
Subject: [PATCH 05/46] Moved mdt_functions into database.py

---
 src/mdt/database.py | 41 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/src/mdt/database.py b/src/mdt/database.py
index e69de29..9ef2370 100644
--- a/src/mdt/database.py
+++ b/src/mdt/database.py
@@ -0,0 +1,41 @@
+
+
+def create_mdt_con():
+    """create defualt connection to the data/MDT.db database. If database does not exist it creates it."""
+    conn = sql.connect('data/MDT.db')
+    return conn
+
+
+def sql_create_table(table_name, df, conn=None, delete_df=True):
+    """Creates a table in the connected database when passed a pandas dataframe. 
+    Note default is to delete dataframe if table name is same as global variable name that stores the df and delete_df is True"""
+
+    if conn == None:
+        conn = create_mdt_con()
+
+    try:
+        df.to_sql(table_name, conn, if_exists='replace',index=False)
+        print('{} table created in DB'.format(table_name))
+    except:
+        print('Could not create table {0} in DB'.format(table_name))
+
+
+def db_query(query_str,conn=None):
+    """Sends Query to DB and returns results as a dataframe"""
+
+    if conn == None:
+       conn = create_mdt_con()
+
+    return pd.read_sql(query_str,conn)
+
+
+def read_sql_string(file_name):
+    """reads the contents of a sql script into a string for python to use in a query"""
+
+    # the with block closes the file even when read() raises
+    with open(file_name, 'r') as fd:
+        query_str  = fd.read()
+
+    print('Read {0} file as string'.format(file_name))
+
+    return query_str

From 96dee8bada01aab836bd236810be3e8e208dfa0d Mon Sep 17 00:00:00 2001
From: Yevgeny Bulochnik <yevgeny.bulochnik@gmail.com>
Date: Sun, 2 May 2021 10:54:47 -0500
Subject: [PATCH 06/46] Moved mdt_functions into synthea.py

---
 src/mdt/synthea.py | 253 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 253 insertions(+)

diff --git a/src/mdt/synthea.py b/src/mdt/synthea.py
index e69de29..198074a 100644
--- a/src/mdt/synthea.py
+++ b/src/mdt/synthea.py
@@ -0,0 +1,253 @@
+
+
+def read_json(file_name):
+    """Read the JSON file file_name and return its parsed content.
+
+    The file is opened in a with block so the handle is always closed.
+    """
+    with open(file_name, encoding='utf-8') as f:
+        return json.load(f)
+
+
+def age_values(file_name):
+    """Read the 'age' ranges from the JSON file file_name and return a dataframe
+    with one row per individual age ('age_values') of every 'lo-hi' range ('age')."""
+    age_ranges = read_json(file_name)['age']
+    # each range string 'lo-hi' expands to the inclusive list lo..hi
+    bounds = [age.split('-') for age in age_ranges]
+    expanded = [list(range(int(b[0]), int(b[1]) + 1)) for b in bounds]
+    df = pd.DataFrame({'age': age_ranges, 'age_values': expanded})
+    return df.explode('age_values')
+
+
+#TODO: Add option to string search doseage form
+def rxcui_ndc_matcher(rxcui_list):
+    """matches list of RxCUIs against the rxcui_ndc table (ingredient or product RxCUI) to get matching NDCs.
+    Prints the count of matched medication_ndc values and returns the filtered dataframe"""
+
+    df = db_query('SELECT * FROM rxcui_ndc')
+    filtered_df = df[df['medication_ingredient_rxcui'].isin(rxcui_list) | df['medication_product_rxcui'].isin(rxcui_list)]
+    
+    print("RXCUI list matched on {0} NDCs".format(filtered_df['medication_ndc'].count()))
+    
+    return filtered_df
+
+
+def output_df(df,output='csv', filename='df_output'):
+    """Outputs a dataframe to a csv of clipboard if you use the output=clipboard arguement"""
+
+
+    if output == 'clipboard':
+        df.to_clipboard(index=False,excel=True)
+    elif output == 'csv':
+        df.to_csv('data/'+filename+'.csv',index=False)
+
+
+def output_json(data, filename='json_output'):
+    with open('data/'+filename+'.json', 'w', encoding='utf-8') as f:
+        json.dump(data, f, ensure_ascii=False, indent=4)
+
+
+def normalize_name(name):
+    #Replace all non-alphanumeric characters with an underscore
+    name = re.sub(r"[^a-zA-Z0-9]", "_", name)
+    #Then, replace all duplicate underscores with just one underscore
+    name = re.sub(r"_{2,}", "_", name)
+    #If there'a an underscore at the end of the word, remove
+    name = re.sub(r"_$", "", name)
+    return name
+
+
+def generate_module(rxcui_ndc_df, rxclass_name):
+    """Build a Synthea module for rxclass_name from rxcui_ndc_df (RxCUI/NDC rows): writes the ingredient and
+    per-ingredient product distribution CSVs through output_df and the module JSON through output_json."""
+    module_dict = {}
+    state_prefix = 'Prescribe_'
+    rxclass_name = normalize_name(rxclass_name)
+    module_dict['name'] = rxclass_name + ' Medications'
+    module_dict['remarks'] = ['Remarks go here', 'and here.']
+    #NOTE: not sure the difference between 1 and 2... I think 2 is the most recent version(?)
+    module_dict['gmf_version'] = 2
+
+    states_dict = {}
+
+    #Initial state (required)
+    #NOTE: if we change to conditional to check for existence of medication, change direct_transition to transition
+    states_dict['Initial'] = {
+        'type': 'Initial',
+        'direct_transition': state_prefix + 'Ingredient'
+    }
+
+    #Terminal state (required)
+    states_dict['Terminal'] = {
+        'type': 'Terminal'
+    }
+
+    #Get tuples of medication_product names and medication_product RXCUIs and loop through to generate MedicationOrders 
+
+    #Read in MEPS Reference table
+    meps_reference_str = read_sql_string('meps_reference.sql')
+    meps_reference = db_query(meps_reference_str)
+
+    #Join MEPS to filtered rxcui_ndc dataframe (rxcui_list)
+    meps_rxcui = meps_reference.astype(str).merge(rxcui_ndc_df.astype(str)[['medication_ingredient_name', 'medication_ingredient_rxcui','medication_product_name', 'medication_product_rxcui', 'medication_ndc']], how = 'inner', left_on = 'RXNDC', right_on = 'medication_ndc')
+
+    #Optional: Age range join - can be customized in the mdt_config.json file
+    #groupby_demographic_variable: must be either an empty list [] or list of patient demographics (e.g., age, gender, state) - based on user inputs in the mdt_config.json file
+
+    data = read_json('mdt_config.json')
+    demographic_distrib_flags = data['demographic_distrib_flags']
+
+    groupby_demographic_variables = []
+    for k, v in demographic_distrib_flags.items():
+        if v == 'Y':
+               groupby_demographic_variables.append(k)  
+
+    if demographic_distrib_flags['age'] == 'Y':
+        age_ranges = age_values('mdt_config.json')
+        meps_rxcui = meps_rxcui.merge(age_ranges.astype(str), how='inner', left_on='AGELAST', right_on='age_values')
+    #Optional: State-region mapping from MEPS -- NOTE(review): meps_region_states is not defined or imported in this module as shown; confirm its source
+    if demographic_distrib_flags['state'] == 'Y':
+        meps_rxcui = meps_rxcui.merge(meps_region_states.astype(str), how='inner', left_on='region_num', right_on='region_value')
+
+
+    #Clean text to JSON/SQL-friendly format 
+    for col in meps_rxcui[['medication_ingredient_name', 'medication_product_name']]:
+        meps_rxcui[col] = meps_rxcui[col].apply(lambda x: normalize_name(x))
+
+
+    dcp_dict = {}
+    output = 'csv'
+    medication_ingredient_list = meps_rxcui['medication_ingredient_name'].unique().tolist()
+
+    #Ingredient Name Distribution (Transition 1)
+
+    """Numerator = ingred_name
+    Denominator = total population [filtered by rxclass_name upstream between rxcui_ndc & rxclass]
+    1. Find distinct count of patients (DUPERSID) = patient_count
+    2. Multiply count of patients * personweight = weighted_patient_count
+    3. Add the weighted_patient_counts, segmented by ingredient_name + selected patient demographics = patients_by_demographics (Numerator) 
+    4. Add the patients_by_demographics from Step 3 = weighted_patient_count_total (Denominator) -- Taking SUM of SUMs to make the Denominator = 100%  
+    5. Calculate percentage (Output from Step 3/Output from Step 4) -- format as 0.0-1.0 per Synthea requirements. 
+    6. Add the 'prescribe_' prefix to the medication_ingredient_name (e.g., 'prescribe_fluticasone') 
+    7. Pivot the dataframe to transpose medication_ingredient_names from rows to columns """
+
+    filename = rxclass_name + '_ingredient_distrib'
+    #1
+    dcp_dict['patient_count_ingredient'] = meps_rxcui[['medication_ingredient_name',  'medication_ingredient_rxcui', 'person_weight', 'DUPERSID']+groupby_demographic_variables].groupby(['medication_ingredient_name',  'medication_ingredient_rxcui', 'person_weight']+groupby_demographic_variables)['DUPERSID'].nunique()
+    dcp_df = pd.DataFrame(dcp_dict['patient_count_ingredient']).reset_index()
+    #2
+    dcp_df['weighted_patient_count_ingredient'] = dcp_df['person_weight'].astype(float)*dcp_df['DUPERSID']
+    #3
+    dcp_dict['patients_by_demographics_ingredient'] = dcp_df.groupby(['medication_ingredient_name']+groupby_demographic_variables)['weighted_patient_count_ingredient'].sum()
+    dcp_demographic_df = pd.DataFrame(dcp_dict['patients_by_demographics_ingredient']).reset_index()
+    #4
+    if len(groupby_demographic_variables) > 0:
+        dcp_demographictotal_df = pd.merge(dcp_demographic_df,  dcp_demographic_df.groupby(groupby_demographic_variables)['weighted_patient_count_ingredient'].sum(), how = 'inner', left_on = groupby_demographic_variables, right_index=True, suffixes = ('_demographic', '_total'))
+    else:
+        dcp_demographictotal_df = dcp_demographic_df
+        dcp_demographictotal_df['weighted_patient_count_ingredient_demographic'] = dcp_demographic_df['weighted_patient_count_ingredient']
+        dcp_demographictotal_df['weighted_patient_count_ingredient_total'] = dcp_demographic_df['weighted_patient_count_ingredient'].sum()
+    #5
+    dcp_demographictotal_df['percent_ingredient_patients'] = round(dcp_demographictotal_df['weighted_patient_count_ingredient_demographic']/dcp_demographictotal_df['weighted_patient_count_ingredient_total'], 3)
+    #6 TODO: change this column to medication_product_state_name(?)
+    dcp_demographictotal_df['medication_ingredient_name'] = dcp_demographictotal_df['medication_ingredient_name'].apply(lambda x: normalize_name(state_prefix + x))
+    #Generate ingredient table transition
+    lookup_table_transition = []
+    lookup_table_name = filename + '.' + output
+    module_medication_ingredient_name_list = dcp_demographictotal_df['medication_ingredient_name'].unique().tolist()
+    for idx, transition in enumerate(module_medication_ingredient_name_list):
+        lookup_table_transition.append({
+            'transition': transition,
+            'default_probability': '1' if idx == 0 else '0',
+            'lookup_table_name': lookup_table_name
+        })
+    state_name = state_prefix + 'Ingredient'
+    states_dict[state_name] = {
+        'type': 'Simple',
+        'name': state_name,
+        'lookup_table_transition': lookup_table_transition
+    }
+    #7
+    dcp_dict['percent_ingredient_patients'] = dcp_demographictotal_df
+    if len(groupby_demographic_variables) > 0:
+        dcp_dict['percent_ingredient_patients'] = dcp_dict['percent_ingredient_patients'].reset_index().pivot(index= groupby_demographic_variables, columns = 'medication_ingredient_name', values='percent_ingredient_patients').reset_index()
+    else:
+        dcp_dict['percent_ingredient_patients'] = dcp_dict['percent_ingredient_patients'][['medication_ingredient_name', 'percent_ingredient_patients']].set_index('medication_ingredient_name').T
+
+    #Fill NULLs and save as CSV
+    dcp_dict['percent_ingredient_patients'].fillna(0, inplace=True)
+    output_df(dcp_dict['percent_ingredient_patients'], output=output, filename=filename)
+
+    #Product Name Distribution (Transition 2)
+    """Numerator = product_name 
+    Denominator = ingred_name
+    Loop through all the ingredient_names to create product distributions by ingredient name
+    Same steps as above for Ingredient Name Distribution (1-7), but first filter medication_product_names for only those that have the same medication_ingredient_name (Step 0) """
+
+    for ingred_name in medication_ingredient_list:
+        filename = rxclass_name + '_product_' + ingred_name + '_distrib'
+        #0
+        meps_rxcui_ingred = meps_rxcui[meps_rxcui['medication_ingredient_name']==ingred_name][['medication_product_name',  'medication_product_rxcui', 'medication_ingredient_name',  'medication_ingredient_rxcui', 'person_weight', 'DUPERSID']+groupby_demographic_variables]
+        #1
+        dcp_dict['patient_count_product'] = meps_rxcui_ingred.groupby(['medication_product_name',  'medication_product_rxcui',  'medication_ingredient_name',  'medication_ingredient_rxcui', 'person_weight']+groupby_demographic_variables)['DUPERSID'].nunique()
+        dcp_df = pd.DataFrame(dcp_dict['patient_count_product']).reset_index()
+        #2
+        dcp_df['weighted_patient_count_product'] = dcp_df['person_weight'].astype(float)*dcp_df['DUPERSID']
+        #3
+        dcp_dict['patients_by_demographics_product'] = dcp_df.groupby(['medication_product_name', 'medication_ingredient_name']+groupby_demographic_variables)['weighted_patient_count_product'].sum()
+        dcp_demographic_df = pd.DataFrame(dcp_dict['patients_by_demographics_product']).reset_index()
+        #4
+        dcp_demographictotal_df = pd.merge(dcp_demographic_df,  dcp_demographic_df.groupby(['medication_ingredient_name']+groupby_demographic_variables)['weighted_patient_count_product'].sum(), how = 'inner', left_on = ['medication_ingredient_name']+groupby_demographic_variables, right_index=True, suffixes = ('_demographic', '_total'))
+        #5
+        dcp_demographictotal_df['percent_product_patients'] = round(dcp_demographictotal_df['weighted_patient_count_product_demographic']/dcp_demographictotal_df['weighted_patient_count_product_total'], 3)
+        #6 TODO: change this column to medication_product_state_name or medication_product_transition_name(?)
+        dcp_demographictotal_df['medication_product_name'] = dcp_demographictotal_df['medication_product_name'].apply(lambda x: normalize_name(state_prefix + x))
+        #Generate product table transition
+        lookup_table_transition = []
+        lookup_table_name = filename + '.' + output
+        module_medication_product_name_list = dcp_demographictotal_df['medication_product_name'].unique().tolist()
+        for idx, transition in enumerate(module_medication_product_name_list):
+            lookup_table_transition.append({
+                'transition': transition,
+                'default_probability': '1' if idx == 0 else '0',  #first transition is the default, same as the ingredient table
+                'lookup_table_name': lookup_table_name
+            })
+        state_name = state_prefix + ingred_name
+        states_dict[state_name] = {
+            'type': 'Simple',
+            'name': state_name,
+            'lookup_table_transition': lookup_table_transition
+        }
+        #7
+        dcp_dict['percent_product_patients'] = dcp_demographictotal_df
+        if len(groupby_demographic_variables) > 0:
+            dcp_dict['percent_product_patients'] = dcp_dict['percent_product_patients'].reset_index().pivot(index= groupby_demographic_variables, columns = 'medication_product_name', values='percent_product_patients').reset_index()
+        else:
+            dcp_dict['percent_product_patients'] = dcp_dict['percent_product_patients'][['medication_product_name', 'percent_product_patients']].set_index('medication_product_name').T
+
+        #Fill NULLs and save as CSV 
+        dcp_dict['percent_product_patients'].fillna(0, inplace=True)
+        output_df(dcp_dict['percent_product_patients'], output=output, filename=filename)
+
+    #Generate MedicationOrder states
+    medication_products = list(meps_rxcui[['medication_product_name', 'medication_product_rxcui']].to_records(index=False))
+    for (medication_product_name, medication_product_rxcui) in medication_products:
+        state_name = normalize_name(state_prefix + medication_product_name)
+        attribute = normalize_name(rxclass_name + '_prescription')
+        codes = {
+            'system': 'RxNorm',
+            'code': medication_product_rxcui,
+            'display': medication_product_name
+        }
+        states_dict[state_name] = {
+            'type': 'MedicationOrder',
+            'assign_to_attribute': attribute,
+            'codes': [ codes ],
+            'direct_transition': 'Terminal',
+            'name': state_name
+        }
+
+    module_dict['states'] = states_dict
+
+    output_json(module_dict)

From dc90fd12913a2fac8b95299945cae4fc8f68c718 Mon Sep 17 00:00:00 2001
From: Yevgeny Bulochnik <yevgeny.bulochnik@gmail.com>
Date: Sun, 2 May 2021 10:56:06 -0500
Subject: [PATCH 07/46] Moved mdt_functions into utils.py

---
 src/mdt/rxnorm/utils.py | 47 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)
 create mode 100644 src/mdt/rxnorm/utils.py

diff --git a/src/mdt/rxnorm/utils.py b/src/mdt/rxnorm/utils.py
new file mode 100644
index 0000000..da45a2a
--- /dev/null
+++ b/src/mdt/rxnorm/utils.py
@@ -0,0 +1,47 @@
+
+
+def json_extract(obj, key):
+    """Recursively fetch values from nested JSON."""
+    arr = []
+
+    def extract(obj, arr, key):
+        """Recursively search for values of key in JSON tree."""
+        if isinstance(obj, dict):
+            for k, v in obj.items():
+                if isinstance(v, (dict, list)):
+                    extract(v, arr, key)
+                elif k == key:
+                    arr.append(v)
+        elif isinstance(obj, list):
+            for item in obj:
+                extract(item, arr, key)
+        return arr
+
+    values = extract(obj, arr, key)
+    print(values)
+    return values
+
+
+def payload_constructor(base_url,params):
+    #TODO: exception handling for params as dict
+
+    params_str = urllib.parse.urlencode(params, safe=':+')
+    payload = {'base_url':base_url,
+                'params':params_str}
+
+    #debug print out
+    print("""Payload built with base URL: {0} and parameters: {1}""".format(base_url,params_str))
+
+    return payload
+
+
+def rxapi_get_requestor(request_dict):
+    """Sends a GET request to either RxNorm or RxClass"""
+    response = requests.get(request_dict['base_url'],params=request_dict['params'])
+
+    #debug print out
+    print("GET Request sent to URL: {0}".format(response.url))
+    print("Response HTTP Code: {0}".format(response.status_code))
+    if response.status_code == 200:
+    #TODO: Add execption handling that can manage 200 responses with no JSON
+        return response.json()

From f2e6f5fc0f77ef92c0e1c172658d6b2231f674f6 Mon Sep 17 00:00:00 2001
From: Yevgeny Bulochnik <yevgeny.bulochnik@gmail.com>
Date: Sun, 2 May 2021 11:59:24 -0500
Subject: [PATCH 08/46] Initial setup.py

---
 setup.py | 42 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)
 create mode 100644 setup.py

diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..f88bdfb
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,42 @@
+from setuptools import setup, find_packages
+import pathlib
+
+here = pathlib.Path(__file__).parent.resolve()
+
+# Get the long description from the README file
+long_description = (here / 'README.md').read_text(encoding='utf-8')
+
+setup(
+    name='medicationDiversification',
+    version='1.0.0',
+    # description='A sample Python project',  # Optional
+    # long_description=long_description,  # Optional
+    # long_description_content_type='text/markdown',  # Optional (see note above)
+    # url='https://github.com/pypa/sampleproject',  # Optional
+    # author='A. Random Developer',  # Optional
+    # author_email='author@example.com',  # Optional
+    # keywords='sample, setuptools, development',  # Optional
+    package_dir={'': 'src'},
+    packages=find_packages(where='src'),
+    python_requires='>=3.6, <4',
+    # install_requires=['peppercorn'],  # Optional
+
+    # If there are data files included in your packages that need to be
+    # installed, specify them here.
+    # package_data={  # Optional
+    #    'sample': ['package_data.dat'],
+    #},
+
+    # Although 'package_data' is the preferred approach, in some case you may
+    # need to place data files outside of your packages. See:
+    # http://docs.python.org/distutils/setupscript.html#installing-additional-files
+    #
+    # In this case, 'data_file' will be installed into '<sys.prefix>/my_data'
+    # data_files=[('my_data', ['data/data_file'])],  # Optional
+
+    entry_points={  # Optional
+        'console_scripts': [
+            'mdt=mdt.cli:entry_point',
+        ],
+    },
+)

From 957669f8c07e91f74a1245df43feb3ca4d75f16e Mon Sep 17 00:00:00 2001
From: Yevgeny Bulochnik <yevgeny.bulochnik@gmail.com>
Date: Sun, 2 May 2021 12:00:32 -0500
Subject: [PATCH 09/46] Comment out entrypoint setup in setup.py for now

---
 setup.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/setup.py b/setup.py
index f88bdfb..0f5544d 100644
--- a/setup.py
+++ b/setup.py
@@ -34,9 +34,9 @@
     # In this case, 'data_file' will be installed into '<sys.prefix>/my_data'
     # data_files=[('my_data', ['data/data_file'])],  # Optional
 
-    entry_points={  # Optional
-        'console_scripts': [
-            'mdt=mdt.cli:entry_point',
-        ],
-    },
+    #  entry_points={  # Optional
+    #      'console_scripts': [
+    #          'mdt=mdt.cli:entry_point',
+    #      ],
+    #  },
 )

From 2d122e0ed343fceae55e9451476e6c3823822578 Mon Sep 17 00:00:00 2001
From: Yevgeny Bulochnik <yevgeny.bulochnik@gmail.com>
Date: Sun, 2 May 2021 12:01:12 -0500
Subject: [PATCH 10/46] Initial run_mdt.py main script/module

---
 src/mdt/run_mdt.py | 9 +++++++++
 1 file changed, 9 insertions(+)
 create mode 100644 src/mdt/run_mdt.py

diff --git a/src/mdt/run_mdt.py b/src/mdt/run_mdt.py
new file mode 100644
index 0000000..6c94e05
--- /dev/null
+++ b/src/mdt/run_mdt.py
@@ -0,0 +1,9 @@
+from mdt.database import create_mdt_con
+
+
+def main():
+    conn = create_mdt_con()
+
+
+if __name__ == '__main__':
+    main()

From a20c4fe1ca55d05698edf6fb358f557c1e3815b1 Mon Sep 17 00:00:00 2001
From: Bridg109 <40433162+Bridg109@users.noreply.github.com>
Date: Mon, 3 May 2021 22:03:48 -0500
Subject: [PATCH 11/46] added function to download RxNorm,

---
 src/mdt/rxnorm/utils.py | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/src/mdt/rxnorm/utils.py b/src/mdt/rxnorm/utils.py
index da45a2a..a8d49c8 100644
--- a/src/mdt/rxnorm/utils.py
+++ b/src/mdt/rxnorm/utils.py
@@ -1,4 +1,6 @@
-
+from pathlib import Path
+import requests, os
+from typing import Callable
 
 def json_extract(obj, key):
     """Recursively fetch values from nested JSON."""
@@ -45,3 +47,16 @@ def rxapi_get_requestor(request_dict):
     if response.status_code == 200:
     #TODO: Add execption handling that can manage 200 responses with no JSON
         return response.json()
+
+
+def get_dataset(dest: os.PathLike = Path.cwd(), handler: Callable[[bytes], object] = None):
+    """Download RxNorm_full_prescribe_current.zip; raises requests.HTTPError on an HTTP error status.
+    With a handler, return handler(zip bytes); otherwise write the zip under dest and return the response."""
+    url = 'https://download.nlm.nih.gov/rxnorm/RxNorm_full_prescribe_current.zip'
+    response = requests.get(url)
+    response.raise_for_status()  # do not hand an HTTP error page on as if it were the zip
+    if handler:
+        return handler(response.content)
+    (dest / url.split('/')[-1]).write_bytes(response.content)
+    return response
+

From 16bd740301ae91b032aacc42e449319a1b7965cf Mon Sep 17 00:00:00 2001
From: Bridg109 <40433162+Bridg109@users.noreply.github.com>
Date: Mon, 3 May 2021 22:04:59 -0500
Subject: [PATCH 12/46] adds to download and load RxNorm, Pathlib use

---
 src/mdt/database.py | 42 ++++++++++++++++++++++++++++++++++++++++--
 src/mdt/run_mdt.py  |  4 ++--
 2 files changed, 42 insertions(+), 4 deletions(-)

diff --git a/src/mdt/database.py b/src/mdt/database.py
index 9ef2370..7e0e6b2 100644
--- a/src/mdt/database.py
+++ b/src/mdt/database.py
@@ -1,12 +1,27 @@
+from .rxnorm.utils import get_dataset
+
+from pathlib import Path
+import zipfile,io, sqlite3
+import pandas as pd
+
+
+def to_data():
+    """Return the path to the data folder under the current working directory,
+    creating the directory first when it is not present."""
+    path = Path.cwd() / 'data'
+    # exist_ok=True makes an already existing folder a no-op, while real
+    # failures (e.g. permissions) are raised instead of silently swallowed
+    path.mkdir(exist_ok=True)
+    return path
 
 
 def create_mdt_con():
     """create defualt connection to the data/MDT.db database. If database does not exist it creates it."""
-    conn = sql.connect('data/MDT.db')
+    conn = sqlite3.connect(to_data() / 'MDT.db')
     return conn
 
 
-def sql_create_table(table_name, df, conn=None, delete_df=True):
+def sql_create_table(table_name, df, conn=None):
     """Creates a table in the connected database when passed a pandas dataframe. 
     Note default is to delete dataframe if table name is same as global variable name that stores the df and delete_df is True"""
 
@@ -39,3 +54,26 @@ def read_sql_string(file_name):
     print('Read {0} file as string'.format(file_name))
 
     return query_str
+
+
+def load_rxnorm():
+    """downloads and loads RxNorm dataset into database"""
+
+    z = zipfile.ZipFile(get_dataset(handler=io.BytesIO))
+
+    col_names = ['RXCUI','LAT','TS','LUI','STT','SUI','ISPREF','RXAUI','SAUI','SCUI','SDUI','SAB','TTY','CODE','STR','SRL','SUPPRESS','CVF','test']
+    rxnconso = pd.read_csv(z.open('rrf/RXNCONSO.RRF'),sep='|',header=None,dtype=object,names=col_names)
+    sql_create_table('rxnconso',rxnconso)
+    del rxnconso
+
+    col_names = ['RXCUI1','RXAUI1','STYPE1','REL','RXCUI2','RXAUI2','STYPE2','RELA','RUI','SRUI','SAB','SL','DIR','RG','SUPPRESS','CVF','test']
+    rxnrel = pd.read_csv(z.open('rrf/RXNREL.RRF'),sep='|',dtype=object,header=None,names=col_names)
+    sql_create_table('rxnrel',rxnrel)
+    del rxnrel
+
+    col_names = ['RXCUI','LUI','SUI','RXAUI','STYPE','CODE','ATUI','SATUI','ATN','SAB','ATV','SUPPRESS','CVF','test']
+    rxnsat = pd.read_csv(z.open('rrf/RXNSAT.RRF'),sep='|',dtype=object,header=None,names=col_names)
+    sql_create_table('rxnsat',rxnsat)
+    del rxnsat 
+
+    del z
\ No newline at end of file
diff --git a/src/mdt/run_mdt.py b/src/mdt/run_mdt.py
index 6c94e05..096cb81 100644
--- a/src/mdt/run_mdt.py
+++ b/src/mdt/run_mdt.py
@@ -1,8 +1,8 @@
-from mdt.database import create_mdt_con
+from mdt.database import load_rxnorm
 
 
 def main():
-    conn = create_mdt_con()
+    load_rxnorm()
 
 
 if __name__ == '__main__':

From 1af83aac8a482f9586770b704df830b469986566 Mon Sep 17 00:00:00 2001
From: Yevgeny Bulochnik <yevgeny.bulochnik@gmail.com>
Date: Wed, 5 May 2021 08:26:43 -0500
Subject: [PATCH 13/46] ignore .vim, .ds_store and python egg-info

---
 .gitignore | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 5540557..576c65e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,10 +1,14 @@
 data
 __pycache__
 .vscode
+.vim
+.DS_Store
 venv
 .venv
 
+*.egg-info
+
 .ipynb_checkpoints
 */.ipynb_checkpoints/*
 
-*.ipynb
\ No newline at end of file
+*.ipynb

From 49e30ddbf3afb7ae78d6225daaf2fa636383351f Mon Sep 17 00:00:00 2001
From: Yevgeny Bulochnik <yevgeny.bulochnik@gmail.com>
Date: Fri, 7 May 2021 08:00:32 -0500
Subject: [PATCH 14/46] Meps utils module with get_dataset function

---
 src/mdt/meps/utils.py | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 src/mdt/meps/utils.py

diff --git a/src/mdt/meps/utils.py b/src/mdt/meps/utils.py
new file mode 100644
index 0000000..70e24c5
--- /dev/null
+++ b/src/mdt/meps/utils.py
@@ -0,0 +1,27 @@
+import os
+from pathlib import Path
+from typing import Callable
+import requests
+
+
+def get_dataset(
+        dat_name: str,
+        dest: os.PathLike = Path.cwd(),
+        handler: Callable[[requests.Response], object] = None
+):
+    """Download a MEPS dataset and save it, or pass it to a handler
+
+    Args:
+        dat_name (str): MEPS dat file name, ie: h206adat.zip
+        dest (Path): Directory to save into, defaults to CWD at import time
+        handler (func, optional): Called with the response instead of saving
+    Returns: handler's result if one is given, else the requests response
+    Raises: requests.HTTPError if saving and the server answered with an error
+    """
+    url = f'https://www.meps.ahrq.gov/mepsweb/data_files/pufs/{dat_name}'
+    response = requests.get(url)
+    if handler:
+        return handler(response)
+    response.raise_for_status()  # never write an HTTP error page to disk
+    (Path(dest) / url.split('/')[-1]).write_bytes(response.content)
+    return response

From 548342eed05e9fc89cde63b4077004b20272ee28 Mon Sep 17 00:00:00 2001
From: Yevgeny Bulochnik <yevgeny.bulochnik@gmail.com>
Date: Fri, 7 May 2021 08:28:18 -0500
Subject: [PATCH 15/46] Move meps_lists vars into new meps module columns.py

---
 src/mdt/meps/columns.py | 383 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 383 insertions(+)
 create mode 100644 src/mdt/meps/columns.py

diff --git a/src/mdt/meps/columns.py b/src/mdt/meps/columns.py
new file mode 100644
index 0000000..e372f31
--- /dev/null
+++ b/src/mdt/meps/columns.py
@@ -0,0 +1,383 @@
+import pandas as pd 
+
+# MEPS region code -> state lookup; one str (region_value, state) row per state (explode adds no NaN padding).
+# Source: https://www.meps.ahrq.gov/survey_comp/hc_technical_notes.shtml
+meps_region_states = pd.DataFrame({'region_value': [1, 2, 3, 4],
+              'region_label': ['Northeast', 'Midwest', 'South', 'West'],
+              'state': [['Connecticut', 'Maine', 'Massachusetts', 'New Hampshire', 'New Jersey',
+                        'New York', 'Pennsylvania', 'Rhode Island', 'Vermont'],
+                        ['Indiana', 'Illinois', 'Iowa', 'Kansas', 'Michigan', 'Minnesota', 'Missouri',
+                        'Nebraska', 'North Dakota', 'Ohio', 'South Dakota', 'Wisconsin'],
+                        ['Alabama', 'Arkansas', 'Delaware', 'District of Columbia', 'Florida',
+                        'Georgia', 'Kentucky', 'Louisiana', 'Maryland', 'Mississippi', 'North Carolina', 'Oklahoma', 'South Carolina', 'Tennessee', 'Texas', 'Virginia', 'West Virginia'],
+                        ['Alaska', 'Arizona', 'California', 'Colorado', 'Hawaii', 'Idaho', 'Montana',
+                        'Nevada', 'New Mexico', 'Oregon', 'Utah', 'Washington', 'Wyoming']]
+                })[['region_value', 'state']].explode('state').reset_index(drop=True).astype(str)
+
+
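+# MEPS data-file column names (list of str); presumably paired positionally with d_col_spaces below - TODO confirm the lengths match.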
+d_col_names=["DUID", "PID", "DUPERSID", "PANEL", "FAMID31", "FAMID42",
+    "FAMID53", "FAMID18", "FAMIDYR", "CPSFAMID", "FCSZ1231",
+    "FCRP1231", "RULETR31", "RULETR42", "RULETR53", "RULETR18",
+    "RUSIZE31", "RUSIZE42", "RUSIZE53", "RUSIZE18", "RUCLAS31",
+    "RUCLAS42", "RUCLAS53", "RUCLAS18", "FAMSZE31", "FAMSZE42",
+    "FAMSZE53", "FAMSZE18", "FMRS1231", "FAMS1231", "FAMSZEYR",
+    "FAMRFPYR", "REGION31", "REGION42", "REGION53", "REGION18",
+    "REFPRS31", "REFPRS42", "REFPRS53", "REFPRS18", "RESP31",
+    "RESP42", "RESP53", "RESP18", "PROXY31", "PROXY42",
+    "PROXY53", "PROXY18", "INTVLANG", "BEGRFM31", "BEGRFY31",
+    "ENDRFM31", "ENDRFY31", "BEGRFM42", "BEGRFY42", "ENDRFM42",
+    "ENDRFY42", "BEGRFM53", "BEGRFY53", "ENDRFM53", "ENDRFY53",
+    "ENDRFM18", "ENDRFY18", "KEYNESS", "INSCOP31", "INSCOP42",
+    "INSCOP53", "INSCOP18", "INSC1231", "INSCOPE", "ELGRND31",
+    "ELGRND42", "ELGRND53", "ELGRND18", "PSTATS31", "PSTATS42",
+    "PSTATS53", "RURSLT31", "RURSLT42", "RURSLT53", "AGE31X",
+    "AGE42X", "AGE53X", "AGE18X", "AGELAST", "DOBMM", "DOBYY",
+    "SEX", "RACEV1X", "RACEV2X", "RACEAX", "RACEBX", "RACEWX",
+    "RACETHX", "HISPANX", "HISPNCAT", "MARRY31X", "MARRY42X",
+    "MARRY53X", "MARRY18X", "SPOUID31", "SPOUID42", "SPOUID53",
+    "SPOUID18", "SPOUIN31", "SPOUIN42", "SPOUIN53", "SPOUIN18",
+    "EDUCYR", "HIDEG", "FTSTU31X", "FTSTU42X", "FTSTU53X",
+    "FTSTU18X", "ACTDTY31", "ACTDTY42", "ACTDTY53", "HONRDC31",
+    "HONRDC42", "REFRL31X", "REFRL42X", "REFRL53X", "REFRL18X",
+    "OTHLANG", "LANGSPK", "HWELLSPE", "OTHLGSPK", "WHTLGSPK",
+    "HWELLSPK", "BORNUSA", "YRSINUS", "MOPID31X", "MOPID42X",
+    "MOPID53X", "DAPID31X", "DAPID42X", "DAPID53X", "RTHLTH31",
+    "RTHLTH42", "RTHLTH53", "MNHLTH31", "MNHLTH42", "MNHLTH53",
+    "HIBPDX", "HIBPAGED", "BPMLDX", "CHDDX", "CHDAGED",
+    "ANGIDX", "ANGIAGED", "MIDX", "MIAGED", "OHRTDX",
+    "OHRTAGED", "OHRTTYPE", "STRKDX", "STRKAGED", "EMPHDX",
+    "EMPHAGED", "CHBRON31", "CHOLDX", "CHOLAGED", "CANCERDX",
+    "CABLADDR", "CABREAST", "CACERVIX", "CACOLON", "CALUNG",
+    "CALYMPH", "CAMELANO", "CAOTHER", "CAPROSTA", "CASKINNM",
+    "CASKINDK", "CAUTERUS", "DIABDX_M18", "DIABAGED",
+    "JTPAIN31_M18", "ARTHDX", "ARTHTYPE", "ARTHAGED", "ASTHDX",
+    "ASTHAGED", "ASSTIL31", "ASATAK31", "ASTHEP31", "ASACUT31",
+    "ASMRCN31", "ASPREV31", "ASDALY31", "ASPKFL31", "ASEVFL31",
+    "ASWNFL31", "ADHDADDX", "ADHDAGED", "IADLHP31", "ADLHLP31",
+    "AIDHLP31", "WLKLIM31", "LFTDIF31", "STPDIF31", "WLKDIF31",
+    "MILDIF31", "STNDIF31", "BENDIF31", "RCHDIF31", "FNGRDF31",
+    "ACTLIM31", "WRKLIM31", "HSELIM31", "SCHLIM31", "UNABLE31",
+    "SOCLIM31", "COGLIM31", "DFHEAR42", "DFSEE42", "DFCOG42",
+    "DFWLKC42", "DFDRSB42", "DFERND42", "ANYLMI18", "CHPMED42",
+    "CHPMHB42", "CHPMCN42", "CHSERV42", "CHSRHB42", "CHSRCN42",
+    "CHLIMI42", "CHLIHB42", "CHLICO42", "CHTHER42", "CHTHHB42",
+    "CHTHCO42", "CHCOUN42", "CHEMPB42", "CSHCN42", "MESHGT42",
+    "WHNHGT42", "MESWGT42", "WHNWGT42", "CHBMIX42", "MESVIS42",
+    "EATHLT42", "WHNEAT42", "PHYSCL42", "WHNPHY42", "SAFEST42",
+    "WHNSAF42", "BOOST42", "WHNBST42", "LAPBLT42", "WHNLAP42",
+    "HELMET42", "WHNHEL42", "NOSMOK42", "WHNSMK42", "TIMALN42",
+    "LSTETH53", "PHYEXE53", "OFTSMK53", "SAQELIG", "ADSEX42",
+    "ADAGE42", "ADPROX42", "ADGENH42", "ADDAYA42", "ADCLIM42",
+    "ADACLS42", "ADWKLM42", "ADEMLS42", "ADMWCF42", "ADPAIN42",
+    "ADPCFL42", "ADENGY42", "ADPRST42", "ADSOCA42", "VPCS42",
+    "VMCS42", "VRFLAG42", "ADNERV42", "ADHOPE42", "ADREST42",
+    "ADSAD42", "ADEFRT42", "ADWRTH42", "K6SUM42", "ADINTR42",
+    "ADDPRS42", "PHQ242", "ADBRTC42", "ADMDVT42", "ADFLST42",
+    "ADWGHD42", "ADBMI42", "ADWTAD42", "ADKALC42", "ADRNK542",
+    "ADRNK442", "ADSTAL42", "ADTBAC42", "ADOFTB42", "ADQTTB42",
+    "ADQTMD42", "ADQTHP42", "ADMOOD42", "ADBPCK42", "ADCHLC42",
+    "ADPNEU42", "ADSHNG42", "ADNOAP42", "ADDSCU42", "ADCOLN42",
+    "ADCLNS42", "ADSGMD42", "ADBLDS42", "ADPROS42", "ADPSAG42",
+    "ADUTRM42", "ADPAP42", "ADPAPG42", "ADOSTP42", "ADBNDN42",
+    "ADBRST42", "ADMMGR42", "ADCMPM42", "ADCMPY42", "ADLANG42",
+    "VSAQELIG", "VACTDY53", "VAPRHT53", "VACOPD53", "VADERM53",
+    "VAGERD53", "VAHRLS53", "VABACK53", "VAJTPN53", "VARTHR53",
+    "VAGOUT53", "VANECK53", "VATMD53", "VAPTSD53", "VALCOH53",
+    "VABIPL53", "VADEPR53", "VAMOOD53", "VAPROS53", "VARHAB53",
+    "VAMNHC53", "VAGCNS53", "VARXMD53", "VACRGV53", "VAMOBL53",
+    "VACOST53", "VARECM53", "VAREP53", "VAWAIT53", "VALOCT53",
+    "VANTWK53", "VANEED53", "VAOUT53", "VAPAST53", "VACOMP53",
+    "VAMREC53", "VAGTRC53", "VACARC53", "VAPROB53", "VACARE53",
+    "VAPACT53", "VAPCPR53", "VAPROV53", "VAPCOT53", "VAPCCO53",
+    "VAPCRC53", "VAPCSN53", "VAPCRF53", "VAPCSO53", "VAPCOU53",
+    "VAPCUN53", "VASPCL53", "VASPMH53", "VASPOU53", "VASPUN53",
+    "VACMPM53", "VACMPY53", "VAPROX53", "DCSELIG", "DSDIA53",
+    "DSA1C53", "DSFT1953", "DSFT1853", "DSFT1753", "DSFB1753",
+    "DSFTNV53", "DSEY1953", "DSEY1853", "DSEY1753", "DSEB1753",
+    "DSEYNV53", "DSCH1953", "DSCH1853", "DSCH1753", "DSCB1753",
+    "DSCHNV53", "DSFL1953", "DSFL1853", "DSFL1753", "DSVB1753",
+    "DSFLNV53", "DSKIDN53", "DSEYPR53", "DSDIET53", "DSMED53",
+    "DSINSU53", "DSCPCP53", "DSCNPC53", "DSCPHN53", "DSCINT53",
+    "DSCGRP53", "DSCONF53", "DSPRX53", "DDNWRK18", "OTHDYS18",
+    "OTHNDD18", "ACCELI42", "HAVEUS42", "PRACTP42",
+    "YNOUSC42_M18", "PROVTY42_M18", "PLCTYP42", "TMTKUS42",
+    "TYPEPE42", "LOCATN42", "HSPLAP42", "WHITPR42", "BLCKPR42",
+    "ASIANP42", "NATAMP42", "PACISP42", "OTHRCP42", "GENDRP42",
+    "PHNREG42", "OFFHOU42", "AFTHOU42", "TREATM42", "DECIDE42",
+    "EXPLOP42", "PRVSPK42", "DLAYCA42", "AFRDCA42", "DLAYDN42",
+    "AFRDDN42", "DLAYPM42", "AFRDPM42", "EMPST31", "EMPST42",
+    "EMPST53", "RNDFLG31", "MORJOB31", "MORJOB42", "MORJOB53",
+    "EVRWRK", "HRWG31X", "HRWG42X", "HRWG53X", "HRWGIM31",
+    "HRWGIM42", "HRWGIM53", "HRHOW31", "HRHOW42", "HRHOW53",
+    "DIFFWG31", "DIFFWG42", "DIFFWG53", "NHRWG31", "NHRWG42",
+    "NHRWG53", "HOUR31", "HOUR42", "HOUR53", "TEMPJB31",
+    "TEMPJB42", "TEMPJB53", "SSNLJB31", "SSNLJB42", "SSNLJB53",
+    "SELFCM31", "SELFCM42", "SELFCM53", "DISVW31X", "DISVW42X",
+    "DISVW53X", "CHOIC31", "CHOIC42", "CHOIC53", "INDCAT31",
+    "INDCAT42", "INDCAT53", "NUMEMP31", "NUMEMP42", "NUMEMP53",
+    "MORE31", "MORE42", "MORE53", "UNION31", "UNION42",
+    "UNION53", "NWK31", "NWK42", "NWK53", "CHGJ3142",
+    "CHGJ4253", "YCHJ3142", "YCHJ4253", "STJBMM31", "STJBYY31",
+    "STJBMM42", "STJBYY42", "STJBMM53", "STJBYY53", "EVRETIRE",
+    "OCCCAT31", "OCCCAT42", "OCCCAT53", "PAYVAC31", "PAYVAC42",
+    "PAYVAC53", "SICPAY31", "SICPAY42", "SICPAY53", "PAYDR31",
+    "PAYDR42", "PAYDR53", "RETPLN31", "RETPLN42", "RETPLN53",
+    "BSNTY31", "BSNTY42", "BSNTY53", "JOBORG31", "JOBORG42",
+    "JOBORG53", "HELD31X", "HELD42X", "HELD53X", "OFFER31X",
+    "OFFER42X", "OFFER53X", "OFREMP31", "OFREMP42", "OFREMP53",
+    "EMPST31H", "EMPST42H", "EMPST53H", "SLFCM31H", "SLFCM42H",
+    "SLFCM53H", "NMEMP31H", "NMEMP42H", "NMEMP53H", "MORE31H",
+    "MORE42H", "MORE53H", "INDCT31H", "INDCT42H", "INDCT53H",
+    "OCCCT31H", "OCCCT42H", "OCCCT53H", "HOUR31H", "HOUR42H",
+    "HOUR53H", "JBORG31H", "JBORG42H", "JBORG53H", "UNION31H",
+    "UNION42H", "UNION53H", "BSNTY31H", "BSNTY42H", "BSNTY53H",
+    "HRWG31H", "HRWG42H", "HRWG53H", "CMJHLD31", "CMJHLD42",
+    "CMJHLD53", "OFFER31H", "OFFER42H", "OFFER53H", "OFEMP31H",
+    "OFEMP42H", "OFEMP53H", "PYVAC31H", "PYVAC42H", "PYVAC53H",
+    "SCPAY31H", "SCPAY42H", "SCPAY53H", "PAYDR31H", "PAYDR42H",
+    "PAYDR53H", "RTPLN31H", "RTPLN42H", "RTPLN53H", "FILEDR18",
+    "WILFIL18", "FLSTAT18", "FILER18", "JTINRU18", "JNTPID18",
+    "TAXFRM18", "FOODST18", "FOODMN18", "FOODVL18", "TTLP18X",
+    "FAMINC18", "POVCAT18", "POVLEV18", "WAGEP18X", "WAGIMP18",
+    "BUSNP18X", "BUSIMP18", "UNEMP18X", "UNEIMP18", "WCMPP18X",
+    "WCPIMP18", "INTRP18X", "INTIMP18", "DIVDP18X", "DIVIMP18",
+    "SALEP18X", "SALIMP18", "PENSP18X", "PENIMP18", "SSECP18X",
+    "SSCIMP18", "TRSTP18X", "TRTIMP18", "VETSP18X", "VETIMP18",
+    "IRASP18X", "IRAIMP18", "ALIMP18X", "ALIIMP18", "CHLDP18X",
+    "CHLIMP18", "CASHP18X", "CSHIMP18", "SSIP18X", "SSIIMP18",
+    "PUBP18X", "PUBIMP18", "OTHRP18X", "OTHIMP18", "HIEUIDX",
+    "TRIJA18X", "TRIFE18X", "TRIMA18X", "TRIAP18X", "TRIMY18X",
+    "TRIJU18X", "TRIJL18X", "TRIAU18X", "TRISE18X", "TRIOC18X",
+    "TRINO18X", "TRIDE18X", "MCRJA18", "MCRFE18", "MCRMA18",
+    "MCRAP18", "MCRMY18", "MCRJU18", "MCRJL18", "MCRAU18",
+    "MCRSE18", "MCROC18", "MCRNO18", "MCRDE18", "MCRJA18X",
+    "MCRFE18X", "MCRMA18X", "MCRAP18X", "MCRMY18X", "MCRJU18X",
+    "MCRJL18X", "MCRAU18X", "MCRSE18X", "MCROC18X", "MCRNO18X",
+    "MCRDE18X", "MCDJA18", "MCDFE18", "MCDMA18", "MCDAP18",
+    "MCDMY18", "MCDJU18", "MCDJL18", "MCDAU18", "MCDSE18",
+    "MCDOC18", "MCDNO18", "MCDDE18", "MCDJA18X", "MCDFE18X",
+    "MCDMA18X", "MCDAP18X", "MCDMY18X", "MCDJU18X", "MCDJL18X",
+    "MCDAU18X", "MCDSE18X", "MCDOC18X", "MCDNO18X", "MCDDE18X",
+    "GVAJA18", "GVAFE18", "GVAMA18", "GVAAP18", "GVAMY18",
+    "GVAJU18", "GVAJL18", "GVAAU18", "GVASE18", "GVAOC18",
+    "GVANO18", "GVADE18", "GVBJA18", "GVBFE18", "GVBMA18",
+    "GVBAP18", "GVBMY18", "GVBJU18", "GVBJL18", "GVBAU18",
+    "GVBSE18", "GVBOC18", "GVBNO18", "GVBDE18", "GVCJA18",
+    "GVCFE18", "GVCMA18", "GVCAP18", "GVCMY18", "GVCJU18",
+    "GVCJL18", "GVCAU18", "GVCSE18", "GVCOC18", "GVCNO18",
+    "GVCDE18", "VAPJA18", "VAPFE18", "VAPMA18", "VAPAP18",
+    "VAPMY18", "VAPJU18", "VAPJL18", "VAPAU18", "VAPSE18",
+    "VAPOC18", "VAPNO18", "VAPDE18", "IHSJA18", "IHSFE18",
+    "IHSMA18", "IHSAP18", "IHSMY18", "IHSJU18", "IHSJL18",
+    "IHSAU18", "IHSSE18", "IHSOC18", "IHSNO18", "IHSDE18",
+    "PUBJA18X", "PUBFE18X", "PUBMA18X", "PUBAP18X", "PUBMY18X",
+    "PUBJU18X", "PUBJL18X", "PUBAU18X", "PUBSE18X", "PUBOC18X",
+    "PUBNO18X", "PUBDE18X", "PEGJA18", "PEGFE18", "PEGMA18",
+    "PEGAP18", "PEGMY18", "PEGJU18", "PEGJL18", "PEGAU18",
+    "PEGSE18", "PEGOC18", "PEGNO18", "PEGDE18", "PDKJA18",
+    "PDKFE18", "PDKMA18", "PDKAP18", "PDKMY18", "PDKJU18",
+    "PDKJL18", "PDKAU18", "PDKSE18", "PDKOC18", "PDKNO18",
+    "PDKDE18", "PNGJA18", "PNGFE18", "PNGMA18", "PNGAP18",
+    "PNGMY18", "PNGJU18", "PNGJL18", "PNGAU18", "PNGSE18",
+    "PNGOC18", "PNGNO18", "PNGDE18", "POGJA18", "POGFE18",
+    "POGMA18", "POGAP18", "POGMY18", "POGJU18", "POGJL18",
+    "POGAU18", "POGSE18", "POGOC18", "POGNO18", "POGDE18",
+    "POEJA18", "POEFE18", "POEMA18", "POEAP18", "POEMY18",
+    "POEJU18", "POEJL18", "POEAU18", "POESE18", "POEOC18",
+    "POENO18", "POEDE18", "PNEJA18", "PNEFE18", "PNEMA18",
+    "PNEAP18", "PNEMY18", "PNEJU18", "PNEJL18", "PNEAU18",
+    "PNESE18", "PNEOC18", "PNENO18", "PNEDE18", "PRXJA18",
+    "PRXFE18", "PRXMA18", "PRXAP18", "PRXMY18", "PRXJU18",
+    "PRXJL18", "PRXAU18", "PRXSE18", "PRXOC18", "PRXNO18",
+    "PRXDE18", "PRIJA18", "PRIFE18", "PRIMA18", "PRIAP18",
+    "PRIMY18", "PRIJU18", "PRIJL18", "PRIAU18", "PRISE18",
+    "PRIOC18", "PRINO18", "PRIDE18", "HPEJA18", "HPEFE18",
+    "HPEMA18", "HPEAP18", "HPEMY18", "HPEJU18", "HPEJL18",
+    "HPEAU18", "HPESE18", "HPEOC18", "HPENO18", "HPEDE18",
+    "HPDJA18", "HPDFE18", "HPDMA18", "HPDAP18", "HPDMY18",
+    "HPDJU18", "HPDJL18", "HPDAU18", "HPDSE18", "HPDOC18",
+    "HPDNO18", "HPDDE18", "HPNJA18", "HPNFE18", "HPNMA18",
+    "HPNAP18", "HPNMY18", "HPNJU18", "HPNJL18", "HPNAU18",
+    "HPNSE18", "HPNOC18", "HPNNO18", "HPNDE18", "HPOJA18",
+    "HPOFE18", "HPOMA18", "HPOAP18", "HPOMY18", "HPOJU18",
+    "HPOJL18", "HPOAU18", "HPOSE18", "HPOOC18", "HPONO18",
+    "HPODE18", "HPXJA18", "HPXFE18", "HPXMA18", "HPXAP18",
+    "HPXMY18", "HPXJU18", "HPXJL18", "HPXAU18", "HPXSE18",
+    "HPXOC18", "HPXNO18", "HPXDE18", "HPRJA18", "HPRFE18",
+    "HPRMA18", "HPRAP18", "HPRMY18", "HPRJU18", "HPRJL18",
+    "HPRAU18", "HPRSE18", "HPROC18", "HPRNO18", "HPRDE18",
+    "INSJA18X", "INSFE18X", "INSMA18X", "INSAP18X", "INSMY18X",
+    "INSJU18X", "INSJL18X", "INSAU18X", "INSSE18X", "INSOC18X",
+    "INSNO18X", "INSDE18X", "PRVEV18", "TRIEV18", "MCREV18",
+    "MCDEV18", "VAEV18", "GVAEV18", "GVBEV18", "GVCEV18",
+    "UNINS18", "INSCOV18", "INSURC18", "TRIST31X", "TRIST42X",
+    "TRIST18X", "TRIPR31X", "TRIPR42X", "TRIPR18X", "TRIEX31X",
+    "TRIEX42X", "TRIEX18X", "TRILI31X", "TRILI42X", "TRILI18X",
+    "TRICH31X", "TRICH42X", "TRICH18X", "MCRPD31", "MCRPD42",
+    "MCRPD18", "MCRPD31X", "MCRPD42X", "MCRPD18X", "MCRPB31",
+    "MCRPB42", "MCRPB18", "MCRPHO31", "MCRPHO42", "MCRPHO18",
+    "MCDHMO31", "MCDHMO42", "MCDHMO18", "MCDMC31", "MCDMC42",
+    "MCDMC18", "PRVHMO31", "PRVHMO42", "PRVHMO18", "FSAGT31",
+    "HASFSA31", "PFSAMT31", "PREVCOVR", "MORECOVR", "TRICR31X",
+    "TRICR42X", "TRICR53X", "TRICR18X", "TRIAT31X", "TRIAT42X",
+    "TRIAT53X", "TRIAT18X", "MCAID31", "MCAID42", "MCAID53",
+    "MCAID18", "MCAID31X", "MCAID42X", "MCAID53X", "MCAID18X",
+    "MCARE31", "MCARE42", "MCARE53", "MCARE18", "MCARE31X",
+    "MCARE42X", "MCARE53X", "MCARE18X", "MCDAT31X", "MCDAT42X",
+    "MCDAT53X", "MCDAT18X", "GOVTA31", "GOVTA42", "GOVTA53",
+    "GOVTA18", "GOVAAT31", "GOVAAT42", "GOVAAT53", "GOVAAT18",
+    "GOVTB31", "GOVTB42", "GOVTB53", "GOVTB18", "GOVBAT31",
+    "GOVBAT42", "GOVBAT53", "GOVBAT18", "GOVTC31", "GOVTC42",
+    "GOVTC53", "GOVTC18", "GOVCAT31", "GOVCAT42", "GOVCAT53",
+    "GOVCAT18", "VAPROG31", "VAPROG42", "VAPROG53", "VAPROG18",
+    "VAPRAT31", "VAPRAT42", "VAPRAT53", "VAPRAT18", "IHS31",
+    "IHS42", "IHS53", "IHS18", "IHSAT31", "IHSAT42", "IHSAT53",
+    "IHSAT18", "PRIDK31", "PRIDK42", "PRIDK53", "PRIDK18",
+    "PRIEU31", "PRIEU42", "PRIEU53", "PRIEU18", "PRING31",
+    "PRING42", "PRING53", "PRING18", "PRIOG31", "PRIOG42",
+    "PRIOG53", "PRIOG18", "PRINEO31", "PRINEO42", "PRINEO53",
+    "PRINEO18", "PRIEUO31", "PRIEUO42", "PRIEUO53", "PRIEUO18",
+    "PRSTX31", "PRSTX42", "PRSTX53", "PRSTX18", "PRIV31",
+    "PRIV42", "PRIV53", "PRIV18", "PRIVAT31", "PRIVAT42",
+    "PRIVAT53", "PRIVAT18", "PUB31X", "PUB42X", "PUB53X",
+    "PUB18X", "PUBAT31X", "PUBAT42X", "PUBAT53X", "PUBAT18X",
+    "VERFLG31", "VERFLG42", "VERFLG18", "INS31X", "INS42X",
+    "INS53X", "INS18X", "INSAT31X", "INSAT42X", "INSAT53X",
+    "INSAT18X", "DENTIN31", "DENTIN42", "DENTIN53", "DNTINS31",
+    "DNTINS18", "PMEDIN31", "PMEDIN42", "PMEDIN53", "PMDINS31",
+    "PMDINS18", "PROBPY42", "CRFMPY42", "PYUNBL42", "PMEDUP31",
+    "PMEDUP42", "PMEDUP53", "PMEDPY31", "PMEDPY42", "PMEDPY53",
+    "TOTTCH18", "TOTEXP18", "TOTSLF18", "TOTMCR18", "TOTMCD18",
+    "TOTPRV18", "TOTVA18", "TOTTRI18", "TOTOFD18", "TOTSTL18",
+    "TOTWCP18", "TOTOPR18", "TOTOPU18", "TOTOSR18", "TOTPTR18",
+    "TOTOTH18", "OBTOTV18", "OBVTCH18", "OBVEXP18", "OBVSLF18",
+    "OBVMCR18", "OBVMCD18", "OBVPRV18", "OBVVA18", "OBVTRI18",
+    "OBVOFD18", "OBVSTL18", "OBVWCP18", "OBVOPR18", "OBVOPU18",
+    "OBVOSR18", "OBVPTR18", "OBVOTH18", "OBDRV18", "OBDTCH18",
+    "OBDEXP18", "OBDSLF18", "OBDMCR18", "OBDMCD18", "OBDPRV18",
+    "OBDVA18", "OBDTRI18", "OBDOFD18", "OBDSTL18", "OBDWCP18",
+    "OBDOPR18", "OBDOPU18", "OBDOSR18", "OBDPTR18", "OBDOTH18",
+    "OPTOTV18", "OPTTCH18", "OPTEXP18", "OPTSLF18", "OPTMCR18",
+    "OPTMCD18", "OPTPRV18", "OPTVA18", "OPTTRI18", "OPTOFD18",
+    "OPTSTL18", "OPTWCP18", "OPTOPR18", "OPTOPU18", "OPTOSR18",
+    "OPTPTR18", "OPTOTH18", "OPFTCH18", "OPFEXP18", "OPFSLF18",
+    "OPFMCR18", "OPFMCD18", "OPFPRV18", "OPFVA18", "OPFTRI18",
+    "OPFOFD18", "OPFSTL18", "OPFWCP18", "OPFOPR18", "OPFOPU18",
+    "OPFOSR18", "OPFPTR18", "OPFOTH18", "OPDEXP18", "OPDTCH18",
+    "OPDSLF18", "OPDMCR18", "OPDMCD18", "OPDPRV18", "OPDVA18",
+    "OPDTRI18", "OPDOFD18", "OPDSTL18", "OPDWCP18", "OPDOPR18",
+    "OPDOPU18", "OPDOSR18", "OPDPTR18", "OPDOTH18", "OPDRV18",
+    "OPVTCH18", "OPVEXP18", "OPVSLF18", "OPVMCR18", "OPVMCD18",
+    "OPVPRV18", "OPVVA18", "OPVTRI18", "OPVOFD18", "OPVSTL18",
+    "OPVWCP18", "OPVOPR18", "OPVOPU18", "OPVOSR18", "OPVPTR18",
+    "OPVOTH18", "OPSEXP18", "OPSTCH18", "OPSSLF18", "OPSMCR18",
+    "OPSMCD18", "OPSPRV18", "OPSVA18", "OPSTRI18", "OPSOFD18",
+    "OPSSTL18", "OPSWCP18", "OPSOPR18", "OPSOPU18", "OPSOSR18",
+    "OPSPTR18", "OPSOTH18", "ERTOT18", "ERTTCH18", "ERTEXP18",
+    "ERTSLF18", "ERTMCR18", "ERTMCD18", "ERTPRV18", "ERTVA18",
+    "ERTTRI18", "ERTOFD18", "ERTSTL18", "ERTWCP18", "ERTOPR18",
+    "ERTOPU18", "ERTOSR18", "ERTPTR18", "ERTOTH18", "ERFTCH18",
+    "ERFEXP18", "ERFSLF18", "ERFMCR18", "ERFMCD18", "ERFPRV18",
+    "ERFVA18", "ERFTRI18", "ERFOFD18", "ERFSTL18", "ERFWCP18",
+    "ERFOPR18", "ERFOPU18", "ERFOSR18", "ERFPTR18", "ERFOTH18",
+    "ERDEXP18", "ERDTCH18", "ERDSLF18", "ERDMCR18", "ERDMCD18",
+    "ERDPRV18", "ERDVA18", "ERDTRI18", "ERDOFD18", "ERDSTL18",
+    "ERDWCP18", "ERDOPR18", "ERDOPU18", "ERDOSR18", "ERDPTR18",
+    "ERDOTH18", "IPDIS18", "IPTEXP18", "IPTTCH18", "IPTSLF18",
+    "IPTMCR18", "IPTMCD18", "IPTPRV18", "IPTVA18", "IPTTRI18",
+    "IPTOFD18", "IPTSTL18", "IPTWCP18", "IPTOPR18", "IPTOPU18",
+    "IPTOSR18", "IPTPTR18", "IPTOTH18", "IPFEXP18", "IPFTCH18",
+    "IPFSLF18", "IPFMCR18", "IPFMCD18", "IPFPRV18", "IPFVA18",
+    "IPFTRI18", "IPFOFD18", "IPFSTL18", "IPFWCP18", "IPFOPR18",
+    "IPFOPU18", "IPFOSR18", "IPFPTR18", "IPFOTH18", "IPDEXP18",
+    "IPDTCH18", "IPDSLF18", "IPDMCR18", "IPDMCD18", "IPDPRV18",
+    "IPDVA18", "IPDTRI18", "IPDOFD18", "IPDSTL18", "IPDWCP18",
+    "IPDOPR18", "IPDOPU18", "IPDOSR18", "IPDPTR18", "IPDOTH18",
+    "IPNGTD18", "DVTOT18", "DVTTCH18", "DVTEXP18", "DVTSLF18",
+    "DVTMCR18", "DVTMCD18", "DVTPRV18", "DVTVA18", "DVTTRI18",
+    "DVTOFD18", "DVTSTL18", "DVTWCP18", "DVTOPR18", "DVTOPU18",
+    "DVTOSR18", "DVTPTR18", "DVTOTH18", "HHTOTD18", "HHAGD18",
+    "HHATCH18", "HHAEXP18", "HHASLF18", "HHAMCR18", "HHAMCD18",
+    "HHAPRV18", "HHAVA18", "HHATRI18", "HHAOFD18", "HHASTL18",
+    "HHAWCP18", "HHAOPR18", "HHAOPU18", "HHAOSR18", "HHAPTR18",
+    "HHAOTH18", "HHINDD18", "HHNTCH18", "HHNEXP18", "HHNSLF18",
+    "HHNMCD18", "HHNMCR18", "HHNPRV18", "HHNVA18", "HHNTRI18",
+    "HHNOFD18", "HHNSTL18", "HHNWCP18", "HHNOPR18", "HHNOPU18",
+    "HHNOSR18", "HHNPTR18", "HHNOTH18", "HHINFD18", "VISEXP18",
+    "VISTCH18", "VISSLF18", "VISMCR18", "VISMCD18", "VISPRV18",
+    "VISVA18", "VISTRI18", "VISOFD18", "VISSTL18", "VISWCP18",
+    "VISOPR18", "VISOPU18", "VISOSR18", "VISPTR18", "VISOTH18",
+    "OTHTCH18", "OTHEXP18", "OTHSLF18", "OTHMCR18", "OTHMCD18",
+    "OTHPRV18", "OTHVA18", "OTHTRI18", "OTHOFD18", "OTHSTL18",
+    "OTHWCP18", "OTHOPR18", "OTHOPU18", "OTHOSR18", "OTHPTR18",
+    "OTHOTH18", "RXTOT18", "RXEXP18", "RXSLF18", "RXMCR18",
+    "RXMCD18", "RXPRV18", "RXVA18", "RXTRI18", "RXOFD18",
+    "RXSTL18", "RXWCP18", "RXOPR18", "RXOPU18", "RXOSR18",
+    "RXPTR18", "RXOTH18", "PERWT18F", "FAMWT18F", "FAMWT18C",
+    "SAQWT18F", "DIABW18F", "VSAQW18F", "VARSTR", "VARPSU"]
+
+d_col_spaces = [(0,7),
+(7,10),
+(10,20),
+(20,22),
+(22,24),
+(24,26),
+(26,28),
+(28,30),
+(30,32),
+(32,34),
+(34,36),
+(36,38),
+(38,40),
+(40,42),
+(42,44),
+(44,47),
+(47,49),
+(49,51),
+(51,53),
+(53,55),
+(55,57),
+(57,59),
+(59,61),
+(61,62),
+(62,64),
+(64,66),
+(66,68),
+(68,70),
+(70,72),
+(72,74),
+(74,76),
+(76,77),
+(77,79),
+(79,81),
+(81,83),
+(83,85),
+(85,88),
+(88,91),
+(91,94),
+(94,97),
+(97,98),
+(98,99),
+(99,100),
+(100,101),
+(101,103),(103,105),(105,107),(107,108),(108,110),(110,112),(112,116),(116,118),(118,122),(122,124),(124,128),(128,130),(130,134),(134,136),(136,140),(140,142),(142,146),(146,148),(148,152),(152,153),(153,154),(154,155),(155,156),(156,157),(157,158),(158,159),(159,160),(160,161),(161,162),(162,163),(163,165),(165,167),(167,169),(169,171),(171,173),(173,175),(175,177),(177,179),(179,181),(181,183),(183,185),(185,187),(187,191),(191,192),(192,193),(193,195),(195,196),(196,197),(197,198),(198,199),(199,200),(200,201),(201,203),(203,205),(205,207),(207,209),(209,212),(212,215),(215,218),(218,221),(221,224),(224,227),(227,230),(230,233),(233,236),(236,239),(239,241),(241,243),(243,245),(245,247),(247,249),(249,251),(251,253),(253,256),(256,259),(259,261),(261,263),(263,265),(265,267),(267,270),(270,272),(272,275),(275,278),(278,280),(280,282),(282,285),(285,287),(287,290),(290,293),(293,296),(296,299),(299,302),(302,305),(305,307),(307,309),(309,311),(311,313),(313,315),(315,317),(317,320),(320,322),(322,324),(324,327),(327,329),(329,332),(332,334),(334,337),(337,339),(339,342),(342,344),(344,346),(346,349),(349,351),(351,354),(354,356),(356,358),(358,361),(361,363),(363,366),(366,368),(368,370),(370,372),(372,374),(374,376),(376,378),(378,380),(380,382),(382,384),(384,386),(386,388),(388,390),(390,393),(393,395),(395,397),(397,400),(400,402),(402,405),(405,407),(407,409),(409,412),(412,415),(415,417),(417,420),(420,422),(422,425),(425,427),(427,430),(430,432),(432,434),(434,437),(437,439),(439,441),(441,443),(443,445),(445,447),(447,449),(449,451),(451,453),(453,455),(455,457),(457,459),(459,461),(461,463),(463,465),(465,467),(467,469),(469,471),(471,473),(473,475),(475,477),(477,479),(479,481),(481,483),(483,485),(485,487),(487,489),(489,492),(492,495),(495,497),(497,499),(499,502),(502,504),(504,506),(506,509),(509,511),(511,513),(513,516),(516,518),(518,520),(520,523),(523,525),(525,527),(527,530),(530,532),(532,535),(535,537),(537,542),(542,545),(545,548),(548,550)
,(550,553),(553,555),(555,557),(557,559),(559,562),(562,564),(564,567),(567,569),(569,572),(572,574),(574,577),(577,579),(579,582),(582,584),(584,586),(586,588),(588,589),(589,592),(592,595),(595,598),(598,601),(601,604),(604,607),(607,610),(610,613),(613,616),(616,619),(619,622),(622,625),(625,628),(628,631),(631,634),(634,640),(640,646),(646,648),(648,651),(651,654),(654,657),(657,660),(660,663),(663,666),(666,669),(669,672),(672,675),(675,678),(678,681),(681,684),(684,687),(687,690),(690,695),(695,698),(698,701),(701,704),(704,707),(707,710),(710,713),(713,716),(716,719),(719,722),(722,725),(725,728),(728,731),(731,734),(734,737),(737,740),(740,743),(743,746),(746,749),(749,752),(752,755),(755,758),(758,761),(761,764),(764,767),(767,770),(770,773),(773,776),(776,779),(779,782),(782,785),(785,788),(788,792),(792,794),(794,795),(795,797),(797,800),(800,803),(803,806),(806,809),(809,812),(812,815),(815,818),(818,821),(821,824),(824,827),(827,830),(830,833),(833,836),(836,839),(839,842),(842,845),(845,848),(848,851),(851,854),(854,857),(857,860),(860,863),(863,866),(866,869),(869,872),(872,875),(875,878),(878,881),(881,884),(884,887),(887,890),(890,893),(893,896),(896,899),(899,902),(902,905),(905,908),(908,911),(911,914),(914,917),(917,920),(920,923),(923,926),(926,929),(929,931),(931,934),(934,937),(937,940),(940,942),(942,945),(945,948),(948,951),(951,954),(954,957),(957,961),(961,964),(964,965),(965,967),(967,970),(970,973),(973,976),(976,979),(979,982),(982,985),(985,988),(988,991),(991,994),(994,997),(997,1000),
+(1000,1003),(1003,1006),(1006,1009),(1009,1012),(1012,1015),(1015,1018),(1018,1021),(1021,1024),(1024,1027),(1027,1030),(1030,1033),(1033,1036),(1036,1039),(1039,1042),(1042,1045),(1045,1047),(1047,1049),(1049,1051),(1051,1053),(1053,1055),(1055,1058),(1058,1061),(1061,1064),(1064,1067),(1067,1070),(1070,1072),(1072,1074),(1074,1076),(1076,1078),(1078,1080),(1080,1082),(1082,1084),(1084,1086),(1086,1088),(1088,1090),(1090,1092),(1092,1094),(1094,1096),(1096,1098),(1098,1100),(1100,1102),(1102,1104),(1104,1106),(1106,1108),(1108,1110),(1110,1112),(1112,1114),(1114,1116),(1116,1118),(1118,1120),(1120,1122),(1122,1124),(1124,1126),(1126,1128),(1128,1130),(1130,1133),(1133,1136),(1136,1139),(1139,1141),(1141,1144),(1144,1147),(1147,1150),(1150,1153),(1153,1159),(1159,1165),(1165,1171),(1171,1172),(1172,1173),(1173,1174),(1174,1177),(1177,1180),(1180,1183),(1183,1186),(1186,1189),(1189,1192),(1192,1198),(1198,1204),(1204,1210),(1210,1213),(1213,1216),(1216,1219),(1219,1222),(1222,1225),(1225,1228),(1228,1231),(1231,1234),(1234,1237),(1237,1240),(1240,1243),(1243,1246),(1246,1249),(1249,1252),(1252,1255),(1255,1258),(1258,1261),(1261,1264),(1264,1267),(1267,1270),(1270,1273),(1273,1276),(1276,1279),(1279,1282),(1282,1285),(1285,1288),(1288,1291),(1291,1294),(1294,1297),(1297,1300),(1300,1303),(1303,1305),(1305,1307),(1307,1310),(1310,1313),(1313,1316),(1316,1319),(1319,1322),(1322,1326),(1326,1329),(1329,1333),(1333,1336),(1336,1340),(1340,1343),(1343,1346),(1346,1349),(1349,1352),(1352,1355),(1355,1358),(1358,1361),(1361,1364),(1364,1367),(1367,1370),(1370,1373),(1373,1376),(1376,1379),(1379,1382),(1382,1385),(1385,1388),(1388,1391),(1391,1394),(1394,1397),(1397,1400),(1400,1403),(1403,1406),(1406,1409),(1409,1412),(1412,1415),(1415,1418),(1418,1421),(1421,1424),(1424,1427),(1427,1430),(1430,1433),(1433,1435),(1435,1437),(1437,1439),(1439,1441),(1441,1443),(1443,1445),(1445,1448),(1448,1451),(1451,1454),(1454,1456),(1456,1458),(1458,1460),(1460,1462),(1462,1464),(1464,1
466),(1466,1468),(1468,1470),(1470,1472),(1472,1475),(1475,1478),(1478,1481),(1481,1483),(1483,1485),(1485,1487),(1487,1489),(1489,1491),(1491,1493),(1493,1495),(1495,1497),(1497,1499),(1499,1505),(1505,1511),(1511,1517),(1517,1519),(1519,1521),(1521,1523),(1523,1525),(1525,1527),(1527,1529),(1529,1531),(1531,1533),(1533,1535),(1535,1537),(1537,1539),(1539,1541),(1541,1543),(1543,1545),(1545,1547),(1547,1549),(1549,1551),(1551,1553),(1553,1555),(1555,1557),(1557,1559),(1559,1561),(1561,1563),(1563,1565),(1565,1567),(1567,1569),(1569,1572),(1572,1574),(1574,1576),(1576,1578),(1578,1582),(1582,1589),(1589,1596),(1596,1597),(1597,1609),(1609,1615),(1615,1616),(1616,1622),(1622,1623),(1623,1628),(1628,1629),(1629,1634),(1634,1635),(1635,1640),(1640,1641),(1641,1646),(1646,1647),(1647,1654),(1654,1655),(1655,1660),(1660,1661),(1661,1666),(1666,1667),(1667,1674),(1674,1675),(1675,1680),(1680,1681),(1681,1686),(1686,1687),(1687,1692),(1692,1693),(1693,1698),(1698,1699),(1699,1704),(1704,1705),(1705,1710),(1710,1711),(1711,1716),(1716,1717),(1717,1723),(1723,1724),(1724,1733),(1733,1735),(1735,1737),(1737,1739),(1739,1741),(1741,1743),(1743,1745),(1745,1747),(1747,1749),(1749,1751),(1751,1753),(1753,1755),(1755,1757),(1757,1759),(1759,1761),(1761,1763),(1763,1765),(1765,1767),(1767,1769),(1769,1771),(1771,1773),(1773,1775),(1775,1777),(1777,1779),(1779,1781),(1781,1783),(1783,1785),(1785,1787),(1787,1789),(1789,1791),(1791,1793),(1793,1795),(1795,1797),(1797,1799),(1799,1801),(1801,1803),(1803,1805),(1805,1807),(1807,1809),(1809,1811),(1811,1813),(1813,1815),(1815,1817),(1817,1819),(1819,1821),(1821,1823),(1823,1825),(1825,1827),(1827,1829),(1829,1831),(1831,1833),(1833,1835),(1835,1837),(1837,1839),(1839,1841),(1841,1843),(1843,1845),(1845,1847),(1847,1849),(1849,1851),(1851,1853),(1853,1855),(1855,1857),(1857,1859),(1859,1861),(1861,1863),(1863,1865),(1865,1867),(1867,1869),(1869,1871),(1871,1873),(1873,1875),(1875,1877),(1877,1879),(1879,1881),(1881,1883),(1883,1885),(18
85,1887),(1887,1889),(1889,1891),(1891,1893),(1893,1895),(1895,1897),(1897,1899),(1899,1901),(1901,1903),(1903,1905),(1905,1907),(1907,1909),(1909,1911),(1911,1913),(1913,1915),(1915,1917),(1917,1919),(1919,1921),(1921,1923),(1923,1925),(1925,1927),(1927,1929),(1929,1931),(1931,1933),(1933,1935),(1935,1937),(1937,1939),(1939,1941),(1941,1943),(1943,1945),(1945,1947),(1947,1949),(1949,1951),(1951,1953),(1953,1955),(1955,1957),(1957,1959),(1959,1961),(1961,1963),(1963,1965),(1965,1967),(1967,1969),(1969,1971),(1971,1973),(1973,1975),(1975,1977),(1977,1979),(1979,1981),(1981,1983),(1983,1985),(1985,1987),(1987,1989),(1989,1991),(1991,1993),(1993,1995),(1995,1997),(1997,1999),(1999,2001),(2001,2003),(2003,2005),(2005,2007),(2007,2009),(2009,2011),(2011,2013),(2013,2015),(2015,2017),(2017,2019),(2019,2021),(2021,2023),(2023,2025),(2025,2027),(2027,2029),(2029,2031),(2031,2033),(2033,2035),(2035,2037),(2037,2039),(2039,2041),(2041,2043),(2043,2045),(2045,2047),(2047,2049),(2049,2051),(2051,2053),(2053,2055),(2055,2057),(2057,2059),(2059,2061),(2061,2063),(2063,2065),(2065,2067),(2067,2069),(2069,2071),(2071,2073),(2073,2075),(2075,2077),(2077,2079),(2079,2081),(2081,2083),(2083,2085),(2085,2087),(2087,2089),(2089,2091),(2091,2093),(2093,2095),(2095,2097),(2097,2099),(2099,2101),(2101,2103),(2103,2105),(2105,2107),(2107,2109),(2109,2111),(2111,2113),(2113,2115),(2115,2117),(2117,2119),(2119,2121),(2121,2123),(2123,2125),(2125,2127),(2127,2129),(2129,2131),(2131,2133),(2133,2135),(2135,2137),(2137,2139),(2139,2141),(2141,2143),(2143,2145),(2145,2147),(2147,2149),(2149,2151),(2151,2153),(2153,2155),(2155,2157),(2157,2159),(2159,2161),(2161,2163),(2163,2165),(2165,2167),(2167,2169),(2169,2171),(2171,2173),(2173,2175),(2175,2177),(2177,2179),(2179,2181),(2181,2183),(2183,2185),(2185,2187),(2187,2189),(2189,2191),(2191,2193),(2193,2195),(2195,2197),(2197,2199),(2199,2201),(2201,2203),(2203,2205),(2205,2207),(2207,2209),(2209,2211),(2211,2213),(2213,2215),(2215,2217),(2217,2219)
,(2219,2221),(2221,2223),(2223,2225),(2225,2227),(2227,2229),(2229,2231),(2231,2233),(2233,2235),(2235,2237),(2237,2239),(2239,2241),(2241,2243),(2243,2245),(2245,2247),(2247,2249),(2249,2251),(2251,2253),(2253,2255),(2255,2257),(2257,2259),(2259,2261),(2261,2263),(2263,2265),(2265,2267),(2267,2269),(2269,2271),(2271,2273),(2273,2275),(2275,2277),(2277,2279),(2279,2281),(2281,2283),(2283,2285),(2285,2287),(2287,2289),(2289,2291),(2291,2293),(2293,2295),(2295,2297),(2297,2299),(2299,2301),(2301,2303),(2303,2305),(2305,2307),(2307,2309),(2309,2311),(2311,2313),(2313,2315),(2315,2317),(2317,2319),(2319,2321),(2321,2323),(2323,2325),(2325,2327),(2327,2329),(2329,2331),(2331,2333),(2333,2335),(2335,2337),(2337,2339),(2339,2341),(2341,2343),(2343,2345),(2345,2347),(2347,2349),(2349,2351),(2351,2353),(2353,2355),(2355,2357),(2357,2358),(2358,2359),(2359,2360),(2360,2361),(2361,2362),(2362,2363),(2363,2364),(2364,2365),(2365,2366),(2366,2367),(2367,2368),(2368,2370),(2370,2372),(2372,2374),(2374,2376),(2376,2378),(2378,2380),(2380,2382),(2382,2384),(2384,2386),(2386,2388),(2388,2390),(2390,2392),(2392,2394),(2394,2396),(2396,2398),(2398,2401),(2401,2404),(2404,2407),(2407,2410),(2410,2413),(2413,2416),(2416,2419),(2419,2422),(2422,2425),(2425,2428),(2428,2431),(2431,2434),(2434,2437),(2437,2440),(2440,2443),(2443,2446),(2446,2449),(2449,2452),(2452,2455),(2455,2458),(2458,2461),(2461,2463),(2463,2466),(2466,2470),(2470,2472),(2472,2474),(2474,2476),(2476,2478),(2478,2480),(2480,2482),(2482,2484),(2484,2486),(2486,2488),(2488,2490),(2490,2492),(2492,2494),(2494,2496),(2496,2498),(2498,2500),(2500,2502),(2502,2504),(2504,2506),(2506,2508),(2508,2510),(2510,2512),(2512,2514),(2514,2516),(2516,2518),(2518,2520),(2520,2522),(2522,2524),(2524,2526),(2526,2528),(2528,2530),(2530,2532),(2532,2534),(2534,2536),(2536,2538),(2538,2540),(2540,2542),(2542,2544),(2544,2546),(2546,2548),(2548,2550),(2550,2552),(2552,2554),(2554,2556),(2556,2558),(2558,2560),(2560,2562),(2562,2564),(2564,2
566),(2566,2568),(2568,2570),(2570,2572),(2572,2574),(2574,2576),(2576,2578),(2578,2580),(2580,2582),(2582,2584),(2584,2586),(2586,2588),(2588,2590),(2590,2592),(2592,2594),(2594,2596),(2596,2598),(2598,2600),(2600,2602),(2602,2604),(2604,2606),(2606,2608),(2608,2610),(2610,2612),(2612,2614),(2614,2616),(2616,2618),(2618,2620),(2620,2622),(2622,2624),(2624,2626),(2626,2628),(2628,2630),(2630,2632),(2632,2634),(2634,2636),(2636,2638),(2638,2640),(2640,2642),(2642,2644),(2644,2646),(2646,2648),(2648,2650),(2650,2652),(2652,2654),(2654,2656),(2656,2658),(2658,2660),(2660,2662),(2662,2664),(2664,2666),(2666,2668),(2668,2670),(2670,2672),(2672,2674),(2674,2676),(2676,2678),(2678,2680),(2680,2682),(2682,2684),(2684,2686),(2686,2688),(2688,2690),(2690,2692),(2692,2694),(2694,2696),(2696,2698),(2698,2700),(2700,2702),(2702,2704),(2704,2706),(2706,2708),(2708,2710),(2710,2712),(2712,2714),(2714,2716),(2716,2718),(2718,2720),(2720,2722),(2722,2724),(2724,2726),(2726,2728),(2728,2730),(2730,2732),(2732,2734),(2734,2736),(2736,2738),(2738,2740),(2740,2742),(2742,2744),(2744,2746),(2746,2748),(2748,2750),(2750,2752),(2752,2755),(2755,2758),(2758,2761),(2761,2768),(2768,2774),(2774,2780),(2780,2786),(2786,2792),(2792,2798),(2798,2804),(2804,2810),(2810,2815),(2815,2820),(2820,2825),(2825,2831),(2831,2836),(2836,2842),(2842,2848),(2848,2854),(2854,2857),(2857,2864),(2864,2870),(2870,2876),(2876,2882),(2882,2887),(2887,2893),(2893,2898),(2898,2903),(2903,2908),(2908,2912),(2912,2917),(2917,2922),(2922,2926),(2926,2931),(2931,2937),(2937,2942),(2942,2945),(2945,2952),(2952,2958),(2958,2964),(2964,2970),(2970,2975),(2975,2981),(2981,2986),(2986,2991),(2991,2996),(2996,3000),(3000,3005),(3005,3010),(3010,3014),(3014,3019),(3019,3025),(3025,3030),(3030,3033),(3033,3040),(3040,3046),(3046,3051),(3051,3056),(3056,3062),(3062,3068),(3068,3073),(3073,3078),(3078,3082),(3082,3087),(3087,3092),(3092,3096),(3096,3100),(3100,3105),(3105,3111),(3111,3116),(3116,3123),(3123,3129),(3129,3134),(31
34,3139),(3139,3145),(3145,3151),(3151,3156),(3156,3161),(3161,3165),(3165,3170),(3170,3175),(3175,3179),(3179,3183),(3183,3188),(3188,3194),(3194,3199),(3199,3204),(3204,3209),(3209,3213),(3213,3218),(3218,3223),(3223,3228),(3228,3233),(3233,3237),(3237,3238),(3238,3241),(3241,3245),(3245,3249),(3249,3253),(3253,3258),(3258,3263),(3263,3268),(3268,3270),(3270,3277),(3277,3283),(3283,3288),(3288,3293),(3293,3299),(3299,3304),(3304,3309),(3309,3313),(3313,3317),(3317,3321),(3321,3326),(3326,3330),(3330,3334),(3334,3339),(3339,3344),(3344,3349),(3349,3354),(3354,3359),(3359,3363),(3363,3368),(3368,3372),(3372,3377),(3377,3381),(3381,3385),(3385,3386),(3386,3389),(3389,3393),(3393,3397),(3397,3401),(3401,3406),(3406,3411),(3411,3416),(3416,3418),(3418,3424),(3424,3429),(3429,3433),(3433,3438),(3438,3443),(3443,3448),(3448,3452),(3452,3456),(3456,3460),(3460,3465),(3465,3470),(3470,3474),(3474,3478),(3478,3482),(3482,3487),(3487,3492),(3492,3498),(3498,3503),(3503,3507),(3507,3512),(3512,3517),(3517,3522),(3522,3526),(3526,3530),(3530,3534),(3534,3539),(3539,3544),(3544,3548),(3548,3552),(3552,3556),(3556,3561),(3561,3566),(3566,3571),(3571,3576),(3576,3580),(3580,3584),(3584,3588),(3588,3592),(3592,3596),(3596,3600),(3600,3601),(3601,3604),(3604,3608),(3608,3612),(3612,3615),(3615,3619),(3619,3623),(3623,3627),(3627,3629),(3629,3635),(3635,3642),(3642,3647),(3647,3653),(3653,3659),(3659,3665),(3665,3671),(3671,3676),(3676,3681),(3681,3686),(3686,3691),(3691,3696),(3696,3701),(3701,3707),(3707,3713),(3713,3719),(3719,3725),(3725,3732),(3732,3737),(3737,3743),(3743,3749),(3749,3755),(3755,3761),(3761,3766),(3766,3771),(3771,3776),(3776,3781),(3781,3786),(3786,3791),(3791,3797),(3797,3803),(3803,3809),(3809,3814),(3814,3820),(3820,3824),(3824,3829),(3829,3834),(3834,3839),(3839,3844),(3844,3848),(3848,3849),(3849,3853),(3853,3857),(3857,3861),(3861,3865),(3865,3870),(3870,3875),(3875,3880),(3880,3883),(3883,3885),(3885,3890),(3890,3895),(3895,3900),(3900,3904),(3904,3909)
,(3909,3914),(3914,3918),(3918,3922),(3922,3926),(3926,3930),(3930,3933),(3933,3937),(3937,3941),(3941,3945),(3945,3950),(3950,3954),(3954,3957),(3957,3960),(3960,3966),(3966,3972),(3972,3977),(3977,3983),(3983,3989),(3989,3994),(3994,3999),(3999,4000),(4000,4004),(4004,4009),(4009,4010),(4010,4014),(4014,4018),(4018,4022),(4022,4027),(4027,4032),(4032,4035),(4035,4041),(4041,4047),(4047,4053),(4053,4058),(4058,4062),(4062,4066),(4066,4071),(4071,4073),(4073,4074),(4074,4077),(4077,4078),(4078,4081),(4081,4082),(4082,4087),(4087,4091),(4091,4096),(4096,4099),(4099,4104),(4104,4109),(4109,4113),(4113,4117),(4117,4122),(4122,4126),(4126,4129),(4129,4132),(4132,4135),(4135,4138),(4138,4141),(4141,4145),(4145,4148),(4148,4152),(4152,4156),(4156,4160),(4160,4165),(4165,4170),(4170,4175),(4175,4180),(4180,4185),(4185,4190),(4190,4194),(4194,4199),(4199,4204),(4204,4208),(4208,4213),(4213,4217),(4217,4221),(4221,4225),(4225,4230),(4230,4235),(4235,4238),(4238,4244),(4244,4249),(4249,4255),(4255,4261),(4261,4267),(4267,4272),(4272,4277),(4277,4282),(4282,4287),(4287,4291),(4291,4297),(4297,4302),(4302,4308),(4308,4314),(4314,4320),(4320,4332),(4332,4344),(4344,4356),(4356,4369),(4369,4381),(4381,4393),(4393,4397),(4397,None)]
+
+
+
+
+p_col_names = ['DUID', 'PID', 'DUPERSID', 'DRUGIDX', 'RXRECIDX', 'LINKIDX','PANEL', 'PURCHRD', 'RXBEGMM', 'RXBEGYRX', 'RXNAME',
+            'RXDRGNAM', 'RXNDC', 'RXQUANTY', 'RXFORM', 'RXFRMUNT','RXSTRENG', 'RXSTRUNT', 'RXDAYSUP', 'PHARTP1', 'PHARTP2',
+            'PHARTP3', 'PHARTP4', 'PHARTP5', 'PHARTP6', 'PHARTP7','PHARTP8', 'PHARTP9', 'RXFLG', 'IMPFLAG', 'PCIMPFLG',
+            'DIABEQUIP', 'INPCFLG', 'PREGCAT', 'TC1', 'TC1S1','TC1S1_1', 'TC1S1_2', 'TC1S2', 'TC1S2_1', 'TC1S3',
+            'TC1S3_1', 'TC2', 'TC2S1', 'TC2S1_1', 'TC2S1_2', 'TC2S2','TC3', 'TC3S1', 'TC3S1_1', 'RXSF18X', 'RXMR18X', 'RXMD18X',
+            'RXPV18X', 'RXVA18X', 'RXTR18X', 'RXOF18X', 'RXSL18X','RXWC18X', 'RXOT18X', 'RXOR18X', 'RXOU18X', 'RXXP18X',
+            'PERWT18F', 'VARSTR', 'VARPSU']
+
+p_col_spaces = [(0,7),(7,10),(10,20),(20,33),(33,52),(52,68),(68,70),(70,71),(71,74),(74,78),(78,128),(128,188),(188,199),
+                (199,206),(206,256),(256,306),(306,356),(356,406),(406,409),(409,412),(412,414),(414,416),(416,418),(418,420),(420,422),
+                (422,424),(424,426),(426,428),(428,429),(429,430),(430,431),(431,432),(432,433),(433,436),(436,439),(439,442),(442,445),
+                (445,448),(448,451),(451,454),(454,456),(456,458),(458,461),(461,464),(464,467),(467,470),(470,473),(473,476),(476,479),
+                (479,482),(482,490),(490,498),(498,506),(506,514),(514,522),(522,529),(529,536),(536,543),(543,550),(550,558),(558,566),
+                (566,573),(573,581),(581,593),(593,597),(597,None)]

From bdf7ef2ab065465c72723f87f1b808cd189ff7d3 Mon Sep 17 00:00:00 2001
From: Yevgeny Bulochnik <yevgeny.bulochnik@gmail.com>
Date: Fri, 7 May 2021 12:35:39 -0500
Subject: [PATCH 16/46] Pass response.content (not the response object) to
 the meps get_dataset handler

---
 src/mdt/meps/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mdt/meps/utils.py b/src/mdt/meps/utils.py
index 70e24c5..c7b73d4 100644
--- a/src/mdt/meps/utils.py
+++ b/src/mdt/meps/utils.py
@@ -20,7 +20,7 @@ def get_dataset(
     response = requests.get(url)
 
     if handler:
-        return handler(response)
+        return handler(response.content)
 
     (dest / url.split('/')[-1]).write_bytes(response.content)
 

From 2889fcbd6de4d46558d14669df38fbc68955b038 Mon Sep 17 00:00:00 2001
From: Yevgeny Bulochnik <yevgeny.bulochnik@gmail.com>
Date: Fri, 7 May 2021 12:38:34 -0500
Subject: [PATCH 17/46] Allow modules to be imported from package namespace

---
 src/mdt/meps/__init__.py   | 4 ++++
 src/mdt/rxnorm/__init__.py | 4 ++++
 2 files changed, 8 insertions(+)

diff --git a/src/mdt/meps/__init__.py b/src/mdt/meps/__init__.py
index e69de29..41a97c2 100644
--- a/src/mdt/meps/__init__.py
+++ b/src/mdt/meps/__init__.py
@@ -0,0 +1,4 @@
+from . import utils
+from . import columns
+
+__all__ = ['utils', 'columns']
diff --git a/src/mdt/rxnorm/__init__.py b/src/mdt/rxnorm/__init__.py
index e69de29..2b01e39 100644
--- a/src/mdt/rxnorm/__init__.py
+++ b/src/mdt/rxnorm/__init__.py
@@ -0,0 +1,4 @@
+from . import rxclass
+from . import utils
+
+__all__ = ['rxclass', 'utils']

From 73dd0e9da98512c5283e02243d51e9be12ece4c0 Mon Sep 17 00:00:00 2001
From: Yevgeny Bulochnik <yevgeny.bulochnik@gmail.com>
Date: Fri, 7 May 2021 12:45:00 -0500
Subject: [PATCH 18/46] Add load_meps function to database.py

---
 src/mdt/database.py | 69 +++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 64 insertions(+), 5 deletions(-)

diff --git a/src/mdt/database.py b/src/mdt/database.py
index 7e0e6b2..e2d0e04 100644
--- a/src/mdt/database.py
+++ b/src/mdt/database.py
@@ -1,7 +1,8 @@
-from .rxnorm.utils import get_dataset
-
+from . import rxnorm, meps
 from pathlib import Path
-import zipfile,io, sqlite3
+import zipfile
+import io
+import sqlite3
 import pandas as pd
 
 
@@ -59,7 +60,7 @@ def read_sql_string(file_name):
 def load_rxnorm():
     """downloads and loads RxNorm dataset into database"""
 
-    z = zipfile.ZipFile(get_dataset(handler=io.BytesIO))
+    z = zipfile.ZipFile(rxnorm.utils.get_dataset(handler=io.BytesIO))
 
     col_names = ['RXCUI','LAT','TS','LUI','STT','SUI','ISPREF','RXAUI','SAUI','SCUI','SDUI','SAB','TTY','CODE','STR','SRL','SUPPRESS','CVF','test']
     rxnconso = pd.read_csv(z.open('rrf/RXNCONSO.RRF'),sep='|',header=None,dtype=object,names=col_names)
@@ -76,4 +77,62 @@ def load_rxnorm():
     sql_create_table('rxnsat',rxnsat)
     del rxnsat 
 
-    del z
\ No newline at end of file
+    del z
+
+
+def load_meps():
+    '''Load Meps data into db'''
+    z = zipfile.ZipFile(
+        meps.utils.get_dataset('h206adat.zip', handler=io.BytesIO)
+    )
+
+    meps_prescription = pd.read_fwf(
+        z.open('H206A.dat'),
+        header=None,
+        names=meps.columns.p_col_names,
+        converters={col: str for col in meps.columns.p_col_names},
+        colspecs=meps.columns.p_col_spaces,
+    )
+
+    sql_create_table('meps_prescription', meps_prescription)
+    del meps_prescription
+    del z
+
+    z = zipfile.ZipFile(
+        meps.utils.get_dataset('h209dat.zip', handler=io.BytesIO)
+    )
+
+    meps_demographics = pd.read_fwf(
+        z.open('h209.dat'),
+        header=None,
+        names=meps.columns.d_col_names,
+        converters={col: str for col in meps.columns.d_col_names},
+        colspecs=meps.columns.d_col_spaces,
+        usecols=['DUPERSID', 'PERWT18F', "REGION18", 'SEX', 'AGELAST']
+    )
+
+    # removing numbers from meps_demographic column names, since the '18' in region18 and perwt18f in MEPS are year-specific
+    meps_demographics.columns = meps_demographics.columns.str.replace(r'\d+', '',regex=True)
+    sql_create_table('meps_demographics', meps_demographics)
+    del meps_demographics
+    del z
+
+    sql_create_table('meps_region_states', meps.columns.meps_region_states)
+
+    meps_reference_str = read_sql_string('meps_reference.sql')
+    meps_reference = db_query(meps_reference_str)
+    sql_create_table('meps_reference', meps_reference)
+    del meps_reference
+
+    # TEST!!!!!!!!!!!!!!!! reads record count from created database
+    meps_prescription = db_query("Select count(*) AS records from meps_prescription")
+    print('DB table meps_prescription  has {0} records'.format(meps_prescription['records'].iloc[0]))
+
+    meps_demographics = db_query("Select count(*) AS records from meps_demographics")
+    print('DB table meps_demographics has {0} records'.format(meps_demographics['records'].iloc[0]))
+
+    meps_reference = db_query("Select count(*) AS records from meps_reference")
+    print('DB table meps_reference has {0} records'.format(meps_reference['records'].iloc[0]))
+
+    meps_region_states = db_query("Select count(*) AS records from meps_region_states")
+    print('DB table meps_region_states has {0} records'.format(meps_region_states['records'].iloc[0]))

From 832ec257cf58873f094c50e05de9633432c02767 Mon Sep 17 00:00:00 2001
From: Yevgeny Bulochnik <yevgeny.bulochnik@gmail.com>
Date: Fri, 7 May 2021 12:45:22 -0500
Subject: [PATCH 19/46] Add load_meps to main function of run_mdt module

---
 src/mdt/run_mdt.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/mdt/run_mdt.py b/src/mdt/run_mdt.py
index 096cb81..92c8ed5 100644
--- a/src/mdt/run_mdt.py
+++ b/src/mdt/run_mdt.py
@@ -1,8 +1,9 @@
-from mdt.database import load_rxnorm
+from mdt.database import load_rxnorm, load_meps
 
 
 def main():
     load_rxnorm()
+    load_meps()
 
 
 if __name__ == '__main__':

From bd9e747906184d03a91356bd4765a818f39dd799 Mon Sep 17 00:00:00 2001
From: Yevgeny Bulochnik <yevgeny.bulochnik@gmail.com>
Date: Sat, 8 May 2021 09:11:06 -0500
Subject: [PATCH 20/46] Declare requests and pandas as install requirements
 of mdt

---
 setup.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/setup.py b/setup.py
index 0f5544d..764f3f5 100644
--- a/setup.py
+++ b/setup.py
@@ -11,7 +11,7 @@
     version='1.0.0',
     # description='A sample Python project',  # Optional
     # long_description=long_description,  # Optional
-    # long_description_content_type='text/markdown',  # Optional (see note above)
+    # long_description_content_type='text/markdown',  # Optional
     # url='https://github.com/pypa/sampleproject',  # Optional
     # author='A. Random Developer',  # Optional
     # author_email='author@example.com',  # Optional
@@ -19,13 +19,16 @@
     package_dir={'': 'src'},
     packages=find_packages(where='src'),
     python_requires='>=3.6, <4',
-    # install_requires=['peppercorn'],  # Optional
+    install_requires=[
+        'requests',
+        'pandas'
+    ],  # Optional
 
     # If there are data files included in your packages that need to be
     # installed, specify them here.
     # package_data={  # Optional
     #    'sample': ['package_data.dat'],
-    #},
+    # },
 
     # Although 'package_data' is the preferred approach, in some case you may
     # need to place data files outside of your packages. See:

From 3ce65feb6584a4dd1975d2c60ec48b86f2459838 Mon Sep 17 00:00:00 2001
From: Yevgeny Bulochnik <yevgeny.bulochnik@gmail.com>
Date: Sun, 9 May 2021 18:07:41 -0500
Subject: [PATCH 21/46] Change package install name to mdt, include .sql files
 in packages

---
 setup.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/setup.py b/setup.py
index 764f3f5..8893d23 100644
--- a/setup.py
+++ b/setup.py
@@ -7,7 +7,7 @@
 long_description = (here / 'README.md').read_text(encoding='utf-8')
 
 setup(
-    name='medicationDiversification',
+    name='mdt',
     version='1.0.0',
     # description='A sample Python project',  # Optional
     # long_description=long_description,  # Optional
@@ -26,10 +26,9 @@
 
     # If there are data files included in your packages that need to be
     # installed, specify them here.
-    # package_data={  # Optional
-    #    'sample': ['package_data.dat'],
-    # },
-
+    package_data={
+        "":['*.sql']
+    }
     # Although 'package_data' is the preferred approach, in some case you may
     # need to place data files outside of your packages. See:
     # http://docs.python.org/distutils/setupscript.html#installing-additional-files

From ceee61e3c3c888de110d1df8dfe6a23607f8b675 Mon Sep 17 00:00:00 2001
From: Yevgeny Bulochnik <yevgeny.bulochnik@gmail.com>
Date: Sun, 9 May 2021 18:10:17 -0500
Subject: [PATCH 22/46] Add sql packages to rxnorm & meps, include sql files

---
 src/mdt/meps/sql/__init__.py        |   0
 src/mdt/meps/sql/meps_reference.sql |  14 ++
 src/mdt/rxnorm/sql/__init__.py      |   0
 src/mdt/rxnorm/sql/rxcui_ndc.sql    | 206 ++++++++++++++++++++++++++++
 4 files changed, 220 insertions(+)
 create mode 100644 src/mdt/meps/sql/__init__.py
 create mode 100644 src/mdt/meps/sql/meps_reference.sql
 create mode 100644 src/mdt/rxnorm/sql/__init__.py
 create mode 100644 src/mdt/rxnorm/sql/rxcui_ndc.sql

diff --git a/src/mdt/meps/sql/__init__.py b/src/mdt/meps/sql/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/mdt/meps/sql/meps_reference.sql b/src/mdt/meps/sql/meps_reference.sql
new file mode 100644
index 0000000..7cd2e62
--- /dev/null
+++ b/src/mdt/meps/sql/meps_reference.sql
@@ -0,0 +1,14 @@
+--"Sex" assignments are from MEPS, source: https://meps.ahrq.gov/mepsweb/data_stats/download_data_files_codebook.jsp?PUFId=PROJYR15&varName=SEX 
+
+SELECT DISTINCT 
+    t1.dupersid,
+    t2.perwtf AS person_weight,
+    t1.rxndc,
+    CASE WHEN t2.sex = 1 THEN 'M' 
+    WHEN t2.sex = 2 THEN 'F'
+    END AS gender,
+    t2.agelast, --patient's last known age; advantage of using this col over other age cols is every patient has age (no NULLs)
+    t2.region AS region_num
+    FROM meps_prescription AS t1
+    INNER JOIN meps_demographics AS t2
+    ON t1.dupersid = t2.dupersid
\ No newline at end of file
diff --git a/src/mdt/rxnorm/sql/__init__.py b/src/mdt/rxnorm/sql/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/mdt/rxnorm/sql/rxcui_ndc.sql b/src/mdt/rxnorm/sql/rxcui_ndc.sql
new file mode 100644
index 0000000..920fc4d
--- /dev/null
+++ b/src/mdt/rxnorm/sql/rxcui_ndc.sql
@@ -0,0 +1,206 @@
+select distinct
+    sq.medication_ingredient_rxcui
+    , sq.medication_ingredient_name
+    , sq.medication_ingredient_tty
+    , sq.medication_product_rxcui
+    , sq.medication_product_name
+    , sq.medication_product_tty
+
+    , df_rxnconso.rxcui as dose_form_rxcui
+    , df_rxnconso.str as dose_form_name
+    , df_rxnconso.tty as dose_form_tty
+    
+    --, dfg_rxnconso.rxcui as dose_form_group_rxcui
+    --, dfg_rxnconso.str as dose_form_group_name
+    --, dfg_rxnconso.tty as dose_form_group_tty
+
+    , ndc_rxnsat.atv as medication_ndc
+
+from (
+    
+    select in_rxnconso.rxcui as medication_ingredient_rxcui
+        , in_rxnconso.str as medication_ingredient_name
+        , in_rxnconso.tty as medication_ingredient_tty
+        , scd_rxnconso.rxcui as medication_product_rxcui
+        , scd_rxnconso.str as medication_product_name
+        , scd_rxnconso.tty as medication_product_tty
+
+    -- medication ingredient (IN)
+    from rxnconso in_rxnconso
+
+    -- medication product (SCDC -> SCD)
+    left join rxnrel scdc_rxnrel on scdc_rxnrel.rxcui2 = in_rxnconso.rxcui and scdc_rxnrel.rela = 'ingredient_of'
+    left join rxnconso scdc_rxnconso on scdc_rxnconso.rxcui = scdc_rxnrel.rxcui1 and scdc_rxnconso.sab = 'RXNORM' and scdc_rxnconso.tty = 'SCDC'
+    left join rxnrel scd_rxnrel on scd_rxnrel.rxcui2 = scdc_rxnrel.rxcui1 and scd_rxnrel.rela = 'constitutes'
+    left join rxnconso scd_rxnconso on scd_rxnconso.rxcui = scd_rxnrel.rxcui1 and scd_rxnconso.sab = 'RXNORM' and scd_rxnconso.tty = 'SCD'
+    
+    where in_rxnconso.tty = 'IN'
+        and in_rxnconso.sab = 'RXNORM'
+
+union all 
+    
+    select in_rxnconso.rxcui as medication_ingredient_rxcui
+        , in_rxnconso.str as medication_ingredient_name
+        , in_rxnconso.tty as medication_ingredient_tty
+        , sbd_rxnconso.rxcui as medication_product_rxcui
+        , sbd_rxnconso.str as medication_product_name
+        , sbd_rxnconso.tty as medication_product_tty
+
+    -- medication ingredient (IN)
+    from rxnconso in_rxnconso
+
+    -- medication product (BN -> SBD)
+    left join rxnrel bn_rxnrel on bn_rxnrel.rxcui2 = in_rxnconso.rxcui and bn_rxnrel.rela = 'has_tradename'
+    left join rxnconso bn_rxnconso on bn_rxnconso.rxcui = bn_rxnrel.rxcui1 and bn_rxnconso.sab = 'RXNORM' and bn_rxnconso.tty = 'BN'
+    left join rxnrel sbd_rxnrel on sbd_rxnrel.rxcui2 = bn_rxnrel.rxcui1 and sbd_rxnrel.rela = 'ingredient_of'
+    left join rxnconso sbd_rxnconso on sbd_rxnconso.rxcui = sbd_rxnrel.rxcui1 and sbd_rxnconso.sab = 'RXNORM' and sbd_rxnconso.tty = 'SBD'
+    
+    where in_rxnconso.tty = 'IN'
+        and in_rxnconso.sab = 'RXNORM'
+
+union all 
+
+    select in_rxnconso.rxcui as medication_ingredient_rxcui
+        , in_rxnconso.str as medication_ingredient_name
+        , in_rxnconso.tty as medication_ingredient_tty
+        , gpck_rxnconso.rxcui as medication_product_rxcui
+        , gpck_rxnconso.str as medication_product_name
+        , gpck_rxnconso.tty as medication_product_tty
+
+    -- medication ingredient (IN)
+    from rxnconso in_rxnconso
+
+    -- medication product (SCDC -> SCD -> GPCK)
+    left join rxnrel scdc_rxnrel on scdc_rxnrel.rxcui2 = in_rxnconso.rxcui and scdc_rxnrel.rela = 'ingredient_of'
+    left join rxnconso scdc_rxnconso on scdc_rxnconso.rxcui = scdc_rxnrel.rxcui1 and scdc_rxnconso.sab = 'RXNORM' and scdc_rxnconso.tty = 'SCDC'
+    left join rxnrel scd_rxnrel on scd_rxnrel.rxcui2 = scdc_rxnrel.rxcui1 and scd_rxnrel.rela = 'constitutes'
+    left join rxnconso scd_rxnconso on scd_rxnconso.rxcui = scd_rxnrel.rxcui1 and scd_rxnconso.sab = 'RXNORM' and scd_rxnconso.tty = 'SCD'
+    left join rxnrel gpck_rxnrel on gpck_rxnrel.rxcui2 = scd_rxnrel.rxcui1 and gpck_rxnrel.rela = 'contained_in'
+    left join rxnconso gpck_rxnconso on gpck_rxnconso.rxcui = gpck_rxnrel.rxcui1 and gpck_rxnconso.sab = 'RXNORM' and gpck_rxnconso.tty = 'GPCK'
+    
+    where in_rxnconso.tty = 'IN'
+        and in_rxnconso.sab = 'RXNORM'
+
+union all 
+
+    select in_rxnconso.rxcui as medication_ingredient_rxcui
+        , in_rxnconso.str as medication_ingredient_name
+        , in_rxnconso.tty as medication_ingredient_tty
+        , bpck_rxnconso.rxcui as medication_product_rxcui
+        , bpck_rxnconso.str as medication_product_name
+        , bpck_rxnconso.tty as medication_product_tty
+
+    -- medication ingredient (IN)
+    from rxnconso in_rxnconso
+
+    -- medication product (SCDC -> SCD -> GPCK -> BPCK)
+    left join rxnrel scdc_rxnrel on scdc_rxnrel.rxcui2 = in_rxnconso.rxcui and scdc_rxnrel.rela = 'ingredient_of'
+    left join rxnconso scdc_rxnconso on scdc_rxnconso.rxcui = scdc_rxnrel.rxcui1 and scdc_rxnconso.sab = 'RXNORM' and scdc_rxnconso.tty = 'SCDC'
+    left join rxnrel scd_rxnrel on scd_rxnrel.rxcui2 = scdc_rxnrel.rxcui1 and scd_rxnrel.rela = 'constitutes'
+    left join rxnconso scd_rxnconso on scd_rxnconso.rxcui = scd_rxnrel.rxcui1 and scd_rxnconso.sab = 'RXNORM' and scd_rxnconso.tty = 'SCD'
+    left join rxnrel gpck_rxnrel on gpck_rxnrel.rxcui2 = scd_rxnrel.rxcui1 and gpck_rxnrel.rela = 'contained_in'
+    left join rxnconso gpck_rxnconso on gpck_rxnconso.rxcui = gpck_rxnrel.rxcui1 and gpck_rxnconso.sab = 'RXNORM' and gpck_rxnconso.tty = 'GPCK'
+    left join rxnrel bpck_rxnrel on bpck_rxnrel.rxcui2 = gpck_rxnrel.rxcui1 and bpck_rxnrel.rela = 'has_tradename'
+    left join rxnconso bpck_rxnconso on bpck_rxnconso.rxcui = bpck_rxnrel.rxcui1 and bpck_rxnconso.sab = 'RXNORM' and bpck_rxnconso.tty = 'BPCK'
+
+    where in_rxnconso.tty = 'IN'
+        and in_rxnconso.sab = 'RXNORM'
+
+union all 
+
+    select min_rxnconso.rxcui as medication_ingredient_rxcui
+        , min_rxnconso.str as medication_ingredient_name
+        , min_rxnconso.tty as medication_ingredient_tty
+        , scd_rxnconso.rxcui as medication_product_rxcui
+        , scd_rxnconso.str as medication_product_name
+        , scd_rxnconso.tty as medication_product_tty
+
+    -- medication ingredient (MIN)
+    from rxnconso min_rxnconso
+
+    -- medication product (SCD)
+    left join rxnrel scd_rxnrel on scd_rxnrel.rxcui2 = min_rxnconso.rxcui and scd_rxnrel.rela = 'ingredients_of'
+    left join rxnconso scd_rxnconso on scd_rxnconso.rxcui = scd_rxnrel.rxcui1 and scd_rxnconso.sab = 'RXNORM' and scd_rxnconso.tty = 'SCD'
+    
+    where min_rxnconso.tty = 'MIN'
+        and min_rxnconso.sab = 'RXNORM'
+
+union all 
+
+    select min_rxnconso.rxcui as medication_ingredient_rxcui
+        , min_rxnconso.str as medication_ingredient_name
+        , min_rxnconso.tty as medication_ingredient_tty
+        , sbd_rxnconso.rxcui as medication_product_rxcui
+        , sbd_rxnconso.str as medication_product_name
+        , sbd_rxnconso.tty as medication_product_tty
+
+    -- medication ingredient (MIN)
+    from rxnconso min_rxnconso
+
+    -- medication product (SCD -> SBD)
+    left join rxnrel scd_rxnrel on scd_rxnrel.rxcui2 = min_rxnconso.rxcui and scd_rxnrel.rela = 'ingredients_of'
+    left join rxnconso scd_rxnconso on scd_rxnconso.rxcui = scd_rxnrel.rxcui1 and scd_rxnconso.sab = 'RXNORM' and scd_rxnconso.tty = 'SCD'
+    left join rxnrel sbd_rxnrel on sbd_rxnrel.rxcui2 = scd_rxnrel.rxcui1 and sbd_rxnrel.rela = 'has_tradename'
+    left join rxnconso sbd_rxnconso on sbd_rxnconso.rxcui = sbd_rxnrel.rxcui1 and sbd_rxnconso.sab = 'RXNORM' and sbd_rxnconso.tty = 'SBD'
+    
+    where min_rxnconso.tty = 'MIN'
+        and min_rxnconso.sab = 'RXNORM'
+
+union all 
+
+    select min_rxnconso.rxcui as medication_ingredient_rxcui
+        , min_rxnconso.str as medication_ingredient_name
+        , min_rxnconso.tty as medication_ingredient_tty
+        , gpck_rxnconso.rxcui as medication_product_rxcui
+        , gpck_rxnconso.str as medication_product_name
+        , gpck_rxnconso.tty as medication_product_tty
+
+    -- medication ingredient (MIN)
+    from rxnconso min_rxnconso
+
+    -- medication product (SCD -> GPCK)
+    left join rxnrel scd_rxnrel on scd_rxnrel.rxcui2 = min_rxnconso.rxcui and scd_rxnrel.rela = 'ingredients_of'
+    left join rxnconso scd_rxnconso on scd_rxnconso.rxcui = scd_rxnrel.rxcui1 and scd_rxnconso.sab = 'RXNORM' and scd_rxnconso.tty = 'SCD'
+    left join rxnrel gpck_rxnrel on gpck_rxnrel.rxcui2 = scd_rxnrel.rxcui1 and gpck_rxnrel.rela = 'contained_in'
+    left join rxnconso gpck_rxnconso on gpck_rxnconso.rxcui = gpck_rxnrel.rxcui1 and gpck_rxnconso.sab = 'RXNORM' and gpck_rxnconso.tty = 'GPCK'
+    
+    where min_rxnconso.tty = 'MIN'
+        and min_rxnconso.sab = 'RXNORM'
+
+union all 
+
+    select min_rxnconso.rxcui as medication_ingredient_rxcui
+        , min_rxnconso.str as medication_ingredient_name
+        , min_rxnconso.tty as medication_ingredient_tty
+        , bpck_rxnconso.rxcui as medication_product_rxcui
+        , bpck_rxnconso.str as medication_product_name
+        , bpck_rxnconso.tty as medication_product_tty
+
+    -- medication ingredient (MIN)
+    from rxnconso min_rxnconso
+
+    -- medication product (SCD -> SBD -> BPCK)
+    left join rxnrel scd_rxnrel on scd_rxnrel.rxcui2 = min_rxnconso.rxcui and scd_rxnrel.rela = 'ingredients_of'
+    left join rxnconso scd_rxnconso on scd_rxnconso.rxcui = scd_rxnrel.rxcui1 and scd_rxnconso.sab = 'RXNORM' and scd_rxnconso.tty = 'SCD'
+    left join rxnrel sbd_rxnrel on sbd_rxnrel.rxcui2 = scd_rxnrel.rxcui1 and sbd_rxnrel.rela = 'has_tradename'
+    left join rxnconso sbd_rxnconso on sbd_rxnconso.rxcui = sbd_rxnrel.rxcui1 and sbd_rxnconso.sab = 'RXNORM' and sbd_rxnconso.tty = 'SBD'
+    left join rxnrel bpck_rxnrel on bpck_rxnrel.rxcui2 = sbd_rxnrel.rxcui1 and bpck_rxnrel.rela = 'contained_in'
+    left join rxnconso bpck_rxnconso on bpck_rxnconso.rxcui = bpck_rxnrel.rxcui1 and bpck_rxnconso.sab = 'RXNORM' and bpck_rxnconso.tty = 'BPCK'
+    
+    where min_rxnconso.tty = 'MIN'
+        and min_rxnconso.sab = 'RXNORM'
+) as sq
+
+-- dose form
+left join rxnrel df_rxnrel on df_rxnrel.rxcui2 = sq.medication_product_rxcui and df_rxnrel.rela = 'has_dose_form'
+left join rxnconso df_rxnconso on df_rxnconso.rxcui = df_rxnrel.rxcui1 and df_rxnconso.sab = 'RXNORM' and df_rxnconso.tty = 'DF'
+
+-- dose form group
+--left join rxnrel dfg_rxnrel on dfg_rxnrel.rxcui2 = df_rxnrel.rxcui1 and dfg_rxnrel.rela = 'isa'
+--left join rxnconso dfg_rxnconso on dfg_rxnconso.rxcui = dfg_rxnrel.rxcui1 and dfg_rxnconso.sab = 'RXNORM' and dfg_rxnconso.tty = 'DFG'
+
+-- ndc
+left join rxnsat ndc_rxnsat on ndc_rxnsat.rxcui = sq.medication_product_rxcui and ndc_rxnsat.sab = 'RXNORM' and ndc_rxnsat.atn = 'NDC'
+
+where ndc_rxnsat.atv is not null
+--    and sq.medication_ingredient_rxcui in ('285155','10582','10814','10565','325521','10572')
\ No newline at end of file

From e2defa1d66b0af5b85c5bd0f1e6b8a70115f806c Mon Sep 17 00:00:00 2001
From: Yevgeny Bulochnik <yevgeny.bulochnik@gmail.com>
Date: Sun, 9 May 2021 18:14:03 -0500
Subject: [PATCH 23/46] Initial get_sql function to get meps package sql files

---
 src/mdt/meps/utils.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/src/mdt/meps/utils.py b/src/mdt/meps/utils.py
index c7b73d4..e2b9650 100644
--- a/src/mdt/meps/utils.py
+++ b/src/mdt/meps/utils.py
@@ -1,8 +1,12 @@
 import os
+import importlib.resources as pkg_resources
 from pathlib import Path
 from typing import Callable
 import requests
 
+from . import sql
+
+
 
 def get_dataset(
         dat_name: str,
@@ -25,3 +29,8 @@ def get_dataset(
     (dest / url.split('/')[-1]).write_bytes(response.content)
 
     return response
+
+
+def get_sql(file_name):
+    meps_sql = pkg_resources.read_text(sql, file_name)
+    return meps_sql

From 5f5a454316cacb07479d8c9c6e294565891aec85 Mon Sep 17 00:00:00 2001
From: Yevgeny Bulochnik <yevgeny.bulochnik@gmail.com>
Date: Mon, 10 May 2021 07:51:10 -0500
Subject: [PATCH 24/46] importlib.resources requires Python >=3.7

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 8893d23..be37c6e 100644
--- a/setup.py
+++ b/setup.py
@@ -18,7 +18,7 @@
     # keywords='sample, setuptools, development',  # Optional
     package_dir={'': 'src'},
     packages=find_packages(where='src'),
-    python_requires='>=3.6, <4',
+    python_requires='>=3.7 <4',
     install_requires=[
         'requests',
         'pandas'

From 90a7f0897f12b9616b7330bf50d3ea5f59ade8c9 Mon Sep 17 00:00:00 2001
From: Yevgeny Bulochnik <yevgeny.bulochnik@gmail.com>
Date: Sat, 15 May 2021 08:23:38 -0500
Subject: [PATCH 25/46] Add missing comma to python_requires in setup.py

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index be37c6e..fe22fef 100644
--- a/setup.py
+++ b/setup.py
@@ -18,7 +18,7 @@
     # keywords='sample, setuptools, development',  # Optional
     package_dir={'': 'src'},
     packages=find_packages(where='src'),
-    python_requires='>=3.7 <4',
+    python_requires='>=3.7, <4',
     install_requires=[
         'requests',
         'pandas'

From f303c5cf1febad2dfac1fa028ce9714c7f4a6060 Mon Sep 17 00:00:00 2001
From: Yevgeny Bulochnik <yevgeny.bulochnik@gmail.com>
Date: Sun, 16 May 2021 08:30:40 -0500
Subject: [PATCH 26/46] Use meps utils function to get reference sql

---
 src/mdt/database.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mdt/database.py b/src/mdt/database.py
index e2d0e04..07b288f 100644
--- a/src/mdt/database.py
+++ b/src/mdt/database.py
@@ -119,7 +119,7 @@ def load_meps():
 
     sql_create_table('meps_region_states', meps.columns.meps_region_states)
 
-    meps_reference_str = read_sql_string('meps_reference.sql')
+    meps_reference_str = meps.utils.get_sql('meps_reference.sql')
     meps_reference = db_query(meps_reference_str)
     sql_create_table('meps_reference', meps_reference)
     del meps_reference

From fcbe75bb7f5b5643e5a67a46d112cd75ab7e4579 Mon Sep 17 00:00:00 2001
From: Yevgeny Bulochnik <yevgeny.bulochnik@gmail.com>
Date: Sun, 16 May 2021 08:33:18 -0500
Subject: [PATCH 27/46] Add get_sql function to rxnorm utils

---
 src/mdt/rxnorm/utils.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/mdt/rxnorm/utils.py b/src/mdt/rxnorm/utils.py
index a8d49c8..b4a56ee 100644
--- a/src/mdt/rxnorm/utils.py
+++ b/src/mdt/rxnorm/utils.py
@@ -1,7 +1,11 @@
 from pathlib import Path
+import importlib.resources as pkg_resources
 import requests, os
 from typing import Callable
 
+from . import sql
+
+
 def json_extract(obj, key):
     """Recursively fetch values from nested JSON."""
     arr = []
@@ -60,3 +64,7 @@ def get_dataset(
     (dest / url.split('/')[-1]).write_bytes(response.content)
     return response
 
+
+def get_sql(file_name):
+    meps_sql = pkg_resources.read_text(sql, file_name)
+    return meps_sql

From 2d08999324b8899874f7f3b4930260acb05787c5 Mon Sep 17 00:00:00 2001
From: Yevgeny Bulochnik <yevgeny.bulochnik@gmail.com>
Date: Sun, 16 May 2021 14:43:12 -0500
Subject: [PATCH 28/46] Rename synthea.py to utils.py

---
 src/mdt/{synthea.py => utils.py} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename src/mdt/{synthea.py => utils.py} (100%)

diff --git a/src/mdt/synthea.py b/src/mdt/utils.py
similarity index 100%
rename from src/mdt/synthea.py
rename to src/mdt/utils.py

From 9f46e230520aa3ec7a9eb275c31f3f7fc6d73f76 Mon Sep 17 00:00:00 2001
From: Yevgeny Bulochnik <yevgeny.bulochnik@gmail.com>
Date: Sun, 16 May 2021 14:45:57 -0500
Subject: [PATCH 29/46] Basic mdt config.py, will need future refactor

---
 src/mdt/config.py | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 src/mdt/config.py

diff --git a/src/mdt/config.py b/src/mdt/config.py
new file mode 100644
index 0000000..6fb0551
--- /dev/null
+++ b/src/mdt/config.py
@@ -0,0 +1,5 @@
+MEPS_CONFIG={
+    "age": ["0-3", "4-7", "8-11", "12-18", "19-49", "50-64", "65-99"],
+    "demographic_distrib_flags" : {"age": "Y", "gender": "Y", "state": "Y"},
+    "meps_year" : "18"
+}

From a1dbc3888bcba9893a5fc6041739093d6799bee6 Mon Sep 17 00:00:00 2001
From: Yevgeny Bulochnik <yevgeny.bulochnik@gmail.com>
Date: Sun, 16 May 2021 16:13:08 -0500
Subject: [PATCH 30/46] Add missing payload constructor import

---
 src/mdt/rxnorm/rxclass.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mdt/rxnorm/rxclass.py b/src/mdt/rxnorm/rxclass.py
index 70f422e..73cb971 100644
--- a/src/mdt/rxnorm/rxclass.py
+++ b/src/mdt/rxnorm/rxclass.py
@@ -1,3 +1,4 @@
+from .utils import payload_constructor
 
 
 def rxclass_findclassesbyid_payload(class_id):

From 9b3b854959715df8ee792d383f3aa0d6d478b3b8 Mon Sep 17 00:00:00 2001
From: Yevgeny Bulochnik <yevgeny.bulochnik@gmail.com>
Date: Sun, 16 May 2021 16:17:39 -0500
Subject: [PATCH 31/46] Add missing urllib import

---
 src/mdt/rxnorm/utils.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mdt/rxnorm/utils.py b/src/mdt/rxnorm/utils.py
index b4a56ee..bd88458 100644
--- a/src/mdt/rxnorm/utils.py
+++ b/src/mdt/rxnorm/utils.py
@@ -1,3 +1,4 @@
+import urllib
 from pathlib import Path
 import importlib.resources as pkg_resources
 import requests, os

From 25ed016a259ed3b9b42129565095cec26f2601f0 Mon Sep 17 00:00:00 2001
From: Yevgeny Bulochnik <yevgeny.bulochnik@gmail.com>
Date: Sun, 16 May 2021 16:20:50 -0500
Subject: [PATCH 32/46] Add missing imports, re, pandas, meps, database
 functions

---
 src/mdt/utils.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/mdt/utils.py b/src/mdt/utils.py
index 198074a..2f2653d 100644
--- a/src/mdt/utils.py
+++ b/src/mdt/utils.py
@@ -1,4 +1,8 @@
-
+import json
+import re
+import pandas as pd
+from mdt.database import db_query, read_sql_string
+from mdt import meps
 
 def read_json(file_name):
     # Opening JSON file
@@ -108,7 +112,7 @@ def generate_module(rxcui_ndc_df, rxclass_name):
         meps_rxcui = meps_rxcui.merge(age_ranges.astype(str), how='inner', left_on='AGELAST', right_on='age_values')
     #Optional: State-region mapping from MEPS 
     if demographic_distrib_flags['state'] == 'Y':
-        meps_rxcui = meps_rxcui.merge(meps_region_states.astype(str), how='inner', left_on='region_num', right_on='region_value')
+        meps_rxcui = meps_rxcui.merge(meps.columns.meps_region_states.astype(str), how='inner', left_on='region_num', right_on='region_value')
 
 
     #Clean text to JSON/SQL-friendly format 

From 1ebb93443c0363f8f130122b01933d29b4916655 Mon Sep 17 00:00:00 2001
From: Yevgeny Bulochnik <yevgeny.bulochnik@gmail.com>
Date: Sun, 16 May 2021 16:26:34 -0500
Subject: [PATCH 33/46] Move rx_api script into run_mdt.py, fix imports,
 currently broken

---
 src/mdt/run_mdt.py | 62 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

diff --git a/src/mdt/run_mdt.py b/src/mdt/run_mdt.py
index 92c8ed5..cd4d2e9 100644
--- a/src/mdt/run_mdt.py
+++ b/src/mdt/run_mdt.py
@@ -1,10 +1,72 @@
 from mdt.database import load_rxnorm, load_meps
+from mdt import rxnorm
+from mdt.utils import (
+    rxcui_ndc_matcher,
+    output_df,
+    generate_module
+)
 
 
 def main():
     load_rxnorm()
     load_meps()
 
+    #TODO: replace this with config settings or JSON input
+    #For testing: D007037 = Hypothyroidism, D001249 = Asthma
+    rxclass_id = 'D001249'
+    rxclass_rela = 'may_treat'
+
+    #Call RxClass FindClassesById API to get class info (name primarily) of the specified class
+    rxclass_response = rxnorm.utils.rxapi_get_requestor(
+        rxnorm.rxclass.rxclass_findclassesbyid_payload(rxclass_id)
+    )
+    rxclass_names =  rxnorm.utils.json_extract(rxclass_response, 'className')
+    #TODO: allow for name override in input settings
+    #TODO: build in better error handling if rxclass_id is garbage or returns no info
+    rxclass_name = rxclass_names[0] if len(rxclass_names) > 0 else 'unspecified'
+
+    #Call RxClass GetClassMember API to get members of the specified class with specified relationship(s)
+    rxclass_response = rxnorm.utils.rxapi_get_requestor(
+        rxnorm.rxclass.rxclass_getclassmember_payload(rxclass_id, rxclass_rela)
+    )
+
+    #First, get all medications that contain one of the ingredient RXCUIs
+    #This will result in duplicate NDCs and potentially no MINs
+    rxcui_ingredient_list = rxnorm.utils.json_extract(rxclass_response, 'rxcui')
+    rxcui_ingredient_df = rxcui_ndc_matcher(rxcui_ingredient_list)
+
+    #Second, get all of the medications that contain one of the product RXCUIs in the df above
+    #This will result in potentially INs and MINs, but still duplicate NDCs
+    rxcui_product_list = rxcui_ingredient_df['medication_product_rxcui'].drop_duplicates().tolist()
+    rxcui_product_df = rxcui_ndc_matcher(rxcui_product_list)
+
+    #Third, query the df above with a window function to group by NDC and prefer MIN over IN
+    #This will result in only distinct NDCs that map to either an MIN (preferred) or an IN
+    #https://pandas.pydata.org/pandas-docs/stable/getting_started/comparison/comparison_with_sql.html#top-n-rows-per-group
+    rxcui_ndc_df = rxcui_product_df.assign(
+        rn = rxcui_product_df.sort_values(['medication_ingredient_tty'], ascending=False)
+        .groupby(['medication_ndc'])
+        .cumcount()
+        + 1
+    ).query('rn < 2').drop(columns=['rn'])
+
+    #Filter by dose form group (DFG) or dose form (DF)
+    #Function expects the rxcui_ndc_df, a list of DFG or DF names, and a flag for whether to include (default) or exclude
+    #If list of DFGs or DFs is empty, then nothing is filtered out
+    #https://www.nlm.nih.gov/research/umls/rxnorm/docs/appendix3.html
+
+    # Add in after adding dfg info
+    # dfg_df_list = []
+    # rxcui_ndc_df = filter_by_df(rxcui_ndc_df, dfg_df_list)
+
+    #Saves df to csv
+    output_df(rxcui_ndc_df)
+
+    #Gets distributions for the rxcui_ndc_df products
+    #TODO: adjust the second argument so that it'll grab the rxclass_sources (class + description, e.g., asthma_may_prevent or ATC, e.g., CCBs)
+    #TODO: maybe add an input for a population_df so we can modularize MEPS in case they replace it with another population source
+    generate_module(rxcui_ndc_df, rxclass_name)
+
 
 if __name__ == '__main__':
     main()

From c016d304d55721e57681c86c19beb5a986746779 Mon Sep 17 00:00:00 2001
From: Yevgeny Bulochnik <yevgeny.bulochnik@gmail.com>
Date: Sun, 16 May 2021 16:40:17 -0500
Subject: [PATCH 34/46] Use rxnorm get_sql function to load rxcui_ndc table

---
 src/mdt/database.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/mdt/database.py b/src/mdt/database.py
index 07b288f..e8631e0 100644
--- a/src/mdt/database.py
+++ b/src/mdt/database.py
@@ -79,6 +79,10 @@ def load_rxnorm():
 
     del z
 
+    rxcui_ndc = db_query(rxnorm.utils.get_sql('rxcui_ndc.sql'))
+    sql_create_table('rxcui_ndc', rxcui_ndc)
+    del rxcui_ndc
+
 
 def load_meps():
     '''Load Meps data into db'''

From ed6483ad64fae124c918e52d17d70018957fae0e Mon Sep 17 00:00:00 2001
From: Yevgeny Bulochnik <yevgeny.bulochnik@gmail.com>
Date: Sun, 16 May 2021 16:40:56 -0500
Subject: [PATCH 35/46] Use meps get_sql function in mdt.utils

---
 src/mdt/utils.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/mdt/utils.py b/src/mdt/utils.py
index 2f2653d..fa7d5ba 100644
--- a/src/mdt/utils.py
+++ b/src/mdt/utils.py
@@ -90,8 +90,7 @@ def generate_module(rxcui_ndc_df, rxclass_name):
     #Get tuples of medication_product names and medication_product RXCUIs and loop through to generate MedicationOrders 
 
     #Read in MEPS Reference table
-    meps_reference_str = read_sql_string('meps_reference.sql')
-    meps_reference = db_query(meps_reference_str)
+    meps_reference = db_query(meps.utils.get_sql('meps_reference.sql'))
 
     #Join MEPS to filtered rxcui_ndc dataframe (rxcui_list)
     meps_rxcui = meps_reference.astype(str).merge(rxcui_ndc_df.astype(str)[['medication_ingredient_name', 'medication_ingredient_rxcui','medication_product_name', 'medication_product_rxcui', 'medication_ndc']], how = 'inner', left_on = 'RXNDC', right_on = 'medication_ndc')

From d689303b4ef6d971b0c4bddd1aaaa82157edefa1 Mon Sep 17 00:00:00 2001
From: Yevgeny Bulochnik <yevgeny.bulochnik@gmail.com>
Date: Sun, 16 May 2021 16:53:08 -0500
Subject: [PATCH 36/46] Monkey patch to read age and age values from python
 config.py

---
 src/mdt/utils.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/mdt/utils.py b/src/mdt/utils.py
index fa7d5ba..dcb7335 100644
--- a/src/mdt/utils.py
+++ b/src/mdt/utils.py
@@ -1,6 +1,7 @@
 import json
 import re
 import pandas as pd
+from mdt.config import MEPS_CONFIG
 from mdt.database import db_query, read_sql_string
 from mdt import meps
 
@@ -13,11 +14,12 @@ def read_json(file_name):
     return data
 
 
-def age_values(file_name):
+# Monkey patched this function to get run_mdt working by removing the filename arg and importing from config
+def age_values():
     """reads age_ranges from JSON to create dataframe with age_values"""
-    
+
     data = {}
-    data['age'] = read_json('mdt_config.json')['age']
+    data['age'] = MEPS_CONFIG['age']
     data['age_values'] = [list(range(int(age.split('-')[0]), int(age.split('-')[1])+1)) for age in data['age']]
     df = pd.DataFrame(data)
     df = df.explode('age_values')
@@ -98,7 +100,7 @@ def generate_module(rxcui_ndc_df, rxclass_name):
     #Optional: Age range join - can be customized in the mdt_config.json file
     #groupby_demographic_variable: must be either an empty list [] or list of patient demographics (e.g., age, gender, state) - based on user inputs in the mdt_config.json file
 
-    data = read_json('mdt_config.json')
+    data = MEPS_CONFIG
     demographic_distrib_flags = data['demographic_distrib_flags']
 
     groupby_demographic_variables = []
@@ -107,7 +109,7 @@ def generate_module(rxcui_ndc_df, rxclass_name):
                groupby_demographic_variables.append(k)  
         
     if demographic_distrib_flags['age'] == 'Y':
-        age_ranges = age_values('mdt_config.json')
+        age_ranges = age_values()
         meps_rxcui = meps_rxcui.merge(age_ranges.astype(str), how='inner', left_on='AGELAST', right_on='age_values')
     #Optional: State-region mapping from MEPS 
     if demographic_distrib_flags['state'] == 'Y':

From 1848d756c43d1126a7469dfd00be64676610040a Mon Sep 17 00:00:00 2001
From: Yevgeny Bulochnik <yevgeny.bulochnik@gmail.com>
Date: Sun, 16 May 2021 16:55:29 -0500
Subject: [PATCH 37/46] Skip loading rxnorm and meps if MDT.db exists

---
 src/mdt/run_mdt.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/mdt/run_mdt.py b/src/mdt/run_mdt.py
index cd4d2e9..759bce4 100644
--- a/src/mdt/run_mdt.py
+++ b/src/mdt/run_mdt.py
@@ -1,3 +1,4 @@
+from pathlib import Path
 from mdt.database import load_rxnorm, load_meps
 from mdt import rxnorm
 from mdt.utils import (
@@ -8,8 +9,10 @@
 
 
 def main():
-    load_rxnorm()
-    load_meps()
+
+    if not (Path.cwd() / 'data' / 'MDT.db'):
+        load_rxnorm()
+        load_meps()
 
     #TODO: replace this with config settings or JSON input
     #For testing: D007037 = Hypothyroidism, D001249 = Asthma

From 270bf4feae3b5e49802d1a64b6aeecead9da7355 Mon Sep 17 00:00:00 2001
From: Yevgeny Bulochnik <yevgeny.bulochnik@gmail.com>
Date: Sun, 16 May 2021 16:58:57 -0500
Subject: [PATCH 38/46] Uses system args to pass rxclass_id and rxclass_rela to
 run_mdt.py

---
 src/mdt/run_mdt.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/src/mdt/run_mdt.py b/src/mdt/run_mdt.py
index 759bce4..f03a4d1 100644
--- a/src/mdt/run_mdt.py
+++ b/src/mdt/run_mdt.py
@@ -1,3 +1,4 @@
+import sys
 from pathlib import Path
 from mdt.database import load_rxnorm, load_meps
 from mdt import rxnorm
@@ -8,17 +9,15 @@
 )
 
 
-def main():
+#TODO: replace this with config settings or JSON input
+#For testing: D007037 = Hypothyroidism, D001249 = Asthma
+
+def main(rxclass_id, rxclass_rela):
 
     if not (Path.cwd() / 'data' / 'MDT.db'):
         load_rxnorm()
         load_meps()
 
-    #TODO: replace this with config settings or JSON input
-    #For testing: D007037 = Hypothyroidism, D001249 = Asthma
-    rxclass_id = 'D001249'
-    rxclass_rela = 'may_treat'
-
     #Call RxClass FindClassesById API to get class info (name primarily) of the specified class
     rxclass_response = rxnorm.utils.rxapi_get_requestor(
         rxnorm.rxclass.rxclass_findclassesbyid_payload(rxclass_id)
@@ -72,4 +71,6 @@ def main():
 
 
 if __name__ == '__main__':
-    main()
+    rxclass_id = sys.argv[1]
+    rxclass_rela = sys.argv[2]
+    main(rxclass_id, rxclass_rela)

From 23569cdcee9bce10be29089f60860669806afbc5 Mon Sep 17 00:00:00 2001
From: Yevgeny Bulochnik <yevgeny.bulochnik@gmail.com>
Date: Sun, 16 May 2021 17:16:44 -0500
Subject: [PATCH 39/46] Add rxnorm dosage form sql

---
 src/mdt/rxnorm/sql/dfg_df.sql | 10 ++++++++++
 1 file changed, 10 insertions(+)
 create mode 100644 src/mdt/rxnorm/sql/dfg_df.sql

diff --git a/src/mdt/rxnorm/sql/dfg_df.sql b/src/mdt/rxnorm/sql/dfg_df.sql
new file mode 100644
index 0000000..e7a57c1
--- /dev/null
+++ b/src/mdt/rxnorm/sql/dfg_df.sql
@@ -0,0 +1,10 @@
+select distinct df_rxnconso.str as df, dfg_rxnconso.str as dfg
+
+-- dose form
+from rxnconso df_rxnconso
+
+-- dose form group
+left join rxnrel dfg_rxnrel on dfg_rxnrel.rxcui2 = df_rxnconso.rxcui and dfg_rxnrel.rela = 'isa'
+left join rxnconso dfg_rxnconso on dfg_rxnconso.rxcui = dfg_rxnrel.rxcui1 and dfg_rxnconso.sab = 'RXNORM' and dfg_rxnconso.tty = 'DFG'
+
+where df_rxnconso.sab = 'RXNORM' and df_rxnconso.tty = 'DF'

From fffc61e1c3802b1e36c042afdb7b526c953369ff Mon Sep 17 00:00:00 2001
From: Yevgeny Bulochnik <yevgeny.bulochnik@gmail.com>
Date: Sun, 16 May 2021 17:17:44 -0500
Subject: [PATCH 40/46] Load dfg table with load_rxnorm

---
 src/mdt/database.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/mdt/database.py b/src/mdt/database.py
index e8631e0..ae5f560 100644
--- a/src/mdt/database.py
+++ b/src/mdt/database.py
@@ -75,7 +75,7 @@ def load_rxnorm():
     col_names = ['RXCUI','LUI','SUI','RXAUI','STYPE','CODE','ATUI','SATUI','ATN','SAB','ATV','SUPPRESS','CVF','test']
     rxnsat = pd.read_csv(z.open('rrf/RXNSAT.RRF'),sep='|',dtype=object,header=None,names=col_names)
     sql_create_table('rxnsat',rxnsat)
-    del rxnsat 
+    del rxnsat
 
     del z
 
@@ -83,6 +83,10 @@ def load_rxnorm():
     sql_create_table('rxcui_ndc', rxcui_ndc)
     del rxcui_ndc
 
+    dfg_df = db_query(rxnorm.utils.get_sql('dfg_df.sql'))
+    sql_create_table('dfg_df', dfg_df)
+    del dfg_df
+
 
 def load_meps():
     '''Load Meps data into db'''

From 029fd498c944d1eee0129cd2a96af22cc31f4799 Mon Sep 17 00:00:00 2001
From: Yevgeny Bulochnik <yevgeny.bulochnik@gmail.com>
Date: Sun, 16 May 2021 17:19:20 -0500
Subject: [PATCH 41/46] Add filter_by_df function to mdt.utils, missing
 path.exists added

---
 src/mdt/run_mdt.py |  7 ++++---
 src/mdt/utils.py   | 23 +++++++++++++++++++++++
 2 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/src/mdt/run_mdt.py b/src/mdt/run_mdt.py
index f03a4d1..15f6697 100644
--- a/src/mdt/run_mdt.py
+++ b/src/mdt/run_mdt.py
@@ -4,6 +4,7 @@
 from mdt import rxnorm
 from mdt.utils import (
     rxcui_ndc_matcher,
+    filter_by_df,
     output_df,
     generate_module
 )
@@ -14,7 +15,7 @@
 
 def main(rxclass_id, rxclass_rela):
 
-    if not (Path.cwd() / 'data' / 'MDT.db'):
+    if not (Path.cwd() / 'data' / 'MDT.db').exists():
         load_rxnorm()
         load_meps()
 
@@ -58,8 +59,8 @@ def main(rxclass_id, rxclass_rela):
     #https://www.nlm.nih.gov/research/umls/rxnorm/docs/appendix3.html
 
     # Add in after adding dfg info
-    # dfg_df_list = []
-    # rxcui_ndc_df = filter_by_df(rxcui_ndc_df, dfg_df_list)
+    dfg_df_list = []
+    rxcui_ndc_df = filter_by_df(rxcui_ndc_df, dfg_df_list)
 
     #Saves df to csv
     output_df(rxcui_ndc_df)
diff --git a/src/mdt/utils.py b/src/mdt/utils.py
index dcb7335..662f62c 100644
--- a/src/mdt/utils.py
+++ b/src/mdt/utils.py
@@ -38,6 +38,29 @@ def rxcui_ndc_matcher(rxcui_list):
     
     return filtered_df
 
+def filter_by_df(rxcui_ndc_df, dfg_df_list, method='include'):
+    """Gets DFs from dfg_df table that match either a DF in the list, or have a DFG that matches a DFG in the list
+    If dfg_df list is empty, return the rxcui_ndc_df without filtering
+    Select method option of include or exclude....include is default"""
+
+    if len(dfg_df_list) == 0:
+        return rxcui_ndc_df
+
+    dfg_df_df = db_query('SELECT * FROM dfg_df')
+    filtered_dfg_df_df = dfg_df_df[dfg_df_df['dfg'].isin(dfg_df_list) | dfg_df_df['df'].isin(dfg_df_list)]
+    df_list = filtered_dfg_df_df['df'].tolist()
+
+    if method == 'include':
+        filtered_rxcui_ndc_df = rxcui_ndc_df[rxcui_ndc_df['dose_form_name'].isin(df_list)]
+    elif method == 'exclude':
+        filtered_rxcui_ndc_df = rxcui_ndc_df[~rxcui_ndc_df['dose_form_name'].isin(df_list)]
+    else:
+        filtered_rxcui_ndc_df = rxcui_ndc_df
+
+    print("RXCUI list filtered on DF matched on {0} NDCs".format(filtered_rxcui_ndc_df['medication_ndc'].count()))
+
+    return filtered_rxcui_ndc_df
+
 
 def output_df(df,output='csv', filename='df_output'):
     """Outputs a dataframe to a csv of clipboard if you use the output=clipboard arguement"""

From d87c2ca859d75c588a74246e39adb7eef5feb1a0 Mon Sep 17 00:00:00 2001
From: Yevgeny Bulochnik <yevgeny.bulochnik@gmail.com>
Date: Sun, 16 May 2021 17:25:41 -0500
Subject: [PATCH 42/46] Initial FDA subpackage setup

---
 src/mdt/fda/__init__.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 src/mdt/fda/__init__.py

diff --git a/src/mdt/fda/__init__.py b/src/mdt/fda/__init__.py
new file mode 100644
index 0000000..e69de29

From 08804335717282e8669af9401d0e0e9951767734 Mon Sep 17 00:00:00 2001
From: Yevgeny Bulochnik <yevgeny.bulochnik@gmail.com>
Date: Sun, 16 May 2021 17:28:42 -0500
Subject: [PATCH 43/46] fda utils module setup, get_dataset function

---
 src/mdt/fda/__init__.py |  1 +
 src/mdt/fda/utils.py    | 17 +++++++++++++++++
 2 files changed, 18 insertions(+)
 create mode 100644 src/mdt/fda/utils.py

diff --git a/src/mdt/fda/__init__.py b/src/mdt/fda/__init__.py
index e69de29..eb018c3 100644
--- a/src/mdt/fda/__init__.py
+++ b/src/mdt/fda/__init__.py
@@ -0,0 +1 @@
+from . import utils
diff --git a/src/mdt/fda/utils.py b/src/mdt/fda/utils.py
new file mode 100644
index 0000000..0d7a332
--- /dev/null
+++ b/src/mdt/fda/utils.py
@@ -0,0 +1,17 @@
+import requests
+from pathlib import Path
+
+
+def get_dataset(
+    dest = Path.cwd(),
+    handler = None
+):
+    url = f'https://www.accessdata.fda.gov/cder/ndctext.zip'
+    response = requests.get(url)
+
+    if handler:
+        return handler(response.content)
+
+    (dest / url.split('/')[-1]).write_bytes(response.content)
+
+    return response

From e1a413e902507a44bf6e91f319bac44584b0247b Mon Sep 17 00:00:00 2001
From: Yevgeny Bulochnik <yevgeny.bulochnik@gmail.com>
Date: Sun, 16 May 2021 17:46:00 -0500
Subject: [PATCH 44/46] load_fda function setup

---
 src/mdt/database.py | 35 ++++++++++++++++++++++++++++++++++-
 src/mdt/run_mdt.py  |  3 ++-
 2 files changed, 36 insertions(+), 2 deletions(-)

diff --git a/src/mdt/database.py b/src/mdt/database.py
index ae5f560..dc2266a 100644
--- a/src/mdt/database.py
+++ b/src/mdt/database.py
@@ -1,4 +1,4 @@
-from . import rxnorm, meps
+from . import rxnorm, meps, fda
 from pathlib import Path
 import zipfile
 import io
@@ -144,3 +144,36 @@ def load_meps():
 
     meps_region_states = db_query("Select count(*) AS records from meps_region_states")
     print('DB table meps_region_states has {0} records'.format(meps_region_states['records'].iloc[0]))
+
+
+def load_fda():
+    '''Load FDA tables into db'''
+
+    z = zipfile.ZipFile(
+        fda.utils.get_dataset(handler=io.BytesIO)
+    )
+    product = pd.read_csv(z.open('product.txt'),sep='\t',dtype=object,header=0,encoding='cp1252')
+    package = pd.read_csv(z.open('package.txt'),sep='\t',dtype=object,header=0,encoding='cp1252')
+    sql_create_table('product',product)
+    sql_create_table('package',package)
+    del product
+    del package
+
+    #deletes FDA ZIP
+    del z
+
+    #NOTE: Rob's python code to join one of these tables with the rxcui_ndc table goes here
+    """
+    rxcui_ndc_string = read_sql_string('rxcui_ndc.sql')
+    rxcui_ndc = db_query(rxcui_ndc_string)
+    sql_create_table('rxcui_ndc', rxcui_ndc)
+    del rxcui_ndc
+    """
+
+
+    #TEST!!!!!!!!!!!!!!!! reads record count from created database
+    product = db_query("Select count(*) AS records from product limit 1")
+    print('DB table product has {0} records'.format(product['records'].iloc[0]))
+
+    package = db_query("Select count(*) AS records from package limit 1")
+    print('DB table package has {0} records'.format(package['records'].iloc[0]))
diff --git a/src/mdt/run_mdt.py b/src/mdt/run_mdt.py
index 15f6697..cbafb2a 100644
--- a/src/mdt/run_mdt.py
+++ b/src/mdt/run_mdt.py
@@ -1,6 +1,6 @@
 import sys
 from pathlib import Path
-from mdt.database import load_rxnorm, load_meps
+from mdt.database import load_rxnorm, load_meps, load_fda
 from mdt import rxnorm
 from mdt.utils import (
     rxcui_ndc_matcher,
@@ -18,6 +18,7 @@ def main(rxclass_id, rxclass_rela):
     if not (Path.cwd() / 'data' / 'MDT.db').exists():
         load_rxnorm()
         load_meps()
+        load_fda()
 
     #Call RxClass FindClassesById API to get class info (name primarily) of the specified class
     rxclass_response = rxnorm.utils.rxapi_get_requestor(

From 5f72506a44655f3ac3f320c6bd6e2535f1fa46ec Mon Sep 17 00:00:00 2001
From: Yevgeny Bulochnik <yevgeny.bulochnik@gmail.com>
Date: Sun, 16 May 2021 17:54:48 -0500
Subject: [PATCH 45/46] Dev setup in readme

---
 README.md | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/README.md b/README.md
index 0fc6b50..8d1839b 100644
--- a/README.md
+++ b/README.md
@@ -85,3 +85,11 @@ src/
 │  │  │  ├─ hypothyroidism.json
 │  │  │  ├─ ...
 ```
+
+## Contribution Guide
+1. Set up a venv with `python -m venv venv`; this will create a directory called venv in your current working directory
+2. Activate your venv with `source venv/bin/activate`, or on Windows `venv/Scripts/Activate`
+3. Install MDT with `pip install -e .`, this sets up mdt as an installed editable package
+4. Run MDT with `python -m mdt.run_mdt D007037 may_treat`
+    - `run_mdt` takes two system args, the rxclass_id and the rxclass_rela; both must be specified
+    - the initial run of `run_mdt` will download all necessary files and build the database in `data/` in the current working directory

From cc1154d98327c5a521578e114d31fb8c9abaf43a Mon Sep 17 00:00:00 2001
From: kristentaytok <kristentaytok@gmail.com>
Date: Sun, 16 May 2021 22:28:35 -0700
Subject: [PATCH 46/46] #30 adding marketyears to the generate_module function

Taking Eugene's restructure branch (from PR #59), this updates the following:
1. Added cleaned version of Rob's code to database.py --> creates a medication_ingredient_rxcui_years table & a medication_product_rxcui_years table, for their respective distributions in generate_module
2. added 'year' column to the generate_module dataframes/CSV files.
3. fixed default_probability typo in utils.py (if idx == 1 --> changed to if idx == 0)
---
 src/mdt/database.py | 60 +++++++++++++++++++++++++++++++++++++++------
 src/mdt/utils.py    | 40 +++++++++++++++++++-----------
 2 files changed, 79 insertions(+), 21 deletions(-)

diff --git a/src/mdt/database.py b/src/mdt/database.py
index dc2266a..3e216f9 100644
--- a/src/mdt/database.py
+++ b/src/mdt/database.py
@@ -1,9 +1,10 @@
-from . import rxnorm, meps, fda
+from mdt import rxnorm, meps, fda
 from pathlib import Path
 import zipfile
 import io
 import sqlite3
 import pandas as pd
+from datetime import datetime
 
 
 def to_data():
@@ -152,28 +153,73 @@ def load_fda():
     z = zipfile.ZipFile(
         fda.utils.get_dataset(handler=io.BytesIO)
     )
+
+    #moves FDA files to sqlite database by reading as dataframes
     product = pd.read_csv(z.open('product.txt'),sep='\t',dtype=object,header=0,encoding='cp1252')
     package = pd.read_csv(z.open('package.txt'),sep='\t',dtype=object,header=0,encoding='cp1252')
     sql_create_table('product',product)
     sql_create_table('package',package)
-    del product
-    del package
+
 
     #deletes FDA ZIP
     del z
 
-    #NOTE: Rob's python code to join one of these tables with the rxcui_ndc table goes here
-    """
+
+
+    #join product table with the rxcui_ndc table
     rxcui_ndc_string = read_sql_string('rxcui_ndc.sql')
     rxcui_ndc = db_query(rxcui_ndc_string)
     sql_create_table('rxcui_ndc', rxcui_ndc)
-    del rxcui_ndc
-    """
 
 
+    product['PRODUCTNDC'] = product['PRODUCTNDC'].str.replace('-', '').str.zfill(9) 
+    rxcui_ndc['medication_ndc'] = rxcui_ndc['medication_ndc'].astype(str).str.zfill(9)  
+    product_rxcui = product.merge(rxcui_ndc, left_on = 'PRODUCTNDC', right_on = rxcui_ndc['medication_ndc'].str.slice(start=0,stop=9), how = 'left')
+
+
+    #extract year from startmarketingdate & endmarketingdate
+    #fill NULL endmarketingyear with current year 
+    product_rxcui['STARTMARKETINGYEAR'] = product_rxcui['STARTMARKETINGDATE'].str.slice(start=0, stop=4).astype(int)
+    product_rxcui['ENDMARKETINGYEAR'] = product_rxcui['ENDMARKETINGDATE'].str.slice(start=0, stop=4)
+    product_rxcui['ENDMARKETINGYEAR'] = product_rxcui['ENDMARKETINGYEAR'].fillna(datetime.now().year)
+    product_rxcui['ENDMARKETINGYEAR'] = product_rxcui['ENDMARKETINGYEAR'].astype(int)
+    product_rxcui = product_rxcui[['medication_ingredient_rxcui', 'medication_ingredient_name', 'medication_product_rxcui',
+    'medication_product_name', 'STARTMARKETINGYEAR', 'ENDMARKETINGYEAR']]
+
+    med_marketing_year_dict = {}
+    med_state_level_list = ['medication_ingredient', 'medication_product']
+
+    #create a dictionary of df's (one for ingredient, other for product) that contains a range of years that each rxcui was available on the market
+    def med_marketing_year(med_state_level_list):
+        for med_state_level in med_state_level_list:
+            #takes MIN startmarketingdate and MAX endmarketingdate for each rxcui
+            med_marketing_year_dict[med_state_level+'_max_marketingyear_range'] = product_rxcui.groupby([med_state_level+'_rxcui', med_state_level+'_name']).agg({'STARTMARKETINGYEAR': 'min', 'ENDMARKETINGYEAR': 'max'}).reset_index()
+
+            #creates a row for each year between startmarketingdate and endmarketingdate for each rxcui
+            zipped = zip(med_marketing_year_dict[med_state_level+'_max_marketingyear_range'][med_state_level+'_rxcui'], med_marketing_year_dict[med_state_level+'_max_marketingyear_range']['STARTMARKETINGYEAR'], med_marketing_year_dict[med_state_level+'_max_marketingyear_range']['ENDMARKETINGYEAR'])
+            med_marketing_year_dict[med_state_level+'_rxcui_years'] = pd.DataFrame([(i, y) for i, s, e in zipped for y in range(s, e+1)],
+                            columns=[med_state_level+'_rxcui','year'])
+            sql_create_table(med_state_level+'_rxcui_years',med_marketing_year_dict[med_state_level+'_rxcui_years'])
+            print(med_state_level+'_rxcui_years')
+            
+    med_marketing_year(med_state_level_list)
+
+    #deletes other dataframes
+    del product
+    del package
+    del rxcui_ndc
+    del medication_ingredient_rxcui_years
+    del medication_product_rxcui_years
+
     #TEST!!!!!!!!!!!!!!!! reads record count from created database
     product = db_query("Select count(*) AS records from product limit 1")
     print('DB table product has {0} records'.format(product['records'].iloc[0]))
 
     package = db_query("Select count(*) AS records from package limit 1")
     print('DB table package has {0} records'.format(package['records'].iloc[0]))
+
+    medication_product_rxcui_years = db_query("Select count(*) AS records from medication_product_rxcui_years limit 1")
+    print('DB table medication_product_rxcui_years has {0} records'.format(medication_product_rxcui_years['records'].iloc[0]))
+
+    medication_ingredient_rxcui_years = db_query("Select count(*) AS records from medication_ingredient_rxcui_years limit 1")
+    print('DB table medication_ingredient_rxcui_years has {0} records'.format(medication_ingredient_rxcui_years['records'].iloc[0]))
\ No newline at end of file
diff --git a/src/mdt/utils.py b/src/mdt/utils.py
index 662f62c..367edbe 100644
--- a/src/mdt/utils.py
+++ b/src/mdt/utils.py
@@ -117,9 +117,16 @@ def generate_module(rxcui_ndc_df, rxclass_name):
     #Read in MEPS Reference table
     meps_reference = db_query(meps.utils.get_sql('meps_reference.sql'))
 
+     #Read in FDA Ingredient-RxCUI-Years Reference table (for years that a given ingredient was available on the market)
+    ingredient_rxcui_years = db_query('SELECT * FROM medication_ingredient_rxcui_years')
+
+     #Read in FDA Product-RxCUI-Years Reference table (for years that a given product was available on the market)
+    product_rxcui_years = db_query('SELECT * FROM medication_product_rxcui_years')
+
     #Join MEPS to filtered rxcui_ndc dataframe (rxcui_list)
     meps_rxcui = meps_reference.astype(str).merge(rxcui_ndc_df.astype(str)[['medication_ingredient_name', 'medication_ingredient_rxcui','medication_product_name', 'medication_product_rxcui', 'medication_ndc']], how = 'inner', left_on = 'RXNDC', right_on = 'medication_ndc')
 
+
     #Optional: Age range join - can be customized in the mdt_config.json file
     #groupby_demographic_variable: must be either an empty list [] or list of patient demographics (e.g., age, gender, state) - based on user inputs in the mdt_config.json file
 
@@ -162,20 +169,23 @@ def generate_module(rxcui_ndc_df, rxclass_name):
 
     filename = rxclass_name + '_ingredient_distrib'
     #1
-    dcp_dict['patient_count_ingredient'] = meps_rxcui[['medication_ingredient_name',  'medication_ingredient_rxcui', 'person_weight', 'DUPERSID']+groupby_demographic_variables].groupby(['medication_ingredient_name',  'medication_ingredient_rxcui', 'person_weight']+groupby_demographic_variables)['DUPERSID'].nunique()
+    #Join MEPS to ingredient_rxcui_years dataframe (rxcuis_by_fda_marketingdates)
+    meps_rxcui_ingred_years = meps_rxcui.astype(str).merge(ingredient_rxcui_years.astype(str)[['medication_ingredient_rxcui', 'year']], how = 'inner', on = 'medication_ingredient_rxcui')
+    dcp_dict['patient_count_ingredient'] = meps_rxcui_ingred_years[['medication_ingredient_name',  'medication_ingredient_rxcui', 'year', 'person_weight', 'DUPERSID']+groupby_demographic_variables].groupby(['medication_ingredient_name',  'medication_ingredient_rxcui', 'year',  'person_weight']+groupby_demographic_variables)['DUPERSID'].nunique()
     dcp_df = pd.DataFrame(dcp_dict['patient_count_ingredient']).reset_index()
     #2
     dcp_df['weighted_patient_count_ingredient'] = dcp_df['person_weight'].astype(float)*dcp_df['DUPERSID']
     #3
-    dcp_dict['patients_by_demographics_ingredient'] = dcp_df.groupby(['medication_ingredient_name']+groupby_demographic_variables)['weighted_patient_count_ingredient'].sum()
+    dcp_dict['patients_by_demographics_ingredient'] = dcp_df.groupby(['medication_ingredient_name', 'year']+groupby_demographic_variables)['weighted_patient_count_ingredient'].sum()
     dcp_demographic_df = pd.DataFrame(dcp_dict['patients_by_demographics_ingredient']).reset_index()
     #4
     if len(groupby_demographic_variables) > 0:
-        dcp_demographictotal_df = pd.merge(dcp_demographic_df,  dcp_demographic_df.groupby(groupby_demographic_variables)['weighted_patient_count_ingredient'].sum(), how = 'inner', left_on = groupby_demographic_variables, right_index=True, suffixes = ('_demographic', '_total'))
+        dcp_demographictotal_df = pd.merge(dcp_demographic_df,  dcp_demographic_df.groupby(groupby_demographic_variables+['year'])['weighted_patient_count_ingredient'].sum(), how = 'inner', left_on = groupby_demographic_variables+['year'], right_index=True, suffixes = ('_demographic', '_total'))
     else:
-        dcp_demographictotal_df = dcp_demographic_df
-        dcp_demographictotal_df['weighted_patient_count_ingredient_demographic'] = dcp_demographic_df['weighted_patient_count_ingredient']
-        dcp_demographictotal_df['weighted_patient_count_ingredient_total'] = dcp_demographic_df['weighted_patient_count_ingredient'].sum()
+        # dcp_demographictotal_df = dcp_demographic_df
+        # dcp_demographictotal_df['weighted_patient_count_ingredient_demographic'] = dcp_demographic_df['weighted_patient_count_ingredient']
+        # dcp_demographictotal_df['weighted_patient_count_ingredient_total'] = dcp_demographic_df['weighted_patient_count_ingredient'].sum()
+        dcp_demographictotal_df = pd.merge(dcp_demographic_df,  dcp_demographic_df.groupby('year')['weighted_patient_count_ingredient'].sum(), how = 'inner', left_on = 'year', right_index=True, suffixes = ('_demographic', '_total'))
     #5
     dcp_demographictotal_df['percent_ingredient_patients'] = round(dcp_demographictotal_df['weighted_patient_count_ingredient_demographic']/dcp_demographictotal_df['weighted_patient_count_ingredient_total'], 3)
     #6 TODO: change this column to medication_product_state_name(?)
@@ -199,9 +209,9 @@ def generate_module(rxcui_ndc_df, rxclass_name):
     #7
     dcp_dict['percent_ingredient_patients'] = dcp_demographictotal_df
     if len(groupby_demographic_variables) > 0:
-        dcp_dict['percent_ingredient_patients'] = dcp_dict['percent_ingredient_patients'].reset_index().pivot(index= groupby_demographic_variables, columns = 'medication_ingredient_name', values='percent_ingredient_patients').reset_index()
+        dcp_dict['percent_ingredient_patients'] = dcp_dict['percent_ingredient_patients'].reset_index().pivot(index= groupby_demographic_variables+['year'], columns = 'medication_ingredient_name', values='percent_ingredient_patients').reset_index()
     else:
-        dcp_dict['percent_ingredient_patients'] = dcp_dict['percent_ingredient_patients'][['medication_ingredient_name', 'percent_ingredient_patients']].set_index('medication_ingredient_name').T
+        dcp_dict['percent_ingredient_patients'] = dcp_dict['percent_ingredient_patients'][['medication_ingredient_name', 'percent_ingredient_patients', 'year']].set_index('medication_ingredient_name').T
         
     #Fill NULLs and save as CSV
     dcp_dict['percent_ingredient_patients'].fillna(0, inplace=True)
@@ -216,17 +226,19 @@ def generate_module(rxcui_ndc_df, rxclass_name):
     for ingred_name in medication_ingredient_list:
         filename = rxclass_name + '_product_' + ingred_name + '_distrib'
         #0
-        meps_rxcui_ingred = meps_rxcui[meps_rxcui['medication_ingredient_name']==ingred_name][['medication_product_name',  'medication_product_rxcui', 'medication_ingredient_name',  'medication_ingredient_rxcui', 'person_weight', 'DUPERSID']+groupby_demographic_variables]
+        #Join MEPS to product_rxcui_years dataframe (rxcuis_by_fda_marketingdates)
+        meps_rxcui_prod_years = meps_rxcui.astype(str).merge(product_rxcui_years.astype(str)[['medication_product_rxcui', 'year']], how = 'inner', on = 'medication_product_rxcui')
+        meps_rxcui_ingred = meps_rxcui_prod_years[meps_rxcui_prod_years['medication_ingredient_name']==ingred_name][['medication_product_name',  'medication_product_rxcui', 'medication_ingredient_name',  'medication_ingredient_rxcui', 'year', 'person_weight', 'DUPERSID']+groupby_demographic_variables]
         #1
-        dcp_dict['patient_count_product'] = meps_rxcui_ingred.groupby(['medication_product_name',  'medication_product_rxcui',  'medication_ingredient_name',  'medication_ingredient_rxcui', 'person_weight']+groupby_demographic_variables)['DUPERSID'].nunique()
+        dcp_dict['patient_count_product'] = meps_rxcui_ingred.groupby(['medication_product_name',  'medication_product_rxcui',  'medication_ingredient_name',  'medication_ingredient_rxcui', 'year', 'person_weight']+groupby_demographic_variables)['DUPERSID'].nunique()
         dcp_df = pd.DataFrame(dcp_dict['patient_count_product']).reset_index()
         #2
         dcp_df['weighted_patient_count_product'] = dcp_df['person_weight'].astype(float)*dcp_df['DUPERSID']
         #3
-        dcp_dict['patients_by_demographics_product'] = dcp_df.groupby(['medication_product_name', 'medication_ingredient_name']+groupby_demographic_variables)['weighted_patient_count_product'].sum()
+        dcp_dict['patients_by_demographics_product'] = dcp_df.groupby(['medication_product_name', 'medication_ingredient_name', 'year']+groupby_demographic_variables)['weighted_patient_count_product'].sum()
         dcp_demographic_df = pd.DataFrame(dcp_dict['patients_by_demographics_product']).reset_index()
         #4
-        dcp_demographictotal_df = pd.merge(dcp_demographic_df,  dcp_demographic_df.groupby(['medication_ingredient_name']+groupby_demographic_variables)['weighted_patient_count_product'].sum(), how = 'inner', left_on = ['medication_ingredient_name']+groupby_demographic_variables, right_index=True, suffixes = ('_demographic', '_total'))
+        dcp_demographictotal_df = pd.merge(dcp_demographic_df,  dcp_demographic_df.groupby(['medication_ingredient_name', 'year']+groupby_demographic_variables)['weighted_patient_count_product'].sum(), how = 'inner', left_on = ['medication_ingredient_name', 'year']+groupby_demographic_variables, right_index=True, suffixes = ('_demographic', '_total'))
         #5
         dcp_demographictotal_df['percent_product_patients'] = round(dcp_demographictotal_df['weighted_patient_count_product_demographic']/dcp_demographictotal_df['weighted_patient_count_product_total'], 3)
         #6 TODO: change this column to medication_product_state_name or medication_product_transition_name(?)
@@ -250,9 +262,9 @@ def generate_module(rxcui_ndc_df, rxclass_name):
         #7
         dcp_dict['percent_product_patients'] = dcp_demographictotal_df
         if len(groupby_demographic_variables) > 0:
-            dcp_dict['percent_product_patients'] = dcp_dict['percent_product_patients'].reset_index().pivot(index= groupby_demographic_variables, columns = 'medication_product_name', values='percent_product_patients').reset_index()
+            dcp_dict['percent_product_patients'] = dcp_dict['percent_product_patients'].reset_index().pivot(index= groupby_demographic_variables+['year'], columns = 'medication_product_name', values='percent_product_patients').reset_index()
         else:
-            dcp_dict['percent_product_patients'] = dcp_dict['percent_product_patients'][['medication_product_name', 'percent_product_patients']].set_index('medication_product_name').T
+            dcp_dict['percent_product_patients'] = dcp_dict['percent_product_patients'][['medication_product_name', 'percent_product_patients', 'year']].set_index('medication_product_name').T
         
         #Fill NULLs and save as CSV 
         dcp_dict['percent_product_patients'].fillna(0, inplace=True)