absolute_covariates.py¶

absolute_covariates: Python Source Code¶

# ----------------------------------------------------------------------------
# imports
# ----------------------------------------------------------------------------
import math
import sys
import os
import copy
import time
import csv
import random
import shutil
import dismod_at
from math import exp
#
# import at_cascade with a preference current directory version
current_directory = os.getcwd()
if os.path.isfile( current_directory + '/at_cascade/__init__.py' ) :
   sys.path.insert(0, current_directory)
import at_cascade
# -----------------------------------------------------------------------------
# global variables
# -----------------------------------------------------------------------------
# BEGIN_FIT_GOAL
fit_goal_set = { 'n3', 'n4', 'n2' }
fit_goal_table = [  { 'node_id' : 3 }, {'node_id' : 4 }, { 'node_id' : 2 } ]
# END_FIT_GOAL
#
# BEGIN split_reference_table
option_all            = {
   'refit_split':               'true',
   'result_dir':                'build/example',
   'root_node_name':            'n0',
   'root_split_reference_name': 'both',
   'split_covariate_name':      'sex',
   'max_number_cpu':            '1',
}
option_all['root_database'] = option_all['result_dir'] + '/root.db'
split_reference_table = [
   {'split_reference_name': 'female', 'split_reference_value': 1.0},
   {'split_reference_name': 'both',   'split_reference_value': 2.0},
   {'split_reference_name': 'male',   'split_reference_value': 3.0},
]
# END split_reference_table
#
# BEGIN split_index
split_index = 2
# END split_index
#
# BEGIN avg_income
avg_income = dict()
leaf_node_set     = { 3, 4, 5, 6 }
for node_id in leaf_node_set :
   node_name = 'n' + str(node_id)
   avg_income[node_name] = [ 1.0 - node_id / 10.0, 1.0, 1.0 + node_id / 10.0 ]
# child_list
# children of node 0, 1, 2 in that order
child_list = [ (1,2), (3,4), (5,6) ]
for node_id in [2, 1, 0] :
   avg_list = list()
   for split_reference_id in range(3) :
      avg = 0.0
      for child_id in child_list[node_id] :
         child_name = 'n' + str(child_id)
         avg += avg_income[child_name][split_reference_id]
      avg = avg / len( child_list[node_id] )
      avg_list.append( avg )
   node_name = 'n' + str(node_id)
   #
   avg_income[node_name] = avg_list
# END avg_income
#
# BEGIN alpha_true
alpha_true = {'vaccine': -0.3,  'income': -0.2}
# END alpha_true
#
# BEGIN split_reference_list
split_reference_list = list()
for row in split_reference_table :
   split_reference_list.append( row['split_reference_value'] )
# END split_reference_list
#
# BEGIN_1 absolute_covariates
option_all['absolute_covariates'] = 'vaccine'
# END_1 absolute_covariates
# ----------------------------------------------------------------------------
# functions
# ----------------------------------------------------------------------------
# BEGIN rate_true
def rate_true(rate, a, t, n, c) :
   sex     = c[0]
   vaccine = c[1]
   income  = c[2]
   r_0     = avg_income['n0'][split_index]
   effect  = alpha_true['income']*(income - r_0)
   effect += alpha_true['vaccine'] * vaccine
   if rate == 'iota' :
      return 1e-2 * exp(effect)
   if rate == 'omega' :
      return 2e-2 * exp(effect)
   return 0.0
# END rate_true
# ----------------------------------------------------------------------------
def root_node_db(file_name) :
   #
   # iota_n0
   sex     = split_reference_list[split_index]
   vaccine = 0.0
   income  = avg_income['n0'][split_index]
   covariate_list = [ sex, vaccine, income ]
   iota_n0        = rate_true('iota', None, None, None, covariate_list)
   # END iota_50
   #
   # prior_table
   prior_table = list()
   prior_table = [
      # BEGIN parent_value_prior
      {   'name':    'parent_value_prior',
         'density': 'gaussian',
         'lower':   iota_n0 / 10.0,
         'upper':   iota_n0 * 10.0,
         'mean':    iota_n0 ,
         'std':     iota_n0 * 10.0,
         'eta':     iota_n0 * 1e-3
      },
      # END parent_value_prior
      # BEGIN alpha_value_prior
      {
         'name':    'alpha_vaccine_value_prior',
         'density': 'gaussian',
         'lower':   - 10 * abs(alpha_true['vaccine']),
         'upper':   + 10 * abs(alpha_true['vaccine']),
         'std':     + 10 * abs(alpha_true['vaccine']),
         'mean':    0.0,
      },{
         'name':    'alpha_income_value_prior',
         'density': 'gaussian',
         'lower':   - 10 * abs(alpha_true['income']),
         'upper':   + 10 * abs(alpha_true['income']),
         'std':     + 10 * abs(alpha_true['income']),
         'mean':    0.0,
      }
      # END alpha_value_prior
   ]
   #
   # smooth_table
   smooth_table = list()
   #
   # parent_smooth
   fun = lambda a, t : ('parent_value_prior', None, None)
   smooth_table.append({
      'name':       'parent_smooth',
      'age_id':     [0],
      'time_id':    [0],
      'fun':        fun,
   })
   #
   # alpha_vaccine_smooth
   fun = lambda a, t : ('alpha_vaccine_value_prior', None, None)
   smooth_table.append({
      'name':       'alpha_vaccine_smooth',
      'age_id':     [0],
      'time_id':    [0],
      'fun':        fun,
   })
   #
   # alpha_income_smooth
   fun = lambda a, t : ('alpha_income_value_prior', None, None)
   smooth_table.append({
      'name':       'alpha_income_smooth',
      'age_id':     [0],
      'time_id':    [0],
      'fun':        fun,
   })
   #
   # node_table
   node_table = [
      { 'name':'n0',        'parent':''   },
      { 'name':'n1',        'parent':'n0' },
      { 'name':'n2',        'parent':'n0' },
      { 'name':'n3',        'parent':'n1' },
      { 'name':'n4',        'parent':'n1' },
      { 'name':'n5',        'parent':'n2' },
      { 'name':'n6',        'parent':'n2' },
   ]
   #
   # rate_table
   rate_table = [ {
      'name':           'iota',
      'parent_smooth':  'parent_smooth',
      'child_smooth':   None ,
   } ]
   #
   # covariate_table
   covariate_table = list()
   covariate_table = [
      {
      'name':     'sex',
      'reference': split_reference_list[split_index],
      },{
      'name':     'vaccine',
      'reference': 0.0,      # 0 for no vaccine, 1 for yes vaccine
      },{
      'name':     'income',
      'reference': avg_income['n0'][split_index],
      }
   ]
   #
   # mulcov_table
   mulcov_table = [
      {
      # alpha_vaccine
      'covariate':  'vaccine',
      'type':       'rate_value',
      'effected':   'iota',
      'group':      'world',
      'smooth':     'alpha_vaccine_smooth',
      },{
      # alpha_income
      'covariate':  'income',
      'type':       'rate_value',
      'effected':   'iota',
      'group':      'world',
      'smooth':     'alpha_income_smooth',
      }
   ]
   #
   # subgroup_table
   subgroup_table = [ {'subgroup': 'world', 'group':'world'} ]
   #
   # integrand_table
   integrand_table = [ {'name':'Sincidence'} ]
   for mulcov_id in range( len(mulcov_table) ) :
      integrand_table.append( { 'name': f'mulcov_{mulcov_id}' } )
   #
   # avgint_table
   avgint_table = list()
   row = {
      'node':         'n0',
      'subgroup':     'world',
      'weight':       '',
      'time_lower':   2000.0,
      'time_upper':   2000.0,
      'age_lower':    50.0,
      'age_upper':    50.0,
      'sex':          None,
      'vaccine':      None,
      'income':       None,
      'integrand':    'Sincidence',
   }
   avgint_table.append( copy.copy(row) )
   #
   # data_table
   data_table  = list()
   leaf_set    = { 'n3', 'n4', 'n5', 'n6' }
   row = {
      'subgroup':     'world',
      'weight':       '',
      'time_lower':   2000.0,
      'time_upper':   2000.0,
      'age_lower':      50.0,
      'age_upper':      50.0,
      'integrand':    'Sincidence',
      'density':      'gaussian',
      'hold_out':     False,
   }
   for node in leaf_set :
      sex        = split_reference_list[split_index]
      income     = avg_income[node][split_index]
      for vaccine in [ 0.0, 1.0 ] :
         covariate_list = [ None, vaccine, income ]
         meas_value = rate_true('iota', None, None, None, covariate_list)
         #
         row['node']       = node
         row['meas_value'] = meas_value
         row['sex']        = sex
         row['vaccine']    = vaccine
         row['income']     = income
         row['meas_std']   = meas_value / 10.0
         data_table.append( copy.copy(row) )
   #
   # age_grid
   age_grid = [ 0.0, 100.0 ]
   #
   # time_grid
   time_grid = [ 1980.0, 2020.0 ]
   #
   # weight table:
   weight_table = list()
   #
   # nslist_table
   nslist_table = dict()
   #
   # option_table
   option_table = [
      { 'name':'parent_node_name',      'value':'n0'},
      { 'name':'rate_case',             'value':'iota_pos_rho_zero'},
      { 'name': 'zero_sum_child_rate',  'value':'iota'},
      { 'name':'quasi_fixed',           'value':'false'},
      { 'name':'max_num_iter_fixed',    'value':'50'},
      { 'name':'tolerance_fixed',       'value':'1e-8'},
   ]
   # ----------------------------------------------------------------------
   # create database
   dismod_at.create_database(
      file_name,
      age_grid,
      time_grid,
      integrand_table,
      node_table,
      subgroup_table,
      weight_table,
      covariate_table,
      avgint_table,
      data_table,
      prior_table,
      smooth_table,
      nslist_table,
      rate_table,
      mulcov_table,
      option_table
   )
# ----------------------------------------------------------------------------
# main
# ----------------------------------------------------------------------------
def main() :
   # -------------------------------------------------------------------------
   # result_dir
   result_dir = option_all['result_dir']
   at_cascade.empty_directory(result_dir)
   #
   # Create root.db
   root_database       = option_all['root_database']
   root_node_db(root_database)
   #
   # avgint_table
   # This also erases the avgint table from root_database
   avgint_table = at_cascade.extract_avgint( root_database )
   #
   # connection
   connection   = dismod_at.create_connection(
      root_database, new = False, readonly = False
   )
   #
   # omega_grid
   age_table    = dismod_at.get_table_dict(connection, 'age')
   time_table   = dismod_at.get_table_dict(connection, 'time')
   age_id_list  = list( range( len(age_table) ) )
   time_id_list = list( range( len(age_table) ) )
   omega_grid   = { 'age': age_id_list, 'time' : time_id_list }
   #
   # connection
   connection.close()
   #
   # n_split
   n_split = len( split_reference_list )
   #
   # omega_data
   omega_data      = dict()
   for node_name in [ 'n0', 'n1', 'n2', 'n3', 'n4', 'n5', 'n6' ] :
      omega_data[node_name] = list()
      for k in range(n_split) :
         omega_data[node_name].append( list() )
         for age_id in omega_grid['age'] :
            for time_id in omega_grid['time'] :
               age     = age_table[age_id]['age']
               time    = time_table[time_id]['time']
               sex     = split_reference_list[k]
               vaccine = 0.0
               income  = avg_income[node_name][k]
               cov     = [ sex, vaccine, income ]
               omega   = rate_true('omega', None, None, None, cov)
               omega_data[node_name][k].append( omega )
   #
   # Create all_node.db
   all_node_database = f'{result_dir}/all_node.db'
   at_cascade.create_all_node_db(
      all_node_database      = all_node_database,
      split_reference_table  = split_reference_table,
      option_all             = option_all,
      omega_grid             = omega_grid,
      omega_data             = omega_data,
      fit_goal_table         = fit_goal_table,
   )
   #
   # cascade starting at root node
   at_cascade.cascade_root_node(
      all_node_database  = all_node_database ,
      fit_goal_set       = fit_goal_set      ,
   )
   #
   # check results
   for goal_dir in [ 'n0/n1/n3', 'n0/n1/n4', 'n0/n2' ] :
      goal_database = f'{result_dir}/{goal_dir}/dismod.db'
      at_cascade.check_cascade_node(
         rate_true          = rate_true,
         all_node_database  = all_node_database,
         fit_database       = goal_database,
         avgint_table       = avgint_table,
         relative_tolerance = 1e-2
      )
   #
   # check that fits were not run for n5 and n6
   for not_fit_dir in [ 'n0/n2/n5', 'n0/n2/n6' ] :
      assert not os.path.exists( f'{result_dir}/{not_fit_dir}' )
#
if __name__ == '__main__' :
   main()
   print('absolute_covariates: OK')