\(\newcommand{\B}[1]{ {\bf #1} }\) \(\newcommand{\R}[1]{ {\rm #1} }\)
relrisk¶
View page sourceExample Fitting Relative Risk Data¶
Nodes¶
The following is a diagram of the node tree for this example (the root_node is n0):
n0
/-----/\-----\
n1 n2
/ \ / \
n3 n4 n5 n6
node_table = [
{ 'name':'n0', 'parent':'' },
{ 'name':'n1', 'parent':'n0' },
{ 'name':'n2', 'parent':'n0' },
{ 'name':'n3', 'parent':'n1' },
{ 'name':'n4', 'parent':'n1' },
{ 'name':'n5', 'parent':'n2' },
{ 'name':'n6', 'parent':'n2' },
]
Discussion¶
This example is interesting because it gives a prefect fit to the data for all nodes except node n0. This is because there is two levels of random effects for node n0, one level of random effects for node n1 and n2, and no random effects for nodes n3, n4, n5, n6.
relative_tolerance¶
This is the relative tolerance that we will use when checking that the results are correct:
relative_tolerance = 1e-14
The fixed effect values for nodes n1 through n6 are checked to see that they are correct to this tolerance. The random effects for node n3, corresponding to the fit of node n1, is also checked. Note that all the random effects are zero except for the omega constraints. In addition, the leaf nodes n3, n4, n5, and n6 do not have any omega random effects.
fit_goal_set¶
The fit_goal_set is the leaf nodes:
fit_goal_set = { 'n3', 'n4', 'n5', 'n6' }
rate_true¶
The true value of the rates (for this example) are:
def rate_true(rate_name, node_name) :
iota_true = 1e-3
rho_true = 0.0
chi_true = 1e-1
omega_true = {
'n3' : 1e-2,
'n4' : 2e-2,
'n5' : 3e-2,
'n6' : 4e-2
}
omega_true['n1'] = (omega_true['n3'] + omega_true['n4']) / 2
omega_true['n2'] = (omega_true['n5'] + omega_true['n6']) / 2
omega_true['n0'] = (omega_true['n1'] + omega_true['n2']) / 2
#
rate2true = {
'iota' : iota_true ,
'rho' : rho_true ,
'chi' : chi_true ,
'omega' : omega_true[node_name]
}
return rate2true[ rate_name ]
Rate Priors¶
The fitted rates for this example are iota and chi (rho is zero and omega is constrained to have its true value). The parent and child priors each fitted rate is set as follows
prior_table = list()
for rate_name in [ 'iota', 'chi' ] :
rate_0 = rate_true(rate_name, 'n0')
prior_table.append(
{ 'name': f'parent_{rate_name}_prior',
'density': 'uniform',
'lower': rate_0 / 10.0,
'upper': rate_0 * 10.0,
'mean': rate_0 * 2,
}
)
Data Table¶
Each leaf node has two data values, the true value of Sincidence and relrisk for the node.
avgint Table¶
Each leaf node has predictions for three avgint values, Sincidence, mtexcess, and relrisk. The values corresponding to the fit for each leaf node is compared to the truth using check_cascade_node .
Source Code¶
# ----------------------------------------------------------------------------
# imports
# ----------------------------------------------------------------------------
import numpy
import sys
import os
import copy
import multiprocessing
import dismod_at
#
# import at_cascade with a preference current directory version
current_directory = os.getcwd()
if os.path.isfile( current_directory + '/at_cascade/__init__.py' ) :
sys.path.insert(0, current_directory)
import at_cascade
# -----------------------------------------------------------------------------
# global variables
# -----------------------------------------------------------------------------
# BEGIN relative_tolerance
relative_tolerance = 1e-14
# END relative_tolerance
#
# BEGIN fit_goal_set
fit_goal_set = { 'n3', 'n4', 'n5', 'n6' }
# END fit_goal_set
#
# BEGIN option_all_table
max_number_cpu = max(1, multiprocessing.cpu_count() - 1)
option_all = {
'result_dir': 'build/example',
'root_node_name': 'n0',
'refit_split': 'false',
'max_number_cpu': max_number_cpu,
}
option_all['root_database'] = option_all['result_dir'] + '/root.db'
# END option_all_table
#
# ----------------------------------------------------------------------------
# functions
# ----------------------------------------------------------------------------
# BEGIN rate_true
def rate_true(rate_name, node_name) :
iota_true = 1e-3
rho_true = 0.0
chi_true = 1e-1
omega_true = {
'n3' : 1e-2,
'n4' : 2e-2,
'n5' : 3e-2,
'n6' : 4e-2
}
omega_true['n1'] = (omega_true['n3'] + omega_true['n4']) / 2
omega_true['n2'] = (omega_true['n5'] + omega_true['n6']) / 2
omega_true['n0'] = (omega_true['n1'] + omega_true['n2']) / 2
#
rate2true = {
'iota' : iota_true ,
'rho' : rho_true ,
'chi' : chi_true ,
'omega' : omega_true[node_name]
}
return rate2true[ rate_name ]
# END rate_true
# ----------------------------------------------------------------------------
def root_node_db(file_name) :
#
# BEGIN prior_table
prior_table = list()
for rate_name in [ 'iota', 'chi' ] :
rate_0 = rate_true(rate_name, 'n0')
prior_table.append(
{ 'name': f'parent_{rate_name}_prior',
'density': 'uniform',
'lower': rate_0 / 10.0,
'upper': rate_0 * 10.0,
'mean': rate_0 * 2,
}
)
# END prior_table
#
# smooth_table
smooth_table = list()
fun = lambda a, t : ( f'parent_iota_prior', None, None)
smooth_table.append({
'name': f'parent_iota_smooth',
'age_id': [0],
'time_id': [0],
'fun': fun,
})
fun = lambda a, t : ( f'parent_chi_prior', None, None)
smooth_table.append({
'name': f'parent_chi_smooth',
'age_id': [0],
'time_id': [0],
'fun': fun,
})
# BEGIN node_table
node_table = [
{ 'name':'n0', 'parent':'' },
{ 'name':'n1', 'parent':'n0' },
{ 'name':'n2', 'parent':'n0' },
{ 'name':'n3', 'parent':'n1' },
{ 'name':'n4', 'parent':'n1' },
{ 'name':'n5', 'parent':'n2' },
{ 'name':'n6', 'parent':'n2' },
]
# END node_table
#
# rate_table
rate_table = list()
for rate_name in [ 'iota', 'chi' ] :
rate_table.append( {
'name': rate_name,
'parent_smooth': f'parent_{rate_name}_smooth',
'child_smooth': None ,
} )
#
# covariate_table
covariate_table = list()
#
# mulcov_table
mulcov_table = list()
#
# subgroup_table
subgroup_table = [ {'subgroup': 'world', 'group':'world'} ]
#
# integrand_table
integrand_table = [
{ 'name' : 'Sincidence' } ,
{ 'name' : 'mtexcess' } ,
{ 'name' : 'relrisk' } ,
]
#
# avgint_table
avgint_table = list()
for integrand in [ 'Sincidence', 'mtexcess', 'relrisk' ] :
row = {
'node': 'n0',
'subgroup': 'world',
'weight': '',
'time_lower': 2000.0,
'time_upper': 2000.0,
'age_lower': 50.0,
'age_upper': 50.0,
'integrand': integrand,
}
avgint_table.append( copy.copy(row) )
#
# data_table
data_table = list()
leaf_set = fit_goal_set
for integrand in [ 'Sincidence', 'relrisk' ] :
row = {
'subgroup': 'world',
'weight': '',
'time_lower': 2000.0,
'time_upper': 2000.0,
'age_lower': 50.0,
'age_upper': 50.0,
'integrand': integrand,
'density': 'gaussian',
'hold_out': False,
}
for node in leaf_set :
#
# meas_value
iota = rate_true('iota', node)
chi = rate_true('chi', node)
omega = rate_true('omega', node)
relrisk = 1 + chi / omega
if integrand == 'Sincidence' :
meas_value = iota
elif integrand == 'mtexcess' :
meas_value = chi
elif integrand == 'relrisk' :
meas_value = relrisk
#
# row
row['node'] = node
row['meas_value'] = meas_value
row['meas_std'] = meas_value / 10.0
#
# data_table
data_table.append( copy.copy(row) )
#
# age_grid
age_grid = [ 0.0, 100.0 ]
#
# time_grid
time_grid = [ 1980.0, 2020.0 ]
#
# weight table:
weight_table = list()
#
# nslist_table
nslist_table = dict()
#
# option_table
option_table = [
{ 'name':'parent_node_name', 'value':'n0'},
{ 'name':'rate_case', 'value':'iota_pos_rho_zero'},
{ 'name': 'zero_sum_child_rate', 'value':'iota'},
{ 'name':'quasi_fixed', 'value':'false'},
{ 'name':'max_num_iter_fixed', 'value':'50'},
{ 'name':'tolerance_fixed', 'value':relative_tolerance},
]
# ----------------------------------------------------------------------
# create database
dismod_at.create_database(
file_name,
age_grid,
time_grid,
integrand_table,
node_table,
subgroup_table,
weight_table,
covariate_table,
avgint_table,
data_table,
prior_table,
smooth_table,
nslist_table,
rate_table,
mulcov_table,
option_table
)
# ----------------------------------------------------------------------------
# main
# ----------------------------------------------------------------------------
def main() :
# -------------------------------------------------------------------------
# result_dir
result_dir = option_all['result_dir']
at_cascade.empty_directory(result_dir)
#
# Create root.db
root_database = option_all['root_database']
root_node_db(root_database)
#
# omega_grid
connection = dismod_at.create_connection(
root_database, new = False, readonly = True
)
age_table = dismod_at.get_table_dict(connection, 'age')
time_table = dismod_at.get_table_dict(connection, 'time')
age_id_list = list( range( len(age_table) ) )
time_id_list = list( range( len(age_table) ) )
omega_grid = { 'age': age_id_list, 'time' : time_id_list }
connection.close()
#
# omega_data
omega_data = dict()
for node_id in range(7) :
node_name = f'n{node_id}'
omega_data[node_name] = list()
omega_data[node_name].append( list() )
for age_id in omega_grid['age'] :
for time_id in omega_grid['time'] :
age = age_table[age_id]['age']
time = time_table[time_id]['time']
omega = rate_true('omega', node_name)
omega_data[node_name][0].append( omega )
#
# Create all_node.db
all_node_database = f'{result_dir}/all_node.db'
at_cascade.create_all_node_db(
all_node_database = all_node_database,
split_reference_table = None,
node_split_table = None,
option_all = option_all,
omega_grid = omega_grid,
omega_data = omega_data,
)
#
# root_node_dir
root_node_dir = f'{result_dir}/n0'
os.mkdir(root_node_dir)
#
# avgint_table
# This also erases the avgint table from root_database
avgint_table = at_cascade.extract_avgint( root_database )
#
# cascade starting at root node
at_cascade.cascade_root_node(
all_node_database = all_node_database ,
fit_goal_set = fit_goal_set ,
)
#
# check fixed effects
# The parent rate values for all but node n0 should fit exactly because
# the prior is uniform and there is only one level of random effects
for subdir in [ 'n1', 'n2', 'n1/n3', 'n1/n4', 'n2/n5', 'n2/n6' ] :
goal_database = f'{result_dir}/n0/{subdir}/dismod.db'
rate_fun = lambda r, a, t, n, c : rate_true(r, n)
at_cascade.check_cascade_node(
rate_true = rate_fun,
all_node_database = all_node_database,
fit_database = goal_database,
avgint_table = avgint_table,
relative_tolerance = float( numpy.finfo(float).eps * 100.0 ),
)
# -------------------------------------------------------------------------
# Check omega random effects are set correctly
# -------------------------------------------------------------------------
#
# n1_database
# n1 is the parent node for this fit
n1_database = f'{result_dir}/n0/n1/dismod.db'
#
# avgint_table
# n3 is the child node for the predictions
connection = dismod_at.create_connection(
n1_database, new = False, readonly = False
)
for row in avgint_table :
row['node_id'] = 3
tbl_name = 'avgint'
dismod_at.replace_table(connection, tbl_name, avgint_table)
connection.close()
#
# predict
# create predictions for node n3 corresponding to fit for n1
command = [ 'dismod_at', n1_database, 'predict', 'fit_var' ]
dismod_at.system_command_prc(command)
#
# name_table
# for name = predict, integrand, node
fit_or_root = at_cascade.fit_or_root_class(
n1_database, root_database
)
predict_table = fit_or_root.get_table('predict')
integrand_table = fit_or_root.get_table('integrand')
node_table = fit_or_root.get_table('node')
connection.close()
#
for (predict_id, predict_row) in enumerate(predict_table) :
#
# avgint_id
avgint_id = predict_row['avgint_id']
avgint_row = avgint_table[avgint_id]
assert avgint_id == predict_id
#
# node_name
node_id = avgint_row['node_id']
node_name = node_table[node_id]['node_name']
assert node_name == 'n3'
#
# integrand_name
integrand_id = avgint_table[avgint_id]['integrand_id']
integrand_name = integrand_table[integrand_id]['integrand_name']
#
# avg_integrand
avg_integrand = predict_table[avgint_id]['avg_integrand']
#
# check
iota = rate_true('iota', node_name)
chi = rate_true('chi', node_name)
omega = rate_true('omega', node_name)
if integrand_name == 'Sincidence' :
check = iota
elif integrand_name == 'mtexcess' :
check = chi
else :
assert integrand_name == 'relrisk'
check = 1.0 + chi / omega
#
# rel_error
rel_error = 1.0 - avg_integrand / check
if abs(rel_error) > relative_tolerance :
msg = f'subdir = {subdir}, rel_error = {rel_error}'
assert False, msg
#
if __name__ == '__main__' :
main()
print('relrisk: OK')