relrisk

View page source

Example Fitting Relative Risk Data

Nodes

The following is a diagram of the node tree for this example (the root_node is n0):

               n0
       /-----/\-----\
    n1              n2
   /  \            /  \
n3    n4        n5    n6
    node_table = [
        { 'name':'n0',        'parent':''   },
        { 'name':'n1',        'parent':'n0' },
        { 'name':'n2',        'parent':'n0' },
        { 'name':'n3',        'parent':'n1' },
        { 'name':'n4',        'parent':'n1' },
        { 'name':'n5',        'parent':'n2' },
        { 'name':'n6',        'parent':'n2' },
    ]

Discussion

This example is interesting because it gives a prefect fit to the data for all nodes except node n0. This is because there is two levels of random effects for node n0, one level of random effects for node n1 and n2, and no random effects for nodes n3, n4, n5, n6.

relative_tolerance

This is the relative tolerance that we will use when checking that the results are correct:

relative_tolerance = 1e-14

The fixed effect values for nodes n1 through n6 are checked to see that they are correct to this tolerance. The random effects for node n3, corresponding to the fit of node n1, is also checked. Note that all the random effects are zero except for the omega constraints. In addition, the leaf nodes n3, n4, n5, and n6 do not have any omega random effects.

fit_goal_set

The fit_goal_set is the leaf nodes:

fit_goal_set = { 'n3', 'n4', 'n5', 'n6' }

rate_true

The true value of the rates (for this example) are:

def rate_true(rate_name, node_name) :
    iota_true  = 1e-3
    rho_true   = 0.0
    chi_true   = 1e-1
    omega_true = {
        'n3' : 1e-2,
        'n4' : 2e-2,
        'n5' : 3e-2,
        'n6' : 4e-2
    }
    omega_true['n1'] = (omega_true['n3'] + omega_true['n4']) / 2
    omega_true['n2'] = (omega_true['n5'] + omega_true['n6']) / 2
    omega_true['n0'] = (omega_true['n1'] + omega_true['n2']) / 2
    #
    rate2true = {
        'iota'   : iota_true ,
        'rho'    : rho_true  ,
        'chi'    : chi_true  ,
        'omega'  : omega_true[node_name]
    }
    return rate2true[ rate_name ]

Rate Priors

The fitted rates for this example are iota and chi (rho is zero and omega is constrained to have its true value). The parent and child priors each fitted rate is set as follows

    prior_table = list()
    for rate_name in [ 'iota', 'chi' ] :
        rate_0   = rate_true(rate_name, 'n0')
        prior_table.append(
            {   'name':    f'parent_{rate_name}_prior',
                'density': 'uniform',
                'lower':   rate_0 / 10.0,
                'upper':   rate_0 * 10.0,
                'mean':    rate_0 * 2,
            }
        )

Data Table

Each leaf node has two data values, the true value of Sincidence and relrisk for the node.

avgint Table

Each leaf node has predictions for three avgint values, Sincidence, mtexcess, and relrisk. The values corresponding to the fit for each leaf node is compared to the truth using check_cascade_node .

Source Code

# ----------------------------------------------------------------------------
# imports
# ----------------------------------------------------------------------------
import numpy
import sys
import os
import copy
import multiprocessing
import dismod_at
#
# import at_cascade with a preference current directory version
current_directory = os.getcwd()
if os.path.isfile( current_directory + '/at_cascade/__init__.py' ) :
    sys.path.insert(0, current_directory)
import at_cascade
# -----------------------------------------------------------------------------
# global variables
# -----------------------------------------------------------------------------
# BEGIN relative_tolerance
relative_tolerance = 1e-14
# END relative_tolerance
#
# BEGIN fit_goal_set
fit_goal_set = { 'n3', 'n4', 'n5', 'n6' }
# END fit_goal_set
#
# BEGIN option_all_table
max_number_cpu = max(1, multiprocessing.cpu_count() - 1)
option_all            = {
    'result_dir':                 'build/example',
    'root_node_name':             'n0',
    'refit_split':                'false',
    'max_number_cpu':             max_number_cpu,
}
option_all['root_database'] = option_all['result_dir'] + '/root.db'
# END option_all_table
#
# ----------------------------------------------------------------------------
# functions
# ----------------------------------------------------------------------------
# BEGIN rate_true
def rate_true(rate_name, node_name) :
    iota_true  = 1e-3
    rho_true   = 0.0
    chi_true   = 1e-1
    omega_true = {
        'n3' : 1e-2,
        'n4' : 2e-2,
        'n5' : 3e-2,
        'n6' : 4e-2
    }
    omega_true['n1'] = (omega_true['n3'] + omega_true['n4']) / 2
    omega_true['n2'] = (omega_true['n5'] + omega_true['n6']) / 2
    omega_true['n0'] = (omega_true['n1'] + omega_true['n2']) / 2
    #
    rate2true = {
        'iota'   : iota_true ,
        'rho'    : rho_true  ,
        'chi'    : chi_true  ,
        'omega'  : omega_true[node_name]
    }
    return rate2true[ rate_name ]
# END rate_true
# ----------------------------------------------------------------------------
def root_node_db(file_name) :
    #
    # BEGIN prior_table
    prior_table = list()
    for rate_name in [ 'iota', 'chi' ] :
        rate_0   = rate_true(rate_name, 'n0')
        prior_table.append(
            {   'name':    f'parent_{rate_name}_prior',
                'density': 'uniform',
                'lower':   rate_0 / 10.0,
                'upper':   rate_0 * 10.0,
                'mean':    rate_0 * 2,
            }
        )
    # END prior_table
    #
    # smooth_table
    smooth_table = list()
    fun = lambda a, t : ( f'parent_iota_prior', None, None)
    smooth_table.append({
        'name':       f'parent_iota_smooth',
        'age_id':     [0],
        'time_id':    [0],
        'fun':        fun,
    })
    fun = lambda a, t : ( f'parent_chi_prior', None, None)
    smooth_table.append({
        'name':       f'parent_chi_smooth',
        'age_id':     [0],
        'time_id':    [0],
        'fun':        fun,
    })
    # BEGIN node_table
    node_table = [
        { 'name':'n0',        'parent':''   },
        { 'name':'n1',        'parent':'n0' },
        { 'name':'n2',        'parent':'n0' },
        { 'name':'n3',        'parent':'n1' },
        { 'name':'n4',        'parent':'n1' },
        { 'name':'n5',        'parent':'n2' },
        { 'name':'n6',        'parent':'n2' },
    ]
    # END node_table
    #
    # rate_table
    rate_table = list()
    for rate_name in [ 'iota', 'chi' ] :
        rate_table.append( {
            'name':           rate_name,
            'parent_smooth':  f'parent_{rate_name}_smooth',
            'child_smooth':   None ,
        } )
    #
    # covariate_table
    covariate_table = list()
    #
    # mulcov_table
    mulcov_table = list()
    #
    # subgroup_table
    subgroup_table = [ {'subgroup': 'world', 'group':'world'} ]
    #
    # integrand_table
    integrand_table = [
        { 'name' : 'Sincidence' } ,
        { 'name' : 'mtexcess' }   ,
        { 'name' : 'relrisk' }    ,
    ]
    #
    # avgint_table
    avgint_table = list()
    for integrand in [ 'Sincidence', 'mtexcess', 'relrisk' ] :
        row = {
            'node':         'n0',
            'subgroup':     'world',
            'weight':       '',
            'time_lower':   2000.0,
            'time_upper':   2000.0,
            'age_lower':    50.0,
            'age_upper':    50.0,
            'integrand':    integrand,
        }
        avgint_table.append( copy.copy(row) )
    #
    # data_table
    data_table  = list()
    leaf_set    = fit_goal_set
    for integrand in [ 'Sincidence', 'relrisk' ] :
        row = {
            'subgroup':     'world',
            'weight':       '',
            'time_lower':   2000.0,
            'time_upper':   2000.0,
            'age_lower':      50.0,
            'age_upper':      50.0,
            'integrand':    integrand,
            'density':      'gaussian',
            'hold_out':     False,
        }
        for node in leaf_set :
            #
            # meas_value
            iota    = rate_true('iota',  node)
            chi     = rate_true('chi',   node)
            omega   = rate_true('omega', node)
            relrisk = 1 + chi / omega
            if integrand == 'Sincidence' :
                meas_value = iota
            elif integrand == 'mtexcess' :
                meas_value = chi
            elif integrand == 'relrisk' :
                meas_value = relrisk
            #
            # row
            row['node']       = node
            row['meas_value'] = meas_value
            row['meas_std']   = meas_value / 10.0
            #
            # data_table
            data_table.append( copy.copy(row) )
    #
    # age_grid
    age_grid = [ 0.0, 100.0 ]
    #
    # time_grid
    time_grid = [ 1980.0, 2020.0 ]
    #
    # weight table:
    weight_table = list()
    #
    # nslist_table
    nslist_table = dict()
    #
    # option_table
    option_table = [
        { 'name':'parent_node_name',      'value':'n0'},
        { 'name':'rate_case',             'value':'iota_pos_rho_zero'},
        { 'name': 'zero_sum_child_rate',  'value':'iota'},
        { 'name':'quasi_fixed',           'value':'false'},
        { 'name':'max_num_iter_fixed',    'value':'50'},
        { 'name':'tolerance_fixed',       'value':relative_tolerance},
    ]
    # ----------------------------------------------------------------------
    # create database
    dismod_at.create_database(
        file_name,
        age_grid,
        time_grid,
        integrand_table,
        node_table,
        subgroup_table,
        weight_table,
        covariate_table,
        avgint_table,
        data_table,
        prior_table,
        smooth_table,
        nslist_table,
        rate_table,
        mulcov_table,
        option_table
    )
# ----------------------------------------------------------------------------
# main
# ----------------------------------------------------------------------------
def main() :
    # -------------------------------------------------------------------------
    # result_dir
    result_dir = option_all['result_dir']
    at_cascade.empty_directory(result_dir)
    #
    # Create root.db
    root_database       = option_all['root_database']
    root_node_db(root_database)
    #
    # omega_grid
    connection   = dismod_at.create_connection(
        root_database, new = False, readonly = True
    )
    age_table    = dismod_at.get_table_dict(connection, 'age')
    time_table   = dismod_at.get_table_dict(connection, 'time')
    age_id_list  = list( range( len(age_table) ) )
    time_id_list = list( range( len(age_table) ) )
    omega_grid   = { 'age': age_id_list, 'time' : time_id_list }
    connection.close()
    #
    # omega_data
    omega_data      = dict()
    for node_id in range(7) :
        node_name             = f'n{node_id}'
        omega_data[node_name] = list()
        omega_data[node_name].append( list() )
        for age_id in omega_grid['age'] :
            for time_id in omega_grid['time'] :
                age    = age_table[age_id]['age']
                time   = time_table[time_id]['time']
                omega  = rate_true('omega', node_name)
                omega_data[node_name][0].append( omega )
    #
    # Create all_node.db
    all_node_database = f'{result_dir}/all_node.db'
    at_cascade.create_all_node_db(
        all_node_database      = all_node_database,
        split_reference_table  = None,
        node_split_table       = None,
        option_all             = option_all,
        omega_grid             = omega_grid,
        omega_data             = omega_data,
    )
    #
    # root_node_dir
    root_node_dir = f'{result_dir}/n0'
    os.mkdir(root_node_dir)
    #
    # avgint_table
    # This also erases the avgint table from root_database
    avgint_table = at_cascade.extract_avgint( root_database )
    #
    # cascade starting at root node
    at_cascade.cascade_root_node(
        all_node_database  = all_node_database ,
        fit_goal_set       = fit_goal_set      ,
    )
    #
    # check fixed effects
    # The parent rate values for all but node n0 should fit exactly because
    # the prior is uniform and there is only one level of random effects
    for subdir in [ 'n1', 'n2', 'n1/n3', 'n1/n4', 'n2/n5', 'n2/n6' ] :
        goal_database = f'{result_dir}/n0/{subdir}/dismod.db'
        rate_fun      = lambda r, a, t, n, c : rate_true(r, n)
        at_cascade.check_cascade_node(
            rate_true          = rate_fun,
            all_node_database  = all_node_database,
            fit_database       = goal_database,
            avgint_table       = avgint_table,
            relative_tolerance = float( numpy.finfo(float).eps * 100.0 ),
        )
    # -------------------------------------------------------------------------
    # Check omega random effects are set correctly
    # -------------------------------------------------------------------------
    #
    # n1_database
    # n1 is the parent node for this fit
    n1_database = f'{result_dir}/n0/n1/dismod.db'
    #
    # avgint_table
    # n3 is the child node for the predictions
    connection = dismod_at.create_connection(
        n1_database, new = False, readonly = False
    )
    for row in avgint_table :
        row['node_id'] = 3
    tbl_name    = 'avgint'
    dismod_at.replace_table(connection, tbl_name, avgint_table)
    connection.close()
    #
    # predict
    # create predictions for node n3 corresponding to fit for n1
    command = [ 'dismod_at', n1_database, 'predict', 'fit_var' ]
    dismod_at.system_command_prc(command)
    #
    # name_table
    # for name = predict, integrand, node
    fit_or_root     = at_cascade.fit_or_root_class(
        n1_database, root_database
    )
    predict_table   = fit_or_root.get_table('predict')
    integrand_table = fit_or_root.get_table('integrand')
    node_table      = fit_or_root.get_table('node')
    connection.close()
    #
    for (predict_id, predict_row) in enumerate(predict_table) :
        #
        # avgint_id
        avgint_id  = predict_row['avgint_id']
        avgint_row = avgint_table[avgint_id]
        assert avgint_id == predict_id
        #
        # node_name
        node_id   = avgint_row['node_id']
        node_name = node_table[node_id]['node_name']
        assert node_name == 'n3'
        #
        # integrand_name
        integrand_id   = avgint_table[avgint_id]['integrand_id']
        integrand_name = integrand_table[integrand_id]['integrand_name']
        #
        # avg_integrand
        avg_integrand  = predict_table[avgint_id]['avg_integrand']
        #
        # check
        iota  = rate_true('iota', node_name)
        chi   = rate_true('chi',  node_name)
        omega = rate_true('omega', node_name)
        if integrand_name == 'Sincidence' :
            check = iota
        elif integrand_name == 'mtexcess' :
            check = chi
        else :
            assert integrand_name == 'relrisk'
            check = 1.0 + chi / omega
        #
        # rel_error
        rel_error = 1.0 - avg_integrand / check
        if abs(rel_error) > relative_tolerance :
            msg = f'subdir = {subdir}, rel_error = {rel_error}'
            assert False, msg

#
if __name__ == '__main__' :
    main()
    print('relrisk: OK')