csv.binomial

View page source

Example Using Binomial Density

binomial_rate

This function scales binomial data so that its mean is the success rate in the binomial distribution:

def binomial_rate(sample_size, mean_rate) :
   count = numpy.random.binomial(n = sample_size, p = mean_rate)
   rate  = count / sample_size
   return rate

random_seed

We us the current time in seconds to seed the random number generator:

random_seed = str( int( time.time() ) )

age_grid, time_grid

We use one age and time grid for the, covariates, rate model, and the data:

age_grid   = list( range(0, 101, 5) )
time_grid  = [1980.0, 2020.0]

no_effect_iota

The iota value used to simulate the data is constant and equal to no_effect_iota = 0.02

fit_file

This dictionary will contain all the files that are written to the fit_dir : fit_file = dict()

option_fit.csv

fit_file['option_fit.csv']  =  \
'''name,value
hold_out_integrand,Sincidence
refit_split,false
ode_step_size,5.0
quasi_fixed,false
max_num_iter_fixed,50
tolerance_fixed,1e-8
'''
fit_file['option_fit.csv'] += f'random_seed,{random_seed}\n'

option_predict.csv

fit_file['option_predict.csv']  =  \
'''name,value
plot,true
db2csv,true
'''

node.csv

There is just one node, n0, in this example.

fit_file['node.csv'] = \
'''node_name,parent_name
n0,
'''

fit_goal.csv

fit_file['fit_goal.csv'] = \
'''node_name
n0
'''

covariate.csv

There are no covariates and omega is constant in this example.

omega_truth = 0.01
fit_file['covariate.csv'] = 'node_name,sex,age,time,omega\n'
for sex in [ 'female', 'male' ] :
   for age in age_grid :
      for time in time_grid :
         row   = f'n0,{sex},{age},{time},{omega_truth}\n'
         fit_file['covariate.csv'] += row

prior.csv

  1. uniform_eps_0.1 is uniform on the interval [eps,.1] where eps = 1e-6

  2. delta_prior is log Gaussian with mean 0, std 0.03, and eta=1e-5

fit_file['prior.csv'] = \
'''name,density,mean,std,eta,lower,upper
uniform_eps_0.1,uniform,0.02,,,1e-6,0.1
delta_prior,log_gaussian,0.0,0.03,1e-5,,
'''

parent_rate.csv

The parent rate model using the age-time grid and the uniform_eps_0.1 prior

data = 'rate_name,age,time,value_prior,dage_prior,dtime_prior,const_value\n'
for age in age_grid :
   for time in time_grid :
      data += f'iota,{age},{time},uniform_eps_0.1,delta_prior,delta_prior,\n'
fit_file['parent_rate.csv'] = data

predict_integrand.csv

This example will predict for incidence and prevalence using the optimal estimates of iota

fit_file['predict_integrand.csv'] = \
'''integrand_name
Sincidence
prevalence
'''

child_rate.csv

There are no child rates

fit_file['child_rate.csv'] = \
'''rate_name,value_prior
'''

mulcov.csv

There are no covariate multipliers

fit_file['mulcov.csv'] = \
'covariate,type,effected,value_prior,const_value\n'

Rest of Source Code

# fit
def fit(fit_dir) :
   #
   # csv files in fit_file
   for name in fit_file :
      file_name = f'{fit_dir}/{name}'
      file_ptr  = open(file_name, 'w')
      file_ptr.write( fit_file[name] )
      file_ptr.close()
   #
   # row
   row = dict()
   row['node_name']       = 'n0'
   row['integrand_name']  = 'prevalence'
   row['hold_out']        = 0
   row['density_name']    = 'binomial'
   #
   # table
   table   = list()
   data_id = -1
   for sex in [ 'female', 'male' ] :
      for age in age_grid[1:] :
         for time in time_grid :
            data_id += 1
            #
            # row
            row['sex']          = sex
            row['age_lower']    = age
            row['age_upper']    = age
            row['time_lower']   = time
            row['time_upper']   = time
            row['data_id']      = data_id
            relative_age        = age / age_grid[2]
            prevalence          = 1.0 - math.exp( -no_effect_iota * age )
            sample_size         =  relative_age**2 / prevalence
            meas_value          = binomial_rate(sample_size, prevalence)
            row['meas_value']   = meas_value
            row['sample_size']  = sample_size
            row['eta']          = None
            row['nu']           = None
            row['meas_std']     = None
            #
            table.append( copy.copy(row) )
   #
   # data_in.csv
   at_cascade.csv.write_table(
         file_name = f'{fit_dir}/data_in.csv' ,
         table     = table ,
   )
   #
   # fit, predict
   at_cascade.csv.fit(fit_dir)
   at_cascade.csv.predict(fit_dir)
   #
   # fit_predict_dict
   fit_predict_table = at_cascade.csv.read_table(
      file_name = f'{fit_dir}/fit_predict.csv'
   )
   #
   # row
   max_error  = 0.0
   predict_node_set = set()
   for row in fit_predict_table :
      node_name = row['node_name']
      assert node_name == 'n0'
      sex            = row['sex']
      age            = float( row['age'] )
      integrand_name = row['integrand_name']
      if integrand_name == 'Sincidence' :
         node_name     = row['node_name']
         time          = float( row['time'] )
         avg_integrand = float( row['avg_integrand'] )
         iota          = no_effect_iota
         rel_error     = (1.0 - avg_integrand / iota )
         max_error     = max(max_error, abs(rel_error) )
   if max_error > 1e-1 :
      msg  = f'max_error = {max_error}\n'
      msg += 'binomial.py: Relative error is to large (see above)'
      assert False, msg
# -----------------------------------------------------------------------------
# Without this, the mac will try to execute main on each processor.
if __name__ == '__main__' :
   #
   # fit_dir
   fit_dir = 'build/example/csv/fit'
   at_cascade.empty_directory(fit_dir)
   #
   # fit
   fit(fit_dir)
   #
   print('binomial.py: OK')
   sys.exit(0)