----------------------------------------------- lines 17-371 of file: example/csv/diphtheria.py ----------------------------------------------- {xrst_begin csv.diphtheria} {xrst_spell const dage dtime mtwith mtexcess mtspecific Sincidence num cv pos meas iter std dtp } Csv Example Simulating, Fitting and Predicting Diphtheria ######################################################### Prior Versus Posterior ********************** This example does a check of priors versus posteriors; see the following function at the end: {xrst_literal # BEGIN_CHECK_SAM_PREDICT # END_CHECK_SAM_PREDICT } Global Variables **************** Global variables besides :ref:`csv.diphtheria@sim_file` and :ref:`csv.diphtheria@fit_file`. Variables that do not appear in a heading are temporaries. child_prior_std_factor ====================== The factor that multiplies the prior standard deviations: {xrst_code py}''' child_prior_std_factor = 2.0 '''{xrst_code} ode_step_size ============= The step size used to approximate the ode solution and to average integrals with respect to age and time. {xrst_code py}''' ode_step_size = 5.0 '''{xrst_code} integrand_list ============== Generate data and predict for the following integrands: {xrst_code py}''' integrand_list = [ 'mtexcess', 'mtspecific', 'mtwith', 'Sincidence' ] '''{xrst_code} age_grid, time_grid =================== Use this age-time grid for values the covariate grid and the parent rage grid. {xrst_code py}''' # age_grid = dict() age_grid['iota'] = [ 0.0, 100.00 ] age_grid['chi'] = [ 0.0 ] age_grid['rho'] = [ 0.0 ] age_grid['all'] = sorted( set( [100.0] + age_grid['iota'] + age_grid['chi'] + age_grid['rho'] ) ) # time_grid = dict() time_grid['iota'] = [ 1980.0, 2025.0 ] time_grid['chi'] = [ 1980.0 ] time_grid['rho'] = [ 1980.0 ] time_grid['all'] = sorted( set( [2025.0] + time_grid['iota'] + time_grid['chi'] + time_grid['rho'] ) ) '''{xrst_code} node_dict ========= Keys are nodes, values are corresponding parent node: {xrst_code py}''' node_dict = { 'n0' : '' , 'n1' : 'n0' , 'n2' : 'n0' , } '''{xrst_code} no_effect_rate_truth ==================== The true values (values used during simulation) for iota, rho, and chi are constant w.r.t age and time: {xrst_code py}''' no_effect_rate_truth = { 'iota' : 1e-6 , 'rho' : 13.0 , 'chi' : 1e-1 , } '''{xrst_code} omega_truth =========== {xrst_code py}''' omega_truth = 0.02 '''{xrst_code} std_random_effects_truth ======================== This is the true standard deviation of the random effects {xrst_code py}''' std_random_effects_truth = 1.0 '''{xrst_code} dtp3_multiplier_truth ===================== This factor multiples the dtp3 covariate to get the reduction in diphtheria incidence due to the vaccine: {xrst_code py}''' dtp3_multiplier_truth = -3.0 '''{xrst_code} sim_file ******** Input CSV files that are placed in the simulate directory: {xrst_code py}''' sim_file = dict() '''{xrst_code} option_sim.csv ============== {xrst_code py}''' sim_file['option_sim.csv'] = \ 'name,value\n' + \ 'float_precision,5\n' + \ 'random_depend_sex,false\n' + \ f'integrand_step_size,{ode_step_size}\n' + \ f'std_random_effects_iota,{std_random_effects_truth}\n' '''{xrst_code} node.csv ======== {xrst_code py}''' sim_file['node.csv'] = \ 'node_name,parent_name\n' for node_name in node_dict : parent_name = node_dict[node_name] sim_file['node.csv'] += f'{node_name},{parent_name}\n' '''{xrst_code} covariate.csv ============= The covariate dtp3 is the fraction of the population that received the diphtheria-tetanus-pertussis vaccine during their first year of life. {xrst_code py}''' sim_file['covariate.csv'] = 'node_name,sex,age,time,omega,dtp3\n' for node_name in node_dict : for sex in [ 'female', 'male' ] : for age in age_grid['all' ] : for time in time_grid['all'] : r = (time - 2020.0) / 100.0 dtp3 = 0.9 * ( 1.0 - r * r ) row = f'{node_name},{sex},{age},{time},{omega_truth},{dtp3}\n' sim_file['covariate.csv'] += row '''{xrst_code} multiplier_sim.csv ================== There are is one covariate multiplier in this example. {xrst_code py}''' sim_file['multiplier_sim.csv'] = \ 'multiplier_id,rate_name,covariate_or_sex,multiplier_truth\n' + \ f'0,iota,dtp3,{dtp3_multiplier_truth}' '''{xrst_code} simulate.csv ============ {xrst_code py}''' header = 'simulate_id,integrand_name,node_name,sex,age_lower,age_upper,' header += 'time_lower,time_upper,meas_std_cv,meas_std_min\n' meas_std_cv = 1.0 std_min = 1e-6 simulate_id = 0 sim_file['simulate.csv'] = header for integrand_name in integrand_list : for node_name in node_dict : for sex in [ 'female', 'male' ] : for age in age_grid['all'] : for time in time_grid['all'] : row = f'{simulate_id},{integrand_name},{node_name},{sex},' row += f'{age},{age},{time},{time},' row += f'{meas_std_cv},{std_min}\n' sim_file['simulate.csv'] += row simulate_id += 1 '''{xrst_code} no_effect_rate.csv ================== The no effect rates are constant, w.r.t age and time, during the simulation. {xrst_code py}''' sim_file['no_effect_rate.csv'] = 'rate_name,age,time,rate_truth\n' for rate_name in no_effect_rate_truth : rate_truth = no_effect_rate_truth[rate_name] sim_file['no_effect_rate.csv'] += f'{rate_name},0.0,0.0,{rate_truth}\n' '''{xrst_code} fit_file ******** Input CSV files that are placed in the fit directory: {xrst_code py}''' fit_file = dict() '''{xrst_code} Copies of Simulation Files ========================== {xrst_code py}''' fit_file['node.csv'] = sim_file['node.csv'] fit_file['covariate.csv'] = sim_file['covariate.csv'] '''{xrst_code} option_fit.csv ============== #. We use ``censor_asymptotic`` to make sure we do not get negative samples for iota and prevalence, which would cause the predictions to fail. #. We are completely ignoring the mtexcess data. It gets set to zero just before the fit, to test ignoring it. #. We are using a large number of samples so that the two realizations of the no effect iota have close sample standard deviation. {xrst_code py}''' fit_file['option_fit.csv'] = \ """name,value max_fit,500 max_fit_parent,1000 sample_method,censor_asymptotic number_sample,1000 max_num_iter_fixed,50 root_node_name,n0 refit_split,false max_abs_effect,4 quasi_fixed,false ode_method,iota_pos_rho_pos balance_sex,false freeze_type,mean child_prior_std_factor_mulcov,1 tolerance_fixed,1e-10 no_ode_ignore,mtexcess hold_out_integrand,mtexcess """ fit_file['option_fit.csv'] += \ f'child_prior_std_factor,{child_prior_std_factor}\n' fit_file['option_fit.csv'] += \ f'ode_step_size,{ode_step_size}\n' '''{xrst_code} option_predict.csv ================== A predict is run using the same directory as the corresponding fit. All of its input files are also inputs for the fit except for the option_predict.csv file. {xrst_code py}''' fit_file['option_predict.csv'] = \ """name,value db2csv,true plot,true float_precision,5 """ '''{xrst_code} fit_goal.csv ============ An empty fit_goal.csv corresponds to fitting n0 an n1 (skipping n2): {xrst_code py}''' fit_file['fit_goal.csv'] = \ """node_name n1 """ '''{xrst_code} prior.csv ========= We are using a uniform for the random effects because we have good data, know the true random effects, and are trying to reproduce it with the fit. (Often one uses a Gaussian prior on the random effects.) We are using a Gaussian for the fixed effects. If we used a uniform for the fixed effects, the child prior standard deviations would not matter. Note that the stand deviation at the root level is 1.0 (which is very large relative to the true rate values). {xrst_code py}''' delta_prior_std = 0.2 fit_file['prior.csv'] = \ 'name,density,mean,std,eta,lower,upper\n' + \ f'delta_prior,log_gaussian,0.0,{delta_prior_std},1e-10,,\n' + \ f'random_effects_prior,uniform,0.0,,,,,\n' for rate_name in no_effect_rate_truth : rate_truth = no_effect_rate_truth[rate_name] if rate_name == 'rho' : lower = rate_truth upper = rate_truth else : lower = rate_truth / 10.0 upper = rate_truth * 10.0 fit_file['prior.csv'] += \ f'prior_{rate_name},gaussian,{rate_truth},1.0,,{lower},{upper}\n' '''{xrst_code} parent_rate.csv =============== The rates are constant during simulation, but not during fitting. {xrst_code py}''' fit_file['parent_rate.csv'] = \ 'rate_name,age,time,value_prior,dage_prior,dtime_prior,const_value\n' for rate_name in no_effect_rate_truth : for age in age_grid[rate_name] : for time in time_grid[rate_name] : row = f'{rate_name},{age},{time},prior_{rate_name},' row += 'delta_prior,delta_prior,\n' fit_file['parent_rate.csv'] += row '''{xrst_code} child_rate.csv ============== {xrst_code py}''' fit_file['child_rate.csv'] = \ """rate_name,value_prior iota,random_effects_prior """ '''{xrst_code} mulcov.csv ========== {xrst_code py}''' fit_file['mulcov.csv'] = \ 'covariate,type,effected,value_prior,const_value\n' + \ f'dtp3,rate_value,iota,,{dtp3_multiplier_truth}\n' '''{xrst_code} predict_integrand.csv ===================== {xrst_code py}''' fit_file['predict_integrand.csv'] = 'integrand_name\n' for integrand_name in integrand_list : fit_file['predict_integrand.csv'] += f'{integrand_name}\n' '''{xrst_code} Rest of Source Code ******************* {xrst_literal BEGIN PYTHON END PYTHON } {xrst_end csv.diphtheria}