Prev
Next
Index->
contents
reference
index
search
external
Up->
dismod_at
user_example
user_sim_log.py
user_sim_log.py
@(@\newcommand{\B}[1]{ {\bf #1} }
\newcommand{\R}[1]{ {\rm #1} }
\newcommand{\W}[1]{ \; #1 \; }@)@This is dismod_at-20221105 documentation: Here is a link to its
current documentation
.
Simulating Data with Log Transformed Distribution
See Also
Example Parameters
Model
Data
Covariate Multiplier
Notation
sigma
delta
Simulations
Source Code
See Also
user_data_sim.py
Example Parameters
The following values are used to simulate the data
number_simulate = 2000
iota_true = 0.01
meas_value_global = iota_true * 1.5
eta_global = iota_true * 1e-3
meas_std_global = meas_value_global * 0.25
gamma_global = meas_value_global * 0.25
Model
The only non-zero model variable for this example is
the rate of incidence for the world which is constant in age and time.
Data
There is only one data point for this example and it's integrand is
Sincidence
.
This data has a log transformed distribution with mean
iota_true
,
offset
eta_global
, and standard deviation
meas_std_global
.
Covariate Multiplier
For this example there is one covariate multiplier.
It is a meas_noise
multiplier
and the corresponding covariate value is one.
Notation
@(@
y
@)@
is the measurement value,
meas_value_global
@(@
\mu
@)@
mean of the data,
iota_true
@(@
\eta
@)@
offset in log transform,
eta_global
@(@
\Delta
@)@
data measurement error,
meas_std_global
@(@
\gamma
@)@
meta regression error,
gamma_global
@(@
n
@)@
number of simulated data values,
number_simulate
@(@
z_i
@)@
i
-th simulate data for @(@
i = 1, \ldots , n
@)@
sigma
The transformed standard deviation
is given by
@[@
\sigma = \log( y + \eta + \Delta ) - \log(y + \eta)
@]@
delta
For this example we use the
add_std_scale_none
option in the definition of the
adjusted standard deviation
@(@
\delta
@)@ ; i.e.,
@[@
\delta = \sigma + \gamma
@]@
Simulations
The offset log transform of each simulated measurement @(@
z_i
@)@ has
the following Gaussian distribution:
@[@
\log( z_i + \eta ) - \log( \mu + \eta ) \sim N(0, \delta^2 )
@]@
Source Code
import sys
import os
import subprocess
import copy
import time
import numpy
test_program = 'example/user/sim_log.py'
if sys. argv[ 0 ] != test_program or len ( sys. argv) != 1 :
usage = 'python3 ' + test_program + '\n'
usage += 'where python3 is the python 3 program on your system\n'
usage += 'and working directory is the dismod_at distribution directory\n'
sys. exit ( usage)
print ( test_program)
#
# import dismod_at
local_dir = os. getcwd () + '/python'
if ( os. path. isdir ( local_dir + '/dismod_at' ) ) :
sys. path. insert ( 0 , local_dir)
import dismod_at
#
# change into the build/example/user directory
if not os. path. exists ( 'build/example/user' ) :
os. makedirs ( 'build/example/user' )
os. chdir ( 'build/example/user' )
#
# random_seed_str
random_seed_str = str ( int ( time. time () ) )
# ----------------------------------------------------------------------------
# run a system command
def system_command ( command) :
print ( ' ' . join ( command) )
flag = subprocess. call ( command )
if flag != 0 :
msg = 'command failed: flag = ' + str ( flag)
msg += ', random_seed = ' + random_seed_str
sys. exit ( msg)
return
# ------------------------------------------------------------------------
def fun_iota_parent ( a, t) :
return ( 'prior_iota_parent' , None, None)
def fun_gamma ( a, t):
return ( 'prior_gamma' , None, None)
# ------------------------------------------------------------------------
def example_db ( file_name) :
# ----------------------------------------------------------------------
# age table:
age_list = [ 0.0 , 100.0 ]
#
# time table:
time_list = [ 1990.0 , 2020.0 ]
#
# integrand table:
integrand_table = [
{ 'name' : 'Sincidence' , 'minimum_meas_cv' : 0.0 }
]
#
# node table:
node_table = [ { 'name' : 'world' , 'parent' : '' } ]
#
# empty tables
weight_table = list ()
covariate_table = list ()
mulcov_table = list ()
avgint_table = list ()
nslist_table = dict ()
#
# covariate table
covariate_table = [
{ 'name' : 'one' , 'reference' : 0.0 }
]
#
# mulcov table
mulcov_table = [ {
'covariate' : 'one' ,
'type' : 'meas_noise' ,
'effected' : 'Sincidence' ,
'group' : 'world' ,
'smooth' : 'smooth_gamma' ,
} ]
# ----------------------------------------------------------------------
# data table:
# values that are the same for all data rows
row = {
'meas_value' : meas_value_global,
'eta' : eta_global,
'weight' : '' ,
'hold_out' : False,
'time_lower' : 2000 .,
'time_upper' : 2000 .,
'age_lower' : 40.0 ,
'age_upper' : 40.0 ,
'subgroup' : 'world' ,
'density' : 'log_gaussian' ,
'meas_std' : meas_std_global,
'node' : 'world' ,
'integrand' : 'Sincidence' ,
'one' : 1.0 ,
}
data_table = [ row ]
# ----------------------------------------------------------------------
# prior_table
# Note that the prior mean is equal to the true value for iota
prior_table = [
{ # prior_iota_parent
'name' : 'prior_iota_parent' ,
'density' : 'uniform' ,
'lower' : iota_true / 100 .,
'upper' : iota_true * 10.0 ,
'mean' : iota_true ,
'std' : None,
'eta' : None
},{
# prior_gamma
'name' : 'prior_gamma' ,
'density' : 'uniform' ,
'lower' : gamma_global,
'upper' : gamma_global,
'mean' : gamma_global,
}
]
# ----------------------------------------------------------------------
# smooth table
age_id = 0
time_id = 0
name = 'smooth_iota_parent'
fun = fun_iota_parent
smooth_table = [
{ 'name' : name, 'age_id' :[ age_id], 'time_id' :[ time_id], 'fun' : fun }
]
name = 'smooth_gamma'
fun = fun_gamma
smooth_table. append (
{ 'name' : name, 'age_id' :[ age_id], 'time_id' :[ time_id], 'fun' : fun }
)
# ----------------------------------------------------------------------
# rate table:
rate_table = [
{ 'name' : 'iota' ,
'parent_smooth' : 'smooth_iota_parent' ,
'child_smooth' : None,
'child_nslist' : None
}
]
# ----------------------------------------------------------------------
# option_table
option_table = [
{ 'name' : 'rate_case' , 'value' : 'iota_pos_rho_zero' },
{ 'name' : 'parent_node_name' , 'value' : 'world' },
{ 'name' : 'random_seed' , 'value' : random_seed_str },
{ 'name' : 'meas_noise_effect' , 'value' : 'add_std_scale_none' },
{ 'name' : 'quasi_fixed' , 'value' : 'false' },
{ 'name' : 'max_num_iter_fixed' , 'value' : '50' },
{ 'name' : 'print_level_fixed' , 'value' : '0' },
{ 'name' : 'tolerance_fixed' , 'value' : '1e-6' },
{ 'name' : 'max_num_iter_random' , 'value' : '100' },
{ 'name' : 'print_level_random' , 'value' : '0' },
{ 'name' : 'tolerance_random' , 'value' : '1e-8' }
]
# ----------------------------------------------------------------------
# subgroup_table
subgroup_table = [ { 'subgroup' : 'world' , 'group' : 'world' } ]
# ----------------------------------------------------------------------
# create database
dismod_at. create_database (
file_name,
age_list,
time_list,
integrand_table,
node_table,
subgroup_table,
weight_table,
covariate_table,
avgint_table,
data_table,
prior_table,
smooth_table,
nslist_table,
rate_table,
mulcov_table,
option_table
)
# ----------------------------------------------------------------------
return
# ===========================================================================
# file_name
file_name = 'example.db'
#
# create example.db
example_db ( file_name)
#
# program
program = '../../devel/dismod_at'
#
# init database
system_command ([ program, file_name, 'init' ])
#
# Note that the prior mean is equal to the true value for iota
system_command ([ program, file_name, 'set' , 'truth_var' , 'prior_mean' ])
#
# create data_sim table
system_command ([ program, file_name, 'simulate' , str ( number_simulate) ])
# -----------------------------------------------------------------------
# check simulated data
from math import log
new = False
connection = dismod_at. create_connection ( file_name, new)
data_sim_table = dismod_at. get_table_dict ( connection, 'data_sim' )
data_table = dismod_at. get_table_dict ( connection, 'data' )
data_subset_table = dismod_at. get_table_dict ( connection, 'data_subset' )
residual_list = list ()
for row in data_sim_table :
data_sim_value = row[ 'data_sim_value' ]
data_subset_id = row[ 'data_subset_id' ]
data_id = data_subset_table[ data_subset_id][ 'data_id' ]
meas_value = data_table[ data_id][ 'meas_value' ]
Delta = data_table[ data_id][ 'meas_std' ]
eta = data_table[ data_id][ 'eta' ]
sigma = log ( meas_value + eta + Delta) - log ( meas_value + eta)
delta = sigma + gamma_global
residual = ( log ( data_sim_value + eta) - log ( iota_true + eta) )/ delta
residual_list. append ( residual )
residual_array = numpy. array ( residual_list )
residual_mean = residual_array. mean ()
residual_std = residual_array. std ( ddof= 1 )
# check that the mean of the residuals is within 2.5 standard deviations
assert abs ( residual_mean) <= 2.5 / numpy. sqrt ( number_simulate)
# check that the standard deviation of the residuals is near one
assert abs ( residual_std - 1.0 ) <= 2.5 / numpy. sqrt ( number_simulate)
# -----------------------------------------------------------------------------
print ( 'fit_sim.py: OK' )
Input File: example/user/sim_log.py