Prev
Next
Index->
contents
reference
index
search
external
Up->
dismod_at
user_example
user_data_sim.py
user_data_sim.py
@(@\newcommand{\B}[1]{ {\bf #1} }
\newcommand{\R}[1]{ {\rm #1} }
\newcommand{\W}[1]{ \; #1 \; }@)@This is dismod_at-20221105 documentation: Here is a link to its
current documentation
.
Explanation of Simulated Data Table, data_sim
See Also
Purpose
Random Effects
Priors
Iota
Other Rates
Covariate Multiplier
Data
Data Subset
meas_noise_effect
Notation Before Simulation
y
Capital Delta
sigma
E
delta
Simulation Notation
z
Source Code
See Also
user_sim_log.py
Purpose
This example explains the data_sim_table
by showing that the
Adjusted standard deviation
for the simulated data is the same as for the original data.
Random Effects
There are no random effects in this example.
Priors
The priors do not matter for this example except for the fact that
the truth_var_table
values for the model_variables
must satisfy the lower and upper limits in the corresponding priors.
Iota
The value
iota_true
is the simulated true rate for iota.
There is only one grid point (one model_variable
)
corresponding to
iota
, hence it is constant in age and time.
Other Rates
For this example the other rates are all zero.
This is specified by setting the
parent_smooth_id
and
child_smooth_id
to null
for the other rates.
Covariate Multiplier
There is one covariate multiplier on the covariate column one
and the rate iota
.
This is a measurement noise covariate multiplier
gamma
.
The true value for this multiplier, used to simulate data, is returned by
gamma_true( meas_noise_effect )
.
There is only one grid point in the covariate multiplier,
hence it is constant in age and time. It follows that
average noise effect
@(@
E_i ( \theta )
@)@ is constant and equal to
gamma_true
.
Data
There are
n_data
measurements of Sincidence and each has a standard
deviation
meas_std
(before adding the covariate effect).
The meas_value
do not affect (do affect)
the values in data_sim_table
when the
density
is
linear
(log scaled
).
Data Subset
Data is only simulated for
data_id
values that appear in the data_subset table.
For this case, this includes all the
data_id
values in the data table.
meas_noise_effect
see meas_noise_effect
.
Notation Before Simulation
The following values do not depend on the simulated data:
y
This is the measured value; see
y
.
Capital Delta
This is the minimum cv standard deviation corresponding to @(@
y
@)@ ; see
Delta
.
sigma
This is the transformed standard deviation corresponding to @(@
y
@)@ ; see
sigma
.
E
This is the average noise effect corresponding to @(@
y
@)@ ; see
E
.
delta
This is the adjusted standard deviation corresponding to @(@
y
@)@ ; see
delta
.
Simulation Notation
z
This is the simulated measurement value, before censoring,
in the data_sim table; see z
.
Source Code
# You can changed the values below and rerun this program
iota_true = 0.01
meas_std = 0.001
n_data = 2000
# You can changed the values above and rerun this program
# ----------------------------------------------------------------------------
import math
import sys
import os
import copy
import numpy
test_program = 'example/user/data_sim.py'
if sys. argv[ 0 ] != test_program or len ( sys. argv) != 1 :
usage = 'python3 ' + test_program + '\n'
usage += 'where python3 is the python 3 program on your system\n'
usage += 'and working directory is the dismod_at distribution directory\n'
sys. exit ( usage)
print ( test_program)
#
# import dismod_at
local_dir = os. getcwd () + '/python'
if ( os. path. isdir ( local_dir + '/dismod_at' ) ) :
sys. path. insert ( 0 , local_dir)
import dismod_at
#
# change into the build/example/user directory
if not os. path. exists ( 'build/example/user' ) :
os. makedirs ( 'build/example/user' )
os. chdir ( 'build/example/user' )
# ---------------------------------------------------------------------------
# log_density
def log_density ( density) :
assert not density. startswith ( 'cen_' )
return density. startswith ( 'log_' )
# ---------------------------------------------------------------------------
# gamma_true
def gamma_true ( meas_noise_effect) :
if meas_noise_effect. startswith ( 'add_std_' ) :
result = meas_std
elif meas_noise_effect. startswith ( 'add_var_' ) :
result = meas_std * meas_std
else :
assert False
return result
# ---------------------------------------------------------------------------
# delta_effect
def delta_effect ( meas_noise_effect, sigma, E) :
# add_std
if meas_noise_effect == 'add_std_scale_all' :
delta = sigma * ( 1.0 + E)
elif meas_noise_effect == 'add_std_scale_none' :
delta = sigma + E
elif meas_noise_effect == 'add_std_scale_log' :
if log_density ( density) :
delta = sigma * ( 1.0 + E)
else :
delta = sigma + E
# add var
elif meas_noise_effect == 'add_var_scale_all' :
delta = sigma * math. sqrt ( 1.0 + E)
elif meas_noise_effect == 'add_var_scale_none' :
delta = math. sqrt ( sigma * sigma + E )
else :
assert meas_noise_effect == 'add_var_scale_log'
if log_density ( density) :
delta = sigma * math. sqrt ( 1.0 + E)
else :
delta = math. sqrt ( sigma * sigma + E )
return delta
# ------------------------------------------------------------------------
# Note that the a, t values are not used for this example
def example_db ( file_name) :
# note that the a, t values are not used for this case
def fun_iota ( a, t) :
return ( 'prior_iota' , None, None)
def fun_gamma ( a, t) :
return ( 'prior_gamma' , None, None)
# ----------------------------------------------------------------------
# age table:
age_list = [ 0.0 , 100.0 ]
#
# time table:
time_list = [ 1990.0 , 2010.0 ]
#
# integrand table:
integrand_table = [
{ 'name' : 'Sincidence' }
]
#
# node table:
node_table = [ { 'name' : 'world' , 'parent' : '' } ]
#
# weight table:
weight_table = list ()
#
# covariate table:
covariate_table = [
{ 'name' : 'one' , 'reference' : 0.0 }
]
#
# mulcov table:
mulcov_table = [
{
'covariate' : 'one' ,
'type' : 'meas_noise' ,
'effected' : 'Sincidence' ,
'group' : 'world' ,
'smooth' : 'smooth_gamma'
}
]
#
# avgint table: empty
avgint_table = list ()
#
# nslist_table:
nslist_table = dict ()
# ----------------------------------------------------------------------
# data table:
data_table = list ()
# values that are the same for all data rows
row = {
'weight' : '' ,
'hold_out' : False,
'node' : 'world' ,
'subgroup' : 'world' ,
'one' : 1.0 ,
'age_lower' : 50.0 ,
'age_upper' : 50.0 ,
'time_lower' : 2000 .,
'time_upper' : 2000 .,
'integrand' : 'Sincidence' ,
'meas_std' : meas_std,
'eta' : meas_std,
'nu' : 10
}
# The censored densities are not included becasue one cannot recover
# sigma when censoring occurs.
density_list = [
'gaussian' , 'log_gaussian' ,
'laplace' , 'log_laplace' ,
'students' , 'log_students' ,
]
# values that change between rows:
for data_id in range ( n_data ) :
if data_id % 2 == 0 :
row[ 'meas_value' ] = 0.9 * iota_true
else :
row[ 'meas_value' ] = 1.1 * iota_true
density = density_list[ data_id % len ( density_list) ]
row[ 'density' ] = density
data_table. append ( copy. copy ( row) )
#
# ----------------------------------------------------------------------
# prior_table
prior_table = [
{ # prior_iota
'name' : 'prior_iota' ,
'density' : 'uniform' ,
'lower' : iota_true / 100.0 ,
'upper' : 1.0 ,
'mean' : iota_true / 10.0
},{ # prior_gamma
'name' : 'prior_gamma' ,
'density' : 'uniform' ,
'lower' : 0.0 ,
'upper' : 10.0 ,
'mean' : 0.01
}
]
# ----------------------------------------------------------------------
# smooth table
name = 'smooth_iota'
fun = fun_iota
age_id = 0
time_id = 0
smooth_table = [
{ 'name' : name, 'age_id' :[ age_id], 'time_id' :[ time_id], 'fun' : fun }
]
name = 'smooth_iota'
#
name = 'smooth_gamma'
fun = fun_gamma
smooth_table. append (
{ 'name' : name, 'age_id' :[ age_id], 'time_id' :[ time_id], 'fun' : fun }
)
# ----------------------------------------------------------------------
# rate table:
rate_table = [
{ 'name' : 'iota' ,
'parent_smooth' : 'smooth_iota' ,
'child_smooth' : None
}
]
# ----------------------------------------------------------------------
# option_table
option_table = [
{ 'name' : 'rate_case' , 'value' : 'iota_pos_rho_zero' },
{ 'name' : 'parent_node_name' , 'value' : 'world' },
{ 'name' : 'random_seed' , 'value' : '0' },
]
# ----------------------------------------------------------------------
# subgroup_table
subgroup_table = [ { 'subgroup' : 'world' , 'group' : 'world' } ]
# ----------------------------------------------------------------------
# create database
dismod_at. create_database (
file_name,
age_list,
time_list,
integrand_table,
node_table,
subgroup_table,
weight_table,
covariate_table,
avgint_table,
data_table,
prior_table,
smooth_table,
nslist_table,
rate_table,
mulcov_table,
option_table
)
# ----------------------------------------------------------------------
return
# ===========================================================================
# Run the init command to create the var table
file_name = 'example.db'
example_db ( file_name)
program = '../../devel/dismod_at'
#
dismod_at. system_command_prc ([ program, file_name, 'init' ])
# -----------------------------------------------------------------------
# read database
new = False
connection = dismod_at. create_connection ( file_name, new)
var_table = dismod_at. get_table_dict ( connection, 'var' )
rate_table = dismod_at. get_table_dict ( connection, 'rate' )
integrand_table = dismod_at. get_table_dict ( connection, 'integrand' )
covariate_table = dismod_at. get_table_dict ( connection, 'covariate' )
node_table = dismod_at. get_table_dict ( connection, 'node' )
# -----------------------------------------------------------------------
# truth table:
# -----------------------------------------------------------------------
meas_noise_effect_list = [
'add_std_scale_all' , 'add_std_scale_none' , 'add_std_scale_log' ,
'add_var_scale_all' , 'add_var_scale_none' , 'add_var_scale_log' ,
]
for meas_noise_effect in meas_noise_effect_list :
dismod_at. system_command_prc ([ program, file_name,
'set' , 'option' , 'meas_noise_effect' , meas_noise_effect
])
# ------------------------------------------------------------------------
# truth_var table
tbl_name = 'truth_var'
col_name = [ 'truth_var_value' ]
col_type = [ 'real' ]
row_list = list ()
var_id2true = list ()
for var_id in range ( len ( var_table) ) :
var_info = var_table[ var_id]
truth_var_value = None
var_type = var_info[ 'var_type' ]
if var_type == 'mulcov_meas_noise' :
integrand_id = var_info[ 'integrand_id' ]
integrand_name = integrand_table[ integrand_id][ 'integrand_name' ]
assert integrand_name == 'Sincidence'
#
covariate_id = var_info[ 'covariate_id' ]
covariate_name = covariate_table[ covariate_id][ 'covariate_name' ]
assert ( covariate_name == 'one' )
#
truth_var_value = gamma_true ( meas_noise_effect)
else :
assert ( var_type == 'rate' )
rate_id = var_info[ 'rate_id' ]
rate_name = rate_table[ rate_id][ 'rate_name' ]
node_id = var_info[ 'node_id' ]
node_name = node_table[ node_id][ 'node_name' ]
assert node_name == 'world'
#
truth_var_value = iota_true
#
var_id2true. append ( truth_var_value )
row_list. append ( [ truth_var_value ] )
dismod_at. sql_command ( connection, 'DROP TABLE IF EXISTS truth_var' )
dismod_at. create_table ( connection, tbl_name, col_name, col_type, row_list)
# -------------------------------------------------------------------------
# create and check the data_sim table
dismod_at. system_command_prc ([ program, file_name, 'simulate' , '1' ])
#
# check results in data_sim table
new = False
connection = dismod_at. create_connection ( file_name, new)
density_table = dismod_at. get_table_dict ( connection, 'density' )
data_table = dismod_at. get_table_dict ( connection, 'data' )
data_subset_table = dismod_at. get_table_dict ( connection, 'data_subset' )
data_sim_table = dismod_at. get_table_dict ( connection, 'data_sim' )
#
# check that all the data_id appear in the data_subset table
for data_subset_id in range ( len ( data_subset_table)) :
data_id = data_subset_table[ data_subset_id][ 'data_id' ]
assert data_id == data_subset_id
#
# check the values in the data_sim table
eps99 = 99.0 * sys. float_info. epsilon
residual_list = list ()
for data_sim_id in range ( len ( data_sim_table) ) :
#
# data_sim table valeus
row = data_sim_table[ data_sim_id]
simulate_index = row[ 'simulate_index' ]
data_subset_id = row[ 'data_subset_id' ]
data_sim_value = row[ 'data_sim_value' ]
#
# only one set of data is simulated
assert simulate_index == 0
assert data_subset_id == data_sim_id
#
# data table values
data_id = data_subset_table[ data_subset_id][ 'data_id' ]
meas_value = data_table[ data_id][ 'meas_value' ]
Delta = data_table[ data_id][ 'meas_std' ]
eta = data_table[ data_id][ 'eta' ]
density_id = data_table[ data_id][ 'density_id' ]
density = density_table[ density_id][ 'density_name' ]
#
# Values that do not depend on simulated data
y = meas_value
E = gamma_true ( meas_noise_effect)
if log_density ( density) :
sigma = math. log ( y + eta + Delta) - math. log ( y + eta)
else :
sigma = Delta
delta = delta_effect ( meas_noise_effect, sigma, E)
#
# residual
z = data_sim_value # simulated data withoout censoring
mu = iota_true # mean of the simulated data
if log_density ( density) :
residual = ( math. log ( z + eta) - math. log ( mu + eta)) / delta
else :
residual = ( z - mu) / delta
residual_list. append ( residual)
residual_array = numpy. array ( residual_list )
residual_mean = residual_array. mean ()
residual_std = residual_array. std ( ddof= 1 )
#
# check that the mean of the residuals is within 2.5 standard deviations
assert abs ( residual_mean) <= 2.5 / numpy. sqrt ( n_data)
# check that the standard deviation of the residuals is near one
assert abs ( residual_std - 1.0 ) <= 2.5 / numpy. sqrt ( n_data)
#
connection. close ()
# -----------------------------------------------------------------------------
print ( 'data_sim.py: OK' )
# -----------------------------------------------------------------------------
Input File: example/user/data_sim.py