Prev
Next
Index->
contents
reference
index
search
external
Up->
dismod_at
user_example
user_sample_sim.py
user_sample_sim.py
@(@\newcommand{\B}[1]{ {\bf #1} }
\newcommand{\R}[1]{ {\rm #1} }
\newcommand{\W}[1]{ \; #1 \; }@)@This is dismod_at-20221105 documentation: Here is a link to its
current documentation
.
Sample Posterior Using Simulated Data
Purpose
Node Table
Model Variables
iota
u
x
Model Parameters
y
s
delta
Values
Likelihood
number_sample
number_metropolis
Truth Table
random_seed
Source Code
Purpose
The command
dismod_at database sample simulate number_sample
samples form the posterior distribution of the model_variables
using simulated data.
Node Table
For this example, the node_table
is
north_america
/ \
mexico canada
Model Variables
All of the rates are constant w.r.t. age an time in this example.
iota
We use @(@
\iota_n
@)@ to denote the incidence rate for
the parent region north_america which is a
fixed effect
.
u
We use @(@
u_m
@)@ (@(@
u_c
@)@ ) to denote the
child rate effect
for mexico (canada).
x
The model variables for this example are combined into a single vector
@(@
x \in \B{R}^3
@)@ where
@(@
x_0 = \iota_n
@)@ ,
@(@
x_1 = u_m
@)@ , and
@(@
x_2 = u_c
@)@ .
Model Parameters
y
We use @(@
y_n, y_m, y_c
@)@ to denote the measured
Sincidence
for north_america, mexico, and canada respectively.
s
We use @(@
s_n, s_m, s_c
@)@ to denote the
standard deviation of the measured noise
for north_america, mexico, and canada respectively.
delta
We use @(@
\delta
@)@ to denote the standard deviation of
the child rate effects.
(The mean for the child rate effects is zero.)
Values
import math
y_n = 1e-2
y_m = 2.0 * y_n
y_c = y_n / 2.0
s_n = y_n / 10.0
s_m = y_m / 10.0
s_c = y_c / 10.0
delta = math. log ( 2.0 )
Likelihood
We define @(@
h(y, \mu , \sigma)
@)@
to be the log-density for a @(@
\B{N}( \mu, \sigma^2 )
@)@ distribution;
i.e.,
@[@
h(y, \mu, \sigma) =
- \frac{ ( y - \mu )^2 }{ \sigma^2 }
- \log \left( \sigma \sqrt{ 2 \pi } \right)
@]@
The total log-likelihood for this example is
@[@
h( y_n, \iota_n, s_n ] +
h[ y_m, \exp( u_m ) \iota_n, s_m ] +
h[ y_c, \exp( u_c ) \iota_n, s_c ] +
h( u_m, 0, \delta ) + h( u_c , 0 , \delta )
@]@
number_sample
This is the number of samples in the
number_sample
specified by the sample command.
number_sample = 30
number_metropolis
This is the number of mcmc samples generated by the
metropolis
mcmc method.
These samples are used to check the simulate method.
number_metropolis = 500 * number_sample
Truth Table
For the data simulation, we use the following model variable values:
iota_n_true = y_n
u_m_true = math. log ( y_m / y_n )
u_c_true = math. log ( y_c / y_n )
It follows that
y_m = exp( u_m_true ) * iota_n_true
and
y_c = exp( u_c_true ) * iota_n_true
.
random_seed
Set one random seed for use by both the python code and dismod_at.
In addition, if an error occurs, this random seed will be reported; see
random_seed
in the source code below.
import time
random_seed = int ( time. time () )
Source Code
import numpy
import sys
import os
import copy
test_program = 'example/user/sample_sim.py'
if sys. argv[ 0 ] != test_program or len ( sys. argv) != 1 :
usage = 'python3 ' + test_program + '\n'
usage += 'where python3 is the python 3 program on your system\n'
usage += 'and working directory is the dismod_at distribution directory\n'
sys. exit ( usage)
print ( test_program)
#
# import dismod_at
local_dir = os. getcwd () + '/python'
if ( os. path. isdir ( local_dir + '/dismod_at' ) ) :
sys. path. insert ( 0 , local_dir)
import dismod_at
#
# change into the build/example/user directory
if not os. path. exists ( 'build/example/user' ) :
os. makedirs ( 'build/example/user' )
os. chdir ( 'build/example/user' )
# ---------------------------------------------------------------------------
#
# no need to include sqrt{2 \pi} term (it does not depend on model variables)
def h ( y, mu, sigma ) :
if sigma <= 0.0 :
return - float ( "inf" )
res = ( y - mu ) / sigma
return - res * res - math. log ( sigma )
#
def log_f ( x) :
iota_n = x[ 0 ]
u_m = x[ 1 ]
u_c = x[ 2 ]
#
ret = h ( y_n, iota_n, s_n)
ret += h ( y_m, math. exp ( u_m) * iota_n, s_m )
ret += h ( y_c, math. exp ( u_c) * iota_n, s_c )
ret += h ( u_m, 0.0 , delta) + h ( u_c, 0.0 , delta)
return ret
# ---------------------------------------------------------------------------
# Note that the a, t values are not used for this example
def example_db ( file_name) :
def fun_iota_child ( a, t) :
return ( 'prior_iota_child' , None, None)
def fun_iota_parent ( a, t) :
return ( 'prior_iota_parent' , None, None)
# ----------------------------------------------------------------------
# age table
age_list = [ 0.0 , 100.0 ]
#
# time table
time_list = [ 1995.0 , 2015.0 ]
#
# integrand table
integrand_table = [ { 'name' : 'Sincidence' } ]
#
# node table: north_america -> (mexico, canada)
node_table = [
{ 'name' : 'north_america' , 'parent' : '' },
{ 'name' : 'mexico' , 'parent' : 'north_america' },
{ 'name' : 'canada' , 'parent' : 'north_america' }
]
#
# weight table:
weight_table = list ()
#
# covariate table: no covriates
covariate_table = list ()
#
# mulcov table
mulcov_table = list ()
#
# avgint table:
avgint_table = list ()
#
# nslist_table:
nslist_table = dict ()
# ----------------------------------------------------------------------
# data table:
data_table = list ()
row = {
'node' : 'north_america' ,
'subgroup' : 'world' ,
'density' : 'gaussian' ,
'weight' : '' ,
'hold_out' : False,
'time_lower' : 2000.0 ,
'time_upper' : 2000.0 ,
'age_lower' : 50.0 ,
'age_upper' : 50.0 ,
'integrand' : 'Sincidence' ,
'meas_value' : y_n,
'meas_std' : s_n,
}
data_table. append ( copy. copy ( row) )
#
row[ 'node' ] = 'mexico' ;
row[ 'meas_value' ] = y_m
row[ 'meas_std' ] = s_m
data_table. append ( copy. copy ( row) )
#
row[ 'node' ] = 'canada' ;
row[ 'meas_value' ] = y_c
row[ 'meas_std' ] = s_c
data_table. append ( copy. copy ( row) )
#
# ----------------------------------------------------------------------
# prior_table
prior_table = [
{ # prior_iota_parent
'name' : 'prior_iota_parent' ,
'density' : 'uniform' ,
'lower' : y_n / 100.0 ,
'upper' : y_n * 100.0 ,
'mean' : y_n
},{ # prior_iota_child
'name' : 'prior_iota_child' ,
'density' : 'gaussian' ,
'mean' : 0.0 ,
'std' : delta,
}
]
# ----------------------------------------------------------------------
# smooth table
smooth_table = [
{ # smooth_iota_parent
'name' : 'smooth_iota_parent' ,
'age_id' : [ 0 ],
'time_id' : [ 0 ],
'fun' : fun_iota_parent
}, { # smooth_iota_child
'name' : 'smooth_iota_child' ,
'age_id' : [ 0 ],
'time_id' : [ 0 ],
'fun' : fun_iota_child
}
]
# ----------------------------------------------------------------------
# rate table
rate_table = [
{
'name' : 'iota' ,
'parent_smooth' : 'smooth_iota_parent' ,
'child_smooth' : 'smooth_iota_child' ,
}
]
# ----------------------------------------------------------------------
# option_table
option_table = [
{ 'name' : 'parent_node_name' , 'value' : 'north_america' },
{ 'name' : 'ode_step_size' , 'value' : '10.0' },
{ 'name' : 'random_seed' , 'value' : str ( random_seed) },
{ 'name' : 'rate_case' , 'value' : 'iota_pos_rho_zero' },
{ 'name' : 'quasi_fixed' , 'value' : 'true' },
{ 'name' : 'derivative_test_fixed' , 'value' : 'first-order' },
{ 'name' : 'max_num_iter_fixed' , 'value' : '100' },
{ 'name' : 'print_level_fixed' , 'value' : '0' },
{ 'name' : 'tolerance_fixed' , 'value' : '1e-12' },
{ 'name' : 'max_num_iter_random' , 'value' : '100' },
{ 'name' : 'print_level_random' , 'value' : '0' },
{ 'name' : 'tolerance_random' , 'value' : '1e-12' },
{ 'name' : 'zero_sum_child_rate' , 'value' : 'iota' },
]
# ----------------------------------------------------------------------
# subgroup_table
subgroup_table = [ { 'subgroup' : 'world' , 'group' : 'world' } ]
# ----------------------------------------------------------------------
# create database
dismod_at. create_database (
file_name,
age_list,
time_list,
integrand_table,
node_table,
subgroup_table,
weight_table,
covariate_table,
avgint_table,
data_table,
prior_table,
smooth_table,
nslist_table,
rate_table,
mulcov_table,
option_table
)
# ===========================================================================
file_name = 'example.db'
example_db ( file_name)
#
program = '../../devel/dismod_at'
na_string = str ( number_sample)
dismod_at. system_command_prc ([ program, file_name, 'init' ])
#
# -----------------------------------------------------------------------
# create truth_table
# -----------------------------------------------------------------------
# truth table:
new = False
connection = dismod_at. create_connection ( file_name, new)
var_table = dismod_at. get_table_dict ( connection, 'var' )
rate_table = dismod_at. get_table_dict ( connection, 'rate' )
node_table = dismod_at. get_table_dict ( connection, 'node' )
assert len ( var_table) == 3
#
tbl_name = 'truth_var'
col_name = [ 'truth_var_value' ]
col_type = [ 'real' ]
row_list = list ()
node_name2var_id = dict ()
for var_id in range ( len ( var_table) ) :
var_info = var_table[ var_id]
rate_id = var_info[ 'rate_id' ]
node_id = var_info[ 'node_id' ]
rate_name = rate_table[ rate_id][ 'rate_name' ]
node_name = node_table[ node_id][ 'node_name' ]
assert ( rate_name == 'iota' )
#
if node_name == 'north_america' :
truth_var_value = iota_n_true
elif node_name == 'mexico' :
truth_var_value = u_m_true
else :
assert node_name == 'canada'
truth_var_value = u_c_true
#
row_list. append ( [ truth_var_value ] )
node_name2var_id[ node_name] = var_id
dismod_at. create_table ( connection, tbl_name, col_name, col_type, row_list)
connection. close ()
# -----------------------------------------------------------------------
# sample simulate results
#
ns_string = str ( number_sample)
dismod_at. system_command_prc (
[ program, file_name, 'simulate' , ns_string ]
)
dismod_at. system_command_prc (
[ program, file_name, 'set' , 'start_var' , 'truth_var' ]
)
dismod_at. system_command_prc (
[ program, file_name, 'set' , 'scale_var' , 'truth_var' ]
)
dismod_at. system_command_prc (
[ program, file_name, 'sample' , 'simulate' , 'both' , ns_string ]
)
#
new = False
connection = dismod_at. create_connection ( file_name, new)
sample_table = dismod_at. get_table_dict ( connection, 'sample' )
#
# convert samples to a numpy array
sample_array = numpy. zeros ( ( number_sample, 3 ), dtype = float )
for row in sample_table :
var_id = row[ 'var_id' ]
sample_index = row[ 'sample_index' ]
sample_array[ sample_index, var_id ] = row[ 'var_value' ]
#
# compute statistics
sim_avg = numpy. average ( sample_array, axis= 0 );
sim_std = numpy. std ( sample_array, axis= 0 , ddof= 1 );
# -----------------------------------------------------------------------
# MCMC results
print ( 'mcmc' )
numpy. random. seed ( seed = random_seed )
x_start = numpy. array ( [ y_n, 0.0 , 0.0 ] )
scale = numpy. array ( [ y_n, delta, delta] ) * 0.2
( a, c) = dismod_at. metropolis ( log_f, number_metropolis, x_start, scale)
burn_in = int ( 0.1 * number_metropolis )
c = c[ burn_in :, :]
mcmc_avg = numpy. average ( c, axis= 0 )
mcmc_std = numpy. std ( c, axis= 0 , ddof= 1 )
mcmc_order = [ 'north_america' , 'mexico' , 'canada' ]
# -----------------------------------------------------------------------
# now compare values
for i in range ( len ( var_table) ) :
# node that this model variable corresponds to
node_name = mcmc_order[ i]
var_id = node_name2var_id[ node_name]
sim = sim_avg[ var_id]
mcmc = mcmc_avg[ i]
err = sim / mcmc - 1.0
if abs ( err) > 0.05 :
print ( node_name, '_avg (sim, mcmc, err) = ' , sim, mcmc, err)
print ( 'random_seed = ' , random_seed)
assert ( False)
sim = sim_std[ var_id]
mcmc = mcmc_std[ i]
err = sim / mcmc - 1.0
if abs ( err) > 0.5 :
print ( node_name, '_std (sim, mcmc, err) = ' , sim, mcmc, err)
print ( 'random_seed = ' , random_seed)
assert ( False)
# ----------------------------------------------------------------------------
print ( 'sample_sim.py: OK' )
Input File: example/user/sample_sim.py