Using hold_out in Data, Subset Data, and Option Tables

user_hold_out_1.py

@(@\newcommand{\B}[1]{ {\bf #1} } \newcommand{\R}[1]{ {\rm #1} } \newcommand{\W}[1]{ \; #1 \; }@)@This is dismod_at-20221105 documentation: Here is a link to its current documentation . Using hold_out in Data, Subset Data, and Option Tables
Purpose
Integrands
Nodes
Data
prevalence
Sincidence
Model
Fit
Source Code

Purpose
This example shows how to use hold_out in the data_table , option_table , and hold_out_command .

Integrands
For this example there are two integrand, Sincidence and prevalence.

Nodes
The node table is set up so that there are lots of child nodes (with no data. This makes sure that the data being fit gets evenly distributed between the nodes that do have data and reaches max_fit .

Data

prevalence
All of the prevalence data is zero, but it is held out using hold_out_integrand .

Sincidence
There are many incidence data points. The first Sincidence data value is zero and it is held out using the data table hold_out equal to one. The other two Sincidence data are the true value for incidence and have the data table hold_out equal to zero. The hold_out_command is used to randomly select one of these two points to be held out.

Model
There is only one rate iota and it is constant in age and time. The corresponding model for the Sincidence data is iota . The corresponding mode for the prevalence data is 1 - exp( iota * age) .

Fit
Because the zero prevalence data and zero incidence data is held out, the fitting value for iota is very close to the true value for incidence.

Source Code


# values used to simulate data
iota_true   = 0.01
n_node      = 10
# ------------------------------------------------------------------------
import sys
import os
import copy
import math
test_program = 'example/user/hold_out_1.py'
if sys.argv[0] != test_program  or len(sys.argv) != 1 :
   usage  = 'python3 ' + test_program + '\n'
   usage += 'where python3 is the python 3 program on your system\n'
   usage += 'and working directory is the dismod_at distribution directory\n'
   sys.exit(usage)
print(test_program)
#
# import dismod_at
local_dir = os.getcwd() + '/python'
if( os.path.isdir( local_dir + '/dismod_at' ) ) :
   sys.path.insert(0, local_dir)
import dismod_at
#
# change into the build/example/user directory
if not os.path.exists('build/example/user') :
   os.makedirs('build/example/user')
os.chdir('build/example/user')
# ------------------------------------------------------------------------
# Note that the a, t values are not used for this example
def example_db (file_name) :
   # note that the a, t values are not used for this case
   def fun_iota(a, t) :
      return ('prior_iota', None, None)
   # ----------------------------------------------------------------------
   # age table:
   age_list    = [ 0.0, 5.0, 15.0, 35.0, 50.0, 75.0, 90.0, 100.0 ]
   #
   # time table:
   time_list   = [ 1990.0, 2000.0, 2010.0, 2200.0 ]
   #
   # integrand table:
   integrand_table = [
      { 'name':'Sincidence' },
      { 'name':'prevalence' }
   ]
   #
   # node table:
   node_table = [ { 'name':'n0', 'parent':'' } ]
   for node_id in range(1, n_node) :
      node_table.append( { 'name':f'n{node_id}', 'parent':'n0' } )
   #
   # weight table:
   weight_table = list()
   #
   # covariate table:
   covariate_table = list()
   #
   # mulcov table:
   mulcov_table = list()
   #
   # avgint table: empty
   avgint_table = list()
   #
   # nslist_table:
   nslist_table = dict()
   # ----------------------------------------------------------------------
   # data table:
   data_table = list()
   #
   # values that are the same for all data points
   row = {
      'density':     'gaussian',
      'meas_std':    iota_true / 10.,
      'weight':      '',
      'age_lower':    50.0,
      'age_upper':    50.0,
      'time_lower':   2000.,
      'time_upper':   2000.,
      'node':         'world',
      'subgroup':     'world',
   }
   #
   # prevalence data point with value 0.0 and held out
   # using option table hold_out command
   row['node']       = 'n0'
   row['integrand']  = 'prevalence'
   row['meas_value'] = 0.0
   row['hold_out']   = False;
   data_table.append( copy.copy(row) )
   #
   # Sincidence data point with value 0.0 and data_table hold_out 1
   row['node']       = 'n0'
   row['integrand']  = 'Sincidence'
   row['hold_out']   = True
   row['meas_value'] = 0.0
   data_table.append( copy.copy(row) )
   #
   # Sincidence data points with value iota_true and data_table hold_out 0
   # Note that only every other child node has data
   row['integrand']  = 'Sincidence'
   row['hold_out']   = False
   row['meas_value'] = iota_true
   for node_id in range(0, n_node, 2) :
      row['node'] = f'n{node_id}'
      for k in range(4) :
         data_table.append( copy.copy(row) )
   #
   # ----------------------------------------------------------------------
   # prior_table
   prior_table = [
      { # prior_iota
         'name':     'prior_iota',
         'density':  'uniform',
         'lower':    iota_true / 10.0,
         'upper':    iota_true * 10.0,
         'mean':     iota_true * 2.0,
      }
   ]
   # ----------------------------------------------------------------------
   # smooth table
   name           = 'smooth_iota'
   fun            = fun_iota
   smooth_table = [
      {  'name':name,
         'age_id':[0],
         'time_id':[0],
         'fun':fun
      }
   ]
   # ----------------------------------------------------------------------
   # rate table:
   rate_table = [
      {  'name':          'iota',
         'parent_smooth': 'smooth_iota',
      }
   ]
   # ----------------------------------------------------------------------
   # option_table
   option_table = [
      { 'name':'rate_case',              'value':'iota_pos_rho_zero'   },
      { 'name':'parent_node_name',       'value':'n0'                  },
      { 'name':'warn_on_stderr',         'value':'false'               },

      { 'name':'quasi_fixed',            'value':'false'               },
      { 'name':'max_num_iter_fixed',     'value':'50'                  },
      { 'name':'print_level_fixed',      'value':'0'                   },
      { 'name':'tolerance_fixed',        'value':'1e-8'                },

      { 'name':'max_num_iter_random',    'value':'50'                  },
      { 'name':'print_level_random',     'value':'0'                   },
      { 'name':'tolerance_random',       'value':'1e-10'               },
   ]
   # hold out all prevalence data no matter what hold_out in data table
   option_table.append(
      { 'name':'hold_out_integrand',     'value':'prevalence'          }
   )
   # ----------------------------------------------------------------------
   # subgroup_table
   subgroup_table = [ { 'subgroup':'world', 'group':'world' } ]
   # ----------------------------------------------------------------------
   # create database
   dismod_at.create_database(
      file_name,
      age_list,
      time_list,
      integrand_table,
      node_table,
      subgroup_table,
      weight_table,
      covariate_table,
      avgint_table,
      data_table,
      prior_table,
      smooth_table,
      nslist_table,
      rate_table,
      mulcov_table,
      option_table
   )
   # ----------------------------------------------------------------------
   return
# ===========================================================================
# Create database
file_name = 'example.db'
example_db(file_name)
#
#
program = '../../devel/dismod_at'
dismod_at.system_command_prc([ program, file_name, 'init' ])
#
integrand = 'Sincidence'
max_fit   = 2 * n_node
dismod_at.system_command_prc(
   [ program, file_name, 'hold_out', integrand, str(max_fit) ]
)
dismod_at.system_command_prc([ program, file_name, 'fit', 'fixed' ])
# -----------------------------------------------------------------------
# read database
new                   = False
connection            = dismod_at.create_connection(file_name, new)
var_table             = dismod_at.get_table_dict(connection, 'var')
fit_var_table         = dismod_at.get_table_dict(connection, 'fit_var')
data_table            = dismod_at.get_table_dict(connection, 'data')
data_subset_table     = dismod_at.get_table_dict(connection, 'data_subset')
integrand_table       = dismod_at.get_table_dict(connection, 'integrand')
fit_data_subset_table = dismod_at.get_table_dict(connection, 'fit_data_subset')
connection.close()
#
# only one varable in this model, iota
assert len(var_table) == 1
assert len(fit_var_table) == 1
#
# all data points are in the data_sebset table, so data_subset_id is
# the same as data_id (see data subset table documentation).
assert len(data_subset_table) == len(data_table)
assert len(fit_data_subset_table) == len(data_table)
#
# check that max_fit Sincidence values are no held out
count_fit = 0
for subset_row in data_subset_table :
   data_id         = subset_row['data_id']
   data_row        = data_table[data_id]
   integrand_id    = data_row['integrand_id']
   integrand_name  = integrand_table[integrand_id]['integrand_name']
   if integrand_name == 'Sincidence' :
      hold_out    = data_row['hold_out'] == 1 or subset_row['hold_out'] == 1
      if not hold_out :
         count_fit += 1
assert count_fit == max_fit
#
# check fitted value for iota
iota_fit = fit_var_table[0]['fit_var_value']
assert abs( 1.0 - iota_fit / iota_true ) < 1e-6
#
# check residuals for non-zero data
for (subset_id, fit_row) in enumerate(fit_data_subset_table) :
   data_row          = data_table[data_id]
   meas_value        = data_row['meas_value']
   meas_std          = data_row['meas_std']
   weighted_residual = fit_row['weighted_residual']
   integrand_id      = data_row['integrand_id']
   integrand_name    = integrand_table[integrand_id]['integrand_name']
   if integrand_name == 'prevalence' :
      age      = data_row['age_lower']
      model    = 1.0 - math.exp( - iota_fit * age )
   else :
      model    = iota_fit

   check    = ( meas_value - model ) / meas_std
   assert( 1.0 - weighted_residual / check ) < 1e-6
#
# -----------------------------------------------------------------------------
print('hold_out_1.py: OK')
# -----------------------------------------------------------------------------

Input File: example/user/hold_out_1.py