Using Data Interval Compression

user_compress.py

@(@\newcommand{\B}[1]{ {\bf #1} } \newcommand{\R}[1]{ {\rm #1} } \newcommand{\W}[1]{ \; #1 \; }@)@This is dismod_at-20221105 documentation: Here is a link to its current documentation . Using Data Interval Compression
Purpose
Integrands
Nodes
True Iota
Model
Data
Source Code

Purpose
This example shows how to use the compression intervals option.

Integrands
For this example there is one integrand, Sincidence.

Nodes
There is only one node called world for this example. There are no random effects because there are no child nodes.

True Iota
For this example, the true model incidence rate iota is def iota_true(age) : return 0.01 * ( 1 + ((age - 50) / 50)**2 )

Model
There is only one rate, iota, and it is piecewise linear in age with knots at the age points 0, 50, and 100.

Data
There is one data point measuring Sincidence, with the true value of iota, at each of the ages 0, 50, and 100. The corresponding age intervals are [0,0], [0,100], [100,100]. The age interval for the second measurement should be [50,50], and using interval compression will make it so. This is a cooked up example where interval compression makes the solution more accurate. Under normal circumstances, the answer is less accurate but faster to compute.

Source Code


# ------------------------------------------------------------------------
import sys
import os
import csv
import copy
import math
test_program = 'example/user/compress.py'
#
# This example only runs when it is started as test_program with no extra
# command line arguments; otherwise the usage message is printed and we exit.
invoked_correctly = sys.argv[0] == test_program and len(sys.argv) == 1
if not invoked_correctly :
   usage = ''.join([
      'python3 ', test_program, '\n',
      'where python3 is the python 3 program on your system\n',
      'and working directory is the dismod_at distribution directory\n',
   ])
   sys.exit(usage)
print(test_program)
#
# import dismod_at
# If ./python/dismod_at exists below the current working directory, put
# ./python at the front of the module search path so that copy is imported.
local_dir = os.getcwd() + '/python'
if os.path.isdir(local_dir + '/dismod_at') :
   sys.path.insert(0, local_dir)
import dismod_at
#
# change into the build/example/user directory (creating it when missing)
work_dir = 'build/example/user'
if not os.path.exists(work_dir) :
   os.makedirs(work_dir)
os.chdir(work_dir)
# ------------------------------------------------------------------------
# Note that the a, t values are not used for this example
def example_db (file_name) :
   """
   Create the dismod_at input database for the interval compression example.

   file_name: name of the database file; it is passed unchanged as the
   first argument to dismod_at.create_database.

   Returns None. Every table is built in memory and then handed to
   dismod_at.create_database in a single call.
   """
   # ----------------------------------------------------------------------
   # Smoothing function for iota. The a, t arguments are not used for this
   # case; every grid point gets the same triple.
   # NOTE(review): presumably the triple is (value, dage, dtime) prior
   # names -- confirm against the create_database documentation.
   def iota_smoothing_fun(a, t) :
      return ('prior_iota', None, None)
   # ----------------------------------------------------------------------
   # age and time grids
   age_list  = [ 0.0, 50.0, 100.0 ]
   time_list = [ 1990.0, 2000.0, 2010.0, 2020.0 ]
   #
   # the only integrand and the only node
   integrand_table = [ { 'name':'Sincidence' } ]
   node_table      = [ { 'name':'world', 'parent':'' } ]
   #
   # tables that are empty for this example
   weight_table    = list()
   covariate_table = list()
   mulcov_table    = list()
   avgint_table    = list()
   nslist_table    = dict()
   # ----------------------------------------------------------------------
   # data table:
   #
   # fields shared by every data point
   common = {
      'integrand':   'Sincidence',
      'hold_out':    False,
      'density':     'gaussian',
      'meas_std':    iota_true(0.0) / 10.,
      'weight':      '',
      'time_lower':   2000.,
      'time_upper':   2000.,
      'node':         'world',
      'subgroup':     'world',
   }
   #
   # One entry per data point:
   # (age where iota_true is evaluated, age_lower, age_upper).
   # The middle point is measured at age 50 but reported on [0, 100].
   measurement_list = [
      (   0.0,   0.0,   0.0 ),
      (  50.0,   0.0, 100.0 ),
      ( 100.0, 100.0, 100.0 ),
   ]
   data_table = [
      dict(
         common,
         meas_value = iota_true(meas_age),
         age_lower  = age_lower,
         age_upper  = age_upper,
      )
      for (meas_age, age_lower, age_upper) in measurement_list
   ]
   # ----------------------------------------------------------------------
   # prior_table: a uniform prior for iota expressed relative to iota_true(0)
   iota_zero   = iota_true(0)
   prior_table = [
      {
         'name':     'prior_iota',
         'density':  'uniform',
         'lower':    iota_zero / 10.0,
         'upper':    iota_zero * 10.0,
         'mean':     iota_zero * 2.0,
      }
   ]
   # ----------------------------------------------------------------------
   # smooth table: one smoothing on ages 0, 50, 100 and the first time point
   smooth_table = [
      {
         'name':    'smooth_iota',
         'age_id':  [0, 1, 2],
         'time_id': [0],
         'fun':     iota_smoothing_fun,
      }
   ]
   # ----------------------------------------------------------------------
   # rate table: iota is the only rate and it uses smooth_iota for the parent
   rate_table = [
      { 'name':'iota', 'parent_smooth':'smooth_iota' }
   ]
   # ----------------------------------------------------------------------
   # option_table: built from (name, value) pairs
   option_pairs = [
      ( 'rate_case',            'iota_pos_rho_zero' ),
      ( 'parent_node_name',     'world'             ),
      #
      ( 'quasi_fixed',          'false'             ),
      ( 'max_num_iter_fixed',   '50'                ),
      ( 'print_level_fixed',    '0'                 ),
      ( 'tolerance_fixed',      '1e-9'              ),
      #
      ( 'max_num_iter_random',  '50'                ),
      ( 'print_level_random',   '0'                 ),
      ( 'tolerance_random',     '1e-10'             ),
   ]
   option_table = [
      { 'name':name, 'value':value } for (name, value) in option_pairs
   ]
   # ----------------------------------------------------------------------
   # subgroup_table
   subgroup_table = [ { 'subgroup':'world', 'group':'world' } ]
   # ----------------------------------------------------------------------
   # create database (the arguments are positional, so the order matters)
   dismod_at.create_database(
      file_name,
      age_list,
      time_list,
      integrand_table,
      node_table,
      subgroup_table,
      weight_table,
      covariate_table,
      avgint_table,
      data_table,
      prior_table,
      smooth_table,
      nslist_table,
      rate_table,
      mulcov_table,
      option_table
   )
# ===========================================================================
# Create database
file_name = 'example.db'
example_db(file_name)
#
# run the init command and then fit the fixed effects
program = '../../devel/dismod_at'
for command in ( [ 'init' ], [ 'fit', 'fixed' ] ) :
   dismod_at.system_command_prc( [ program, file_name ] + command )
# -----------------------------------------------------------------------
# read the age, var, and fit_var tables from the database
new        = False
connection = dismod_at.create_connection(file_name, new)
table      = {
   name : dismod_at.get_table_dict(connection, name)
   for name in [ 'age', 'var', 'fit_var' ]
}
connection.close()
age_table     = table['age']
var_table     = table['var']
fit_var_table = table['fit_var']
#
# There are three values for iota in this model
assert len(var_table) == 3
assert len(fit_var_table) == 3
#
# Check that this fit is not accurate at the middle age point (age_id 1);
# the corresponding measurement was entered with age interval [0, 100].
for (var_id, var_row) in enumerate(var_table) :
   if var_row['age_id'] != 1 :
      continue
   age = age_table[ var_row['age_id'] ]['age']
   assert age == 50.0
   fit_value = fit_var_table[var_id]['fit_var_value']
   rel_error = 1.0 - fit_value / iota_true(age)
   assert abs(rel_error) > 0.5
# -----------------------------------------------------------------------
# Now compress the age intervals for all the data. This only affects the
# second data point because the others have age intervals of size zero.
# NOTE(review): presumably the option value is 'age_size time_size' --
# confirm against the compress_interval option documentation.
compress_command = [ 'set', 'option', 'compress_interval', '100.0 0.0' ]
refit_command    = [ 'fit', 'fixed' ]
for command in ( compress_command, refit_command ) :
   dismod_at.system_command_prc( [ program, file_name ] + command )
#
# read the new fit results from the database
new           = False
connection    = dismod_at.create_connection(file_name, new)
fit_var_table = dismod_at.get_table_dict(connection, 'fit_var')
connection.close()
#
# check that this fit is accurate for every model variable
for (var_id, var_row) in enumerate(var_table) :
   age        = age_table[ var_row['age_id'] ]['age']
   fit_value  = fit_var_table[var_id]['fit_var_value']
   rel_error  = 1.0 - fit_value / iota_true(age)
   assert abs(rel_error) < 1e-6
# ---------------------------------------------------------------------------
# Now check data.csv for interval compression
#
# Go back up three directory levels from build/example/user and run the
# db2csv command on the database using bin/dismodat.py.
os.chdir('../../..')
program   = 'bin/dismodat.py'
file_name = 'build/example/user/' + file_name
dismod_at.system_command_prc([ program, file_name, 'db2csv' ])
#
# expected_age[data_id] is the value that both age_lo and age_up must have
# for that row of data.csv. The second data point was entered with the age
# interval [0, 100] and is expected to show up as [50, 50].
expected_age = { 0 : 0.0, 1 : 50.0, 2 : 100.0 }
#
# Use a with statement so data.csv is closed even when an assert fails
# (the file was previously left open). The csv module documentation
# recommends opening csv files with newline=''.
with open('build/example/user/data.csv', 'r', newline='') as data_file :
   reader = csv.DictReader(data_file)
   for (data_id, row) in enumerate(reader) :
      # only the first three rows are checked
      if data_id in expected_age :
         assert float( row['age_lo'] ) == expected_age[data_id]
         assert float( row['age_up'] ) == expected_age[data_id]
# -----------------------------------------------------------------------------
print('compress.py: OK')
# -----------------------------------------------------------------------------

Input File: example/user/compress.py