From 500994d70a0688a6a40e74582e6f4588395f26e4 Mon Sep 17 00:00:00 2001
From: Neil Dalchau
Date: Fri, 17 Sep 2021 23:13:04 +0000
Subject: [PATCH] Merged PR 9248: Use safe_load for PyYAML

Introduced a model class look-up table, so that YAML files specify the
desired model by a string key rather than by referencing the class
directly. Referencing classes in YAML files requires unsafe loading,
which is undesirable from a security standpoint. A short sketch of the
new loading flow is included after the diff.

Some refactoring along the way, so that the code runs without appending
"src" to the PYTHONPATH.
---
 README.md                                  |   4 +-
 models/__init__.py                         |  14 +++
 models/base_model.py                       |  31 +++---
 models/debug.py                            |  27 +++--
 models/dr_blackbox.py                      |  21 ++--
 models/dr_constant.py                      |  33 +++---
 models/dr_growthrate.py                    |  15 ++-
 requirements.txt                           |   3 +-
 specs/debug.yaml                           |   2 +-
 specs/debug_precisions.yaml                |   2 +-
 specs/dr_blackbox_xval.yaml                |   2 +-
 specs/dr_blackbox_xval_hierarchical.yaml   |   2 +-
 specs/dr_constant_icml.yaml                |   2 +-
 specs/dr_constant_one.yaml                 |   2 +-
 specs/dr_constant_precisions.yaml          |   2 +-
 specs/dr_constant_xval_time_dependent.yaml |   2 +-
 specs/dr_growthrate_xval.yaml              |   2 +-
 src/call_run_xval.py                       |   2 -
 src/convenience.py                         |  62 ++++++-----
 src/decoders.py                            |   7 +-
 src/distributions.py                       |  13 +--
 src/encoders.py                            |   2 +-
 src/parameters.py                          |   4 +-
 src/plotting.py                            |   8 +-
 src/procdata.py                            |   2 +-
 src/run_xval.py                            |  54 +++++-----
 src/solvers.py                             |   5 +-
 src/utils.py                               |  16 +--
 src/vi.py                                  |   2 +-
 src/xval.py                                |  29 +++--
 tests/test_conditional_encoder.py          | 103 ++++++++----------
 tests/test_ode_solvers.py                  | 119 ++++++++++-----------
 tests/test_run_xval_icml.py                |  15 ++-
 33 files changed, 296 insertions(+), 313 deletions(-)

diff --git a/README.md b/README.md
index b0ab358..2408431 100644
--- a/README.md
+++ b/README.md
@@ -33,14 +33,14 @@ To install the python dependencies, you can use `pip` with the requirements.txt
 In Linux:
 
 ```bash
-    export PYTHONPATH=.:src
+    export PYTHONPATH=.
     export INFERENCE_DATA_DIR=data
     export INFERENCE_RESULTS_DIR=results
 ```
 
 In Windows:
 
 ```dos
-    set PYTHONPATH=.;src
+    set PYTHONPATH=.
     set INFERENCE_DATA_DIR=data
     set INFERENCE_RESULTS_DIR=results
 ```
diff --git a/models/__init__.py b/models/__init__.py
index e69de29..dcf81c7 100644
--- a/models/__init__.py
+++ b/models/__init__.py
@@ -0,0 +1,14 @@
+# This module defines the set of supported models.
+# The YAML loader uses these string keys to look up each model class.
+
+from models import debug, dr_constant, dr_growthrate, dr_blackbox
+
+LOOKUP = {
+    'debug': debug.Debug_Constant,
+    'dr_constant': dr_constant.DR_Constant,
+    'dr_constant_precisions': dr_constant.DR_Constant_Precisions,
+    'dr_growthrate': dr_growthrate.DR_Growth,
+    'dr_blackbox': dr_blackbox.DR_Blackbox,
+    'dr_blackbox_precisions': dr_blackbox.DR_BlackboxPrecisions,
+    'dr_hierarchical_blackbox': dr_blackbox.DR_HierarchicalBlackbox
+}
\ No newline at end of file
diff --git a/models/base_model.py b/models/base_model.py
index d0e70a5..873a74d 100644
--- a/models/base_model.py
+++ b/models/base_model.py
@@ -1,16 +1,13 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under a Microsoft Research License.
 
-import tensorflow as tf -from tensorflow.keras.layers import Dense -from tensorflow.keras import Sequential -from tensorflow.keras.constraints import NonNeg import numpy as np -import pdb +import tensorflow.compat.v1 as tf # type: ignore +import yaml -from solvers import modified_euler_integrate, integrate_while -from utils import default_get_value, variable_summaries -from procdata import ProcData +from src.solvers import modified_euler_integrate, integrate_while +from src.utils import default_get_value, variable_summaries +from src.procdata import ProcData def power(x, a): return tf.exp(a * tf.math.log(x)) @@ -35,13 +32,9 @@ def expand_constant_precisions(precision_list): def expand_decayed_precisions(precision_list): # pylint: disable=unused-argument raise NotImplementedError("TODO: expand_decayed_precisions") -class BaseModel(object): - # We need an init_with_params method separate from the usual __init__, because the latter is - # called automatically with no arguments by pyyaml on creation, and we need a way to feed - # params (from elsewhere in the YAML structure) into it. It would really be better construct - # it properly after the structure has been loaded. +class BaseModel: # pylint: disable=attribute-defined-outside-init - def init_with_params(self, params, procdata : ProcData): + def __init__(self, params, procdata : ProcData): self.params = params self.relevance = procdata.relevance_vectors self.default_devices = procdata.default_devices @@ -150,11 +143,11 @@ def __init__(self, nspecies, n_hidden_precisions, inputs = None, hidden_activati self.nspecies = nspecies if inputs is None: inputs = self.nspecies+1 - inp = Dense(n_hidden_precisions, activation = hidden_activation, use_bias=True, name = "prec_hidden", input_shape=(inputs,)) - act_layer = Dense(4, activation = tf.nn.sigmoid, name = "prec_act", bias_constraint = NonNeg()) - deg_layer = Dense(4, activation = tf.nn.sigmoid, name = "prec_deg", bias_constraint = NonNeg()) - self.act = Sequential([inp, act_layer]) - self.deg = Sequential([inp, deg_layer]) + inp = tf.keras.layers.Dense(n_hidden_precisions, activation = hidden_activation, use_bias=True, name = "prec_hidden", input_shape=(inputs,)) + act_layer = tf.keras.layers.Dense(4, activation = tf.nn.sigmoid, name = "prec_act", bias_constraint = tf.keras.constraints.NonNeg()) + deg_layer = tf.keras.layers.Dense(4, activation = tf.nn.sigmoid, name = "prec_deg", bias_constraint = tf.keras.constraints.NonNeg()) + self.act = tf.keras.Sequential([inp, act_layer]) + self.deg = tf.keras.Sequential([inp, deg_layer]) for layer in [inp, act_layer, deg_layer]: weights, bias = layer.weights diff --git a/models/debug.py b/models/debug.py index a61c84a..9f27bff 100644 --- a/models/debug.py +++ b/models/debug.py @@ -1,10 +1,9 @@ -import tensorflow as tf -from tensorflow.compat.v1 import verify_tensor_all_finite +import tensorflow.compat.v1 as tf # type: ignore from models.base_model import BaseModel class Debug_Constant(BaseModel): - def init_with_params(self, params, procdata): - super(Debug_Constant, self).init_with_params(params, procdata) + def __init__(self, params, procdata): + super(Debug_Constant, self).__init__(params, procdata) self.species = ['OD', 'RFP', 'YFP', 'CFP'] self.n_species = len(self.species) @@ -30,26 +29,26 @@ def gen_reaction_equations(self, theta, treatments, dev_1hot, condition_on_devic r = tf.clip_by_value(theta.r, 0.1, 2.0) def reaction_equations(state, t): - state = verify_tensor_all_finite(state, "state NOT finite") + state = 
tf.verify_tensor_all_finite(state, "state NOT finite") x, rfp, yfp, cfp = tf.unstack(state, axis=2) - x = verify_tensor_all_finite(x, "x NOT finite") - rfp = verify_tensor_all_finite(rfp, "rfp NOT finite") - yfp = verify_tensor_all_finite(yfp, "yfp NOT finite") - cfp = verify_tensor_all_finite(cfp, "cfp NOT finite") + x = tf.verify_tensor_all_finite(x, "x NOT finite") + rfp = tf.verify_tensor_all_finite(rfp, "rfp NOT finite") + yfp = tf.verify_tensor_all_finite(yfp, "yfp NOT finite") + cfp = tf.verify_tensor_all_finite(cfp, "cfp NOT finite") gamma = r * (1.0 - x) - gamma = verify_tensor_all_finite(gamma, "gamma NOT finite") + gamma = tf.verify_tensor_all_finite(gamma, "gamma NOT finite") # Right-hand sides d_x = x * gamma #d_x = verify_tensor_all_finite(d_x, "d_x NOT finite") d_rfp = 1.0 - (gamma + 1.0) * rfp - d_rfp = verify_tensor_all_finite(d_rfp, "d_rfp NOT finite") + d_rfp = tf.verify_tensor_all_finite(d_rfp, "d_rfp NOT finite") d_yfp = 1.0 - (gamma + 1.0) * yfp - d_yfp = verify_tensor_all_finite(d_yfp, "d_yfp NOT finite") + d_yfp = tf.verify_tensor_all_finite(d_yfp, "d_yfp NOT finite") d_cfp = 1.0 - (gamma + 1.0) * cfp - d_cfp = verify_tensor_all_finite(d_cfp, "d_cfp NOT finite") + d_cfp = tf.verify_tensor_all_finite(d_cfp, "d_cfp NOT finite") X = tf.stack([d_x, d_rfp, d_yfp, d_cfp], axis=2) - X = verify_tensor_all_finite(X, "RHS NOT finite") + X = tf.verify_tensor_all_finite(X, "RHS NOT finite") return X return reaction_equations \ No newline at end of file diff --git a/models/dr_blackbox.py b/models/dr_blackbox.py index bceb979..69e430d 100644 --- a/models/dr_blackbox.py +++ b/models/dr_blackbox.py @@ -3,15 +3,14 @@ from models.base_model import BaseModel, NeuralPrecisions from src.utils import default_get_value, variable_summaries -import tensorflow as tf -from tensorflow.compat.v1 import keras +import tensorflow.compat.v1 as tf # type: ignore +from tensorflow import keras import numpy as np -import pdb class DR_Blackbox( BaseModel ): - def init_with_params( self, params, procdata ): - super(DR_Blackbox, self).init_with_params( params, procdata ) + def __init__( self, params, procdata ): + super(DR_Blackbox, self).__init__( params, procdata ) self.species = ['OD', 'RFP', 'YFP', 'CFP'] self.nspecies = 4 # do the other inits now @@ -69,8 +68,8 @@ def observe( self, x_sample, theta ): class DR_BlackboxStudentT( DR_Blackbox ): - def init_with_params( self, params, procdata ): - super(DR_BlackboxStudentT, self).init_with_params( params, procdata ) + def __init__( self, params, procdata ): + super(DR_BlackboxStudentT, self).__init__( params, procdata ) # use a fixed gamma prior over precisions self.alpha = params['precision_alpha'] @@ -103,8 +102,8 @@ def log_prob_observations( self, x_predict, x_obs, theta, x_sample ): return log_prob class DR_BlackboxPrecisions( DR_Blackbox ): - def init_with_params( self, params, procdata ): - super(DR_BlackboxPrecisions, self).init_with_params( params, procdata ) + def __init__( self, params, procdata ): + super(DR_BlackboxPrecisions, self).__init__( params, procdata ) self.init_prec = params['init_prec'] self.n_hidden_precisions = params['n_hidden_decoder_precisions'] self.n_states = 4 + self.n_latent_species + 4 @@ -178,8 +177,8 @@ def reaction_equations( state, t ): class DR_HierarchicalBlackbox( DR_BlackboxPrecisions ): - def init_with_params( self, params, procdata ): - super(DR_HierarchicalBlackbox, self).init_with_params( params, procdata ) + def __init__( self, params, procdata ): + super(DR_HierarchicalBlackbox, self).__init__( params, 
procdata )
         # do the other inits now
         self.n_x = params['n_x']
         self.n_y = params['n_y']
diff --git a/models/dr_constant.py b/models/dr_constant.py
index 9c3bc75..8cf4c03 100644
--- a/models/dr_constant.py
+++ b/models/dr_constant.py
@@ -1,17 +1,16 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under a Microsoft Research License.
 
-from models.base_model import BaseModel, log_prob_gaussian, NeuralPrecisions
-from src.utils import default_get_value, variable_summaries
-import tensorflow as tf
-from tensorflow.compat.v1 import keras, verify_tensor_all_finite
 import numpy as np
-import pdb
+import tensorflow.compat.v1 as tf # type: ignore
+
+from models.base_model import BaseModel, NeuralPrecisions
+from src.utils import default_get_value, variable_summaries
 
 class DR_Constant(BaseModel):
 
-    def init_with_params(self, params, procdata):
-        super(DR_Constant, self).init_with_params(params, procdata)
+    def __init__(self, params, procdata):
+        super(DR_Constant, self).__init__(params, procdata)
         # do the other inits now
         self.use_aRFP = default_get_value(params, "use_aRFP", False)
         self.species = ['OD', 'RFP', 'YFP', 'CFP', 'F530', 'F480', 'LuxR', 'LasR']
@@ -68,7 +67,7 @@ def gen_reaction_equations(self, theta, treatments, dev_1hot, condition_on_devic
         # condition on device information by mapping param_cond = f(param, d; \phi) where d is one-hot rep of device
         # currently, f is a one-layer MLP with NO activation function (e.g., offset and scale only)
         if condition_on_device:
-            kinit = keras.initializers.RandomNormal(mean=2.0, stddev=1.5)
+            kinit = tf.keras.initializers.RandomNormal(mean=2.0, stddev=1.5)
             ones = tf.tile([[1.0]],tf.shape(theta.r))
             aR = self.device_conditioner(ones, 'aR', dev_1hot, kernel_initializer=kinit)
             aS = self.device_conditioner(ones, 'aS', dev_1hot, kernel_initializer=kinit)
@@ -99,8 +98,8 @@ def reaction_equations(state, t):
                 P81 = (e81 + KGR_81 * boundLuxR + KGS_81 * boundLasR) / (1.0 + KGR_81 * boundLuxR + KGS_81 * boundLasR)
 
                 # Check they are finite
-                boundLuxR = verify_tensor_all_finite(boundLuxR, "boundLuxR NOT finite")
-                boundLasR = verify_tensor_all_finite(boundLasR, "boundLasR NOT finite")
+                boundLuxR = tf.verify_tensor_all_finite(boundLuxR, "boundLuxR NOT finite")
+                boundLasR = tf.verify_tensor_all_finite(boundLasR, "boundLasR NOT finite")
 
                 # Right-hand sides
                 d_x = gamma * x
@@ -122,8 +121,8 @@ def reaction_equations(state, t):
 
 class DR_ConstantStudentT(DR_Constant):
 
-    def init_with_params(self, params):
-        super(DR_ConstantStudentT, self).init_with_params(params)
+    def __init__(self, params, procdata):
+        super(DR_ConstantStudentT, self).__init__(params, procdata)
 
         # use a fixed gamma prior over precisions
         self.alpha = params['precision_alpha']
@@ -156,8 +155,8 @@ def log_prob_observations(self, x_predict, x_obs, theta, x_sample):
 
 class DR_Constant_Precisions(DR_Constant):
 
-    def init_with_params(self, params, procdata):
-        super(DR_Constant_Precisions, self).init_with_params(params, procdata)
+    def __init__(self, params, procdata):
+        super(DR_Constant_Precisions, self).__init__(params, procdata)
         self.species = ['OD', 'RFP', 'YFP', 'CFP', 'F510', 'F430', 'LuxR', 'LasR']
 
         self.init_prec = default_get_value(params, 'init_prec', 0.00001)
@@ -223,7 +222,7 @@ def gen_reaction_equations(self, theta, treatments, dev_1hot, condition_on_devic
         # condition on device information by mapping param_cond = f(param, d; \phi) where d is one-hot rep of device
         # currently, f is a one-layer MLP with NO activation function (e.g., offset and scale only)
         if condition_on_device:
-            kinit = 
keras.initializers.RandomNormal(mean=2.0, stddev=1.5)
+            kinit = tf.keras.initializers.RandomNormal(mean=2.0, stddev=1.5)
             ones = tf.tile([[1.0]], tf.shape(theta.r))
             aR = self.device_conditioner(ones, 'aR', dev_1hot, kernel_initializer=kinit)
             aS = self.device_conditioner(ones, 'aS', dev_1hot, kernel_initializer=kinit)
@@ -255,8 +254,8 @@ def reaction_equations(state, t):
                 P81 = (e81 + KGR_81 * boundLuxR + KGS_81 * boundLasR) / (1.0 + KGR_81 * boundLuxR + KGS_81 * boundLasR)
 
                 # Check they are finite
-                boundLuxR = verify_tensor_all_finite(boundLuxR, "boundLuxR NOT finite")
-                boundLasR = verify_tensor_all_finite(boundLasR, "boundLasR NOT finite")
+                boundLuxR = tf.verify_tensor_all_finite(boundLuxR, "boundLuxR NOT finite")
+                boundLasR = tf.verify_tensor_all_finite(boundLasR, "boundLasR NOT finite")
 
                 # Right-hand sides
                 d_x = gamma * x
diff --git a/models/dr_growthrate.py b/models/dr_growthrate.py
index 1bdc837..495c4d2 100644
--- a/models/dr_growthrate.py
+++ b/models/dr_growthrate.py
@@ -1,10 +1,9 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under a Microsoft Research License.
 
-from src.utils import default_get_value, variable_summaries
+from src.utils import variable_summaries
 from models.dr_constant import DR_Constant
-import tensorflow as tf
-from tensorflow.compat.v1 import keras, verify_tensor_all_finite
+import tensorflow.compat.v1 as tf # type: ignore
 import numpy as np
 
 class DR_Growth( DR_Constant ):
@@ -50,7 +49,7 @@ def gen_reaction_equations( self, theta, treatments, dev_1hot, condition_on_devi
         # condition on device information by mapping param_cond = f(param, d; \phi) where d is one-hot rep of device
         # currently, f is a one-layer MLP with NO activation function (e.g., offset and scale only)
         if condition_on_device:
-            kinit = keras.initializers.RandomNormal(mean=2.0, stddev=1.5)
+            kinit = tf.keras.initializers.RandomNormal(mean=2.0, stddev=1.5)
             ones = tf.tile([[1.0]], tf.shape(theta.r))
             aR = self.device_conditioner(ones, 'aR', dev_1hot, kernel_initializer=kinit)
             aS = self.device_conditioner(ones, 'aS', dev_1hot, kernel_initializer=kinit)
@@ -83,8 +82,8 @@ def reaction_equations( state, t ):
                 #P81 = func(luxR, lasR, c6, c12)
 
                 # Check they are finite
-                boundLuxR = verify_tensor_all_finite(boundLuxR, "boundLuxR NOT finite")
-                boundLasR = verify_tensor_all_finite(boundLasR, "boundLasR NOT finite")
+                boundLuxR = tf.verify_tensor_all_finite(boundLuxR, "boundLuxR NOT finite")
+                boundLasR = tf.verify_tensor_all_finite(boundLasR, "boundLasR NOT finite")
 
                 # Right-hand sides
                 d_x = gamma*x
@@ -102,8 +101,8 @@ def reaction_equations( state, t ):
 
 class DR_GrowthStudentT( DR_Growth ):
 
-    def init_with_params( self, params ):
-        super(DR_GrowthStudentT, self).init_with_params( params )
+    def __init__( self, params, procdata ):
+        super(DR_GrowthStudentT, self).__init__( params, procdata )
 
         # use a fixed gamma prior over precisions
         self.alpha = params['precision_alpha']
diff --git a/requirements.txt b/requirements.txt
index c7ef389..573626c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,4 +4,5 @@ numpy==1.18.5
 matplotlib
 pandas
 seaborn
-pyyaml
\ No newline at end of file
+pyyaml
+pytest
\ No newline at end of file
diff --git a/specs/debug.yaml b/specs/debug.yaml
index c073158..ad06521 100644
--- a/specs/debug.yaml
+++ b/specs/debug.yaml
@@ -13,7 +13,7 @@ data:
   conditions: ["C6","C12"]
 
 params:
-  model: !!python/object:models.debug.Debug_Constant {}
+  model: debug
 
   constant:
     init_x: 0.002
diff --git a/specs/debug_precisions.yaml b/specs/debug_precisions.yaml
index 5c96be1..7b3f955 100644
--- 
a/specs/debug_precisions.yaml +++ b/specs/debug_precisions.yaml @@ -16,7 +16,7 @@ data: separate_conditions: true params: - model: !!python/object:models.dr_constant.DR_Constant_Precisions {} + model: dr_constant_precisions learning_boundaries: [250,1000] learning_rates: [0.01,0.002,0.0002] n_hidden_decoder_precisions: 5 diff --git a/specs/dr_blackbox_xval.yaml b/specs/dr_blackbox_xval.yaml index f2b9400..6c7b39e 100644 --- a/specs/dr_blackbox_xval.yaml +++ b/specs/dr_blackbox_xval.yaml @@ -13,7 +13,7 @@ data: separate_conditions: true params: - model: !!python/object:models.dr_blackbox.DR_BlackboxPrecisions {} + model: dr_blackbox_precisions theta_columns: ['z1','z2','z3','z4','z5'] #,'z6','z7','z8','z9','z10'] n_z: 5 n_latent_species: 2 diff --git a/specs/dr_blackbox_xval_hierarchical.yaml b/specs/dr_blackbox_xval_hierarchical.yaml index 9257b4c..f72f510 100644 --- a/specs/dr_blackbox_xval_hierarchical.yaml +++ b/specs/dr_blackbox_xval_hierarchical.yaml @@ -13,7 +13,7 @@ data: separate_conditions: true params: - model: !!python/object:models.dr_blackbox.DR_HierarchicalBlackbox {} + model: dr_hierarchical_blackbox #theta_columns: ['x1','y1','z1','z2','z3'] #,'z6','z7','z8','z9','z10'] theta_columns: ['x1','x2','y1','y2','z1','z2'] #,'z6','z7','z8','z9','z10'] diff --git a/specs/dr_constant_icml.yaml b/specs/dr_constant_icml.yaml index ffef7e7..ed8bff4 100644 --- a/specs/dr_constant_icml.yaml +++ b/specs/dr_constant_icml.yaml @@ -16,7 +16,7 @@ data: separate_conditions: True params: - model: !!python/object:models.dr_constant.DR_Constant {} + model: dr_constant learning_boundaries: [250,1000] learning_rates: [0.01,0.002,0.0002] diff --git a/specs/dr_constant_one.yaml b/specs/dr_constant_one.yaml index 9f303ac..3093456 100644 --- a/specs/dr_constant_one.yaml +++ b/specs/dr_constant_one.yaml @@ -10,7 +10,7 @@ data: separate_conditions: True params: - model: !!python/object:models.dr_constant.DR_Constant {} + model: dr_constant learning_boundaries: [250,1000] learning_rates: [0.002,0.0004,0.00008] #solver: modeulerwhile diff --git a/specs/dr_constant_precisions.yaml b/specs/dr_constant_precisions.yaml index 74ed17e..280f482 100644 --- a/specs/dr_constant_precisions.yaml +++ b/specs/dr_constant_precisions.yaml @@ -16,7 +16,7 @@ data: separate_conditions: true params: - model: !!python/object:models.dr_constant.DR_Constant_Precisions {} + model: dr_constant_precisions theta_columns: ['aYFP','aCFP','aR','aS','r','K'] lambda_l2: 0.001 lambda_l2_hidden: 0.001 diff --git a/specs/dr_constant_xval_time_dependent.yaml b/specs/dr_constant_xval_time_dependent.yaml index db43bb3..8052dd0 100644 --- a/specs/dr_constant_xval_time_dependent.yaml +++ b/specs/dr_constant_xval_time_dependent.yaml @@ -13,7 +13,7 @@ data: separate_conditions: true params: - model: !!python/object:models.dr_constant.DR_Constant {} + model: dr_constant theta_columns: ['aYFP','aCFP','aR','aS','r','K'] lambda_l2: 0.001 lambda_l2_hidden: 0.001 diff --git a/specs/dr_growthrate_xval.yaml b/specs/dr_growthrate_xval.yaml index 98aea19..8f82ca8 100644 --- a/specs/dr_growthrate_xval.yaml +++ b/specs/dr_growthrate_xval.yaml @@ -16,7 +16,7 @@ data: separate_conditions: true params: - model: !!python/object:models.dr_growthrate.DR_Growth {} + model: dr_growthrate learning_boundaries: [250,1000] learning_rates: [0.01,0.002,0.0002] diff --git a/src/call_run_xval.py b/src/call_run_xval.py index bfeaad4..99dd9da 100644 --- a/src/call_run_xval.py +++ b/src/call_run_xval.py @@ -3,9 +3,7 @@ from __future__ import absolute_import -import os import 
procdata -import numpy as np from src.run_xval import run_on_split, create_parser from src.xval import XvalMerge import src.utils as utils diff --git a/src/convenience.py b/src/convenience.py index dcd3b9e..c0ac850 100644 --- a/src/convenience.py +++ b/src/convenience.py @@ -2,31 +2,29 @@ # Licensed under a Microsoft Research License. from __future__ import absolute_import -from typing import Any, Dict, List, Optional -from collections import OrderedDict +from typing import Any, Dict # Standard data science imports import numpy as np -import pandas as pd -import tensorflow as tf -from tensorflow.compat.v1 import placeholder, train, get_collection, GraphKeys +import tensorflow.compat.v1 as tf # type: ignore # Local imports -import procdata -import encoders -from decoders import ODEDecoder -import distributions as ds -from vi import GeneralLogImportanceWeights -from utils import default_get_value, variable_summaries +from .encoders import ConditionalEncoder +from .decoders import ODEDecoder +from .distributions import ( build_p_local, build_p_global_cond, build_p_global, build_p_constant, + build_q_local, build_q_global_cond, build_q_global, build_q_constant, ChainedDistribution ) +from .parameters import Parameters +from .vi import GeneralLogImportanceWeights +from .utils import default_get_value, variable_summaries class Decoder: ''' Decoder network ''' - def __init__(self, params: Dict[str, Any], placeholders: 'Placeholders', times: np.array, + def __init__(self, params: Dict[str, Any], ode_model, placeholders: 'Placeholders', times: np.array, encoder: 'Encoder', condition_on_device=True, plot_histograms=True): - ode_decoder = ODEDecoder(params) + ode_decoder = ODEDecoder(params, ode_model) # List(str), e.g. ['OD', 'RFP', 'YFP', 'CFP', 'F510', 'F430', 'LuxR', 'LasR'] self.names = ode_decoder.ode_model.species # list(str) # self.x_sample: Tensor of float32, shape e.g. 
(?, ?, ?, 8) @@ -67,11 +65,11 @@ def set_up_p(self, verbose: bool, parameters: 'Parameters'): """Returns a ChainedDistribution""" p_vals = LocalAndGlobal( # prior: local: may have some dependencies in theta (in hierarchy, local, etc) - ds.build_p_local(parameters, verbose, self.theta), - ds.build_p_global_cond(parameters, verbose, self.theta), + build_p_local(parameters, verbose, self.theta), + build_p_global_cond(parameters, verbose, self.theta), # prior: global should be fully defined in parameters - ds.build_p_global(parameters, verbose, self.theta), - ds.build_p_constant(parameters, verbose, self.theta)) + build_p_global(parameters, verbose, self.theta), + build_p_constant(parameters, verbose, self.theta)) if verbose: p_vals.diagnostic_printout('P') return p_vals.concat("p") @@ -79,19 +77,19 @@ def set_up_p(self, verbose: bool, parameters: 'Parameters'): @classmethod def set_up_q(self, verbose, parameters, placeholders, x_delta_obs): # Constants - q_constant = ds.build_q_constant(parameters, verbose) + q_constant = build_q_constant(parameters, verbose) # q: global, device-dependent distributions - q_global_cond = ds.build_q_global_cond(parameters, placeholders.dev_1hot, placeholders.conds_obs, verbose, plot_histograms=parameters.params_dict["plot_histograms"]) + q_global_cond = build_q_global_cond(parameters, placeholders.dev_1hot, placeholders.conds_obs, verbose, plot_histograms=parameters.params_dict["plot_histograms"]) # q: global, independent distributions - q_global = ds.build_q_global(parameters, verbose) + q_global = build_q_global(parameters, verbose) # q: local, based on amortized neural network if len(parameters.l.list_of_params) > 0: - encode = encoders.ConditionalEncoder(parameters.params_dict) + encode = ConditionalEncoder(parameters.params_dict) approx_posterior_params = encode(x_delta_obs) - q_local = ds.build_q_local(parameters, approx_posterior_params, placeholders.dev_1hot, placeholders.conds_obs, verbose, + q_local = build_q_local(parameters, approx_posterior_params, placeholders.dev_1hot, placeholders.conds_obs, verbose, kernel_regularizer=tf.keras.regularizers.l2(0.01)) else: - q_local = ds.ChainedDistribution(name="q_local") + q_local = ChainedDistribution(name="q_local") q_vals = LocalAndGlobal(q_local, q_global_cond, q_global, q_constant) if verbose: q_vals.diagnostic_printout('Q') @@ -137,7 +135,7 @@ def sum(self): def create_placeholders(self, suffix): def as_placeholder(size, name): - return placeholder(dtype=tf.float32, shape=(None, None, size), name=name) + return tf.placeholder(dtype=tf.float32, shape=(None, None, size), name=name) return LocalAndGlobal( as_placeholder(self.loc, "local_" + suffix), as_placeholder(self.glob_cond, "global_cond_" + suffix), @@ -146,7 +144,7 @@ def as_placeholder(size, name): def concat(self, name,): """Returns a concatenation of the items.""" - concatenated = ds.ChainedDistribution(name=name) + concatenated = ChainedDistribution(name=name) for chained in self.to_list(): for item_name, distribution in chained.distributions.items(): concatenated.add_distribution(item_name, distribution, chained.slot_dependencies[item_name]) @@ -194,11 +192,11 @@ def __init__(self, data_pair, n_vals): # PLACEHOLDERS: represent stuff we must supply to the computational graph at each iteration, # e.g. 
batch of data or random numbers #: None means we can dynamically set this number (nbr of batch, nbr of IW samples) - self.x_obs = placeholder(dtype=tf.float32, shape=(None, data_pair.n_time, data_pair.n_species), name='species') - self.dev_1hot = placeholder(dtype=tf.float32, shape=(None, data_pair.depth), name='device_1hot') - self.conds_obs = placeholder(dtype=tf.float32, shape=(None, data_pair.n_conditions), name='conditions') + self.x_obs = tf.placeholder(dtype=tf.float32, shape=(None, data_pair.n_time, data_pair.n_species), name='species') + self.dev_1hot = tf.placeholder(dtype=tf.float32, shape=(None, data_pair.depth), name='device_1hot') + self.conds_obs = tf.placeholder(dtype=tf.float32, shape=(None, data_pair.n_conditions), name='conditions') # for beta VAE - self.beta = placeholder(dtype=tf.float32, shape=(None), name='beta') + self.beta = tf.placeholder(dtype=tf.float32, shape=(None), name='beta') u_vals = n_vals.create_placeholders("random_bits") self.u = tf.concat(u_vals.to_list(), axis=-1, name='u_local_global_stacked') @@ -223,12 +221,12 @@ def __init__(self, dreg: bool, encoder: Encoder, objective: Objective, params_di self.tb_gradients = params_dict["tb_gradients"] boundaries = default_get_value(params_dict, "learning_boundaries", [1000, 2000, 5000]) values = [float(f) for f in default_get_value(params_dict, "learning_rates", [1e-2, 1e-3, 1e-4, 2 * 1e-5])] - learning_rate = train.piecewise_constant(global_step, boundaries, values) + learning_rate = tf.train.piecewise_constant(global_step, boundaries, values) # Alternatives for opt_func, with momentum e.g. 0.50, 0.75 # momentum = default_get_value(params_dict, "momentum", 0.0) # opt_func = train.MomentumOptimizer(learning_rate, momentum=momentum) # opt_func = train.RMSPropOptimizer(learning_rate) - opt_func = train.AdamOptimizer(learning_rate) + opt_func = tf.train.AdamOptimizer(learning_rate) self.train_step = self.build_train_step(dreg, encoder, objective, opt_func) @classmethod @@ -250,7 +248,7 @@ def build_train_step(self, dreg, encoder, objective, opt_func): '''Returns a computation that is run in the tensorflow session.''' # This path is for b_use_correct_iwae_gradients = True. For False, we would just # want to return opt_func.minimize(objective.vae_cost) - trainable_params = get_collection(GraphKeys.TRAINABLE_VARIABLES) + trainable_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES) if dreg: grads = self.create_dreg_gradients(encoder, objective, trainable_params) print("Set up Doubly Reparameterized Gradient (dreg)") diff --git a/src/decoders.py b/src/decoders.py index 3d49101..bc4da09 100644 --- a/src/decoders.py +++ b/src/decoders.py @@ -1,13 +1,12 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under a Microsoft Research License. 
-import tensorflow as tf -static_rnn = tf.nn.static_rnn +import tensorflow.compat.v1 as tf # type: ignore class ODEDecoder(object): - def __init__(self, params): + def __init__(self, params, ode_model): self.solver = params["solver"] - self.ode_model = params["model"] + self.ode_model = ode_model def __call__(self, conds_obs, dev_1hot, times, thetas, clipped_thetas, condition_on_device): x_sample, _f_sample = self.ode_model.simulate( diff --git a/src/distributions.py b/src/distributions.py index 70396dd..3d19fac 100644 --- a/src/distributions.py +++ b/src/distributions.py @@ -3,13 +3,10 @@ from abc import ABC, abstractmethod from collections import OrderedDict -import os -import pdb import numpy as np -import tensorflow as tf -from tensorflow.compat.v1 import summary +import tensorflow.compat.v1 as tf # type: ignore -from utils import variable_summaries +from .utils import variable_summaries SQRT2 = np.sqrt(2.0) LOG2PI = np.log(2 * np.pi) @@ -522,8 +519,6 @@ def sample(self, list_of_u, verbose, stop_grad=False): distribution.fill_slots(self.slot_dependencies[name], samples) assert distribution.slots_are_pending() is False, "STILL pending slot for %s"%name - if name == "dummy": - pdb.set_trace() theta = distribution.sample(list_of_u[:, :, idx], stop_grad) samples.add(name, theta) return samples @@ -759,8 +754,8 @@ def attach_summaries(self, name, plot_histograms): variable_summaries(self.prec, name + '.prec', plot_histograms) else: with tf.name_scope(name): - summary.scalar('mu', tf.reduce_mean(self.mu)) - summary.scalar('prec', tf.reduce_mean(self.prec)) + tf.summary.scalar('mu', tf.reduce_mean(self.mu)) + tf.summary.scalar('prec', tf.reduce_mean(self.prec)) def get_tensor_names(self, name): diff --git a/src/encoders.py b/src/encoders.py index 9f114b2..d35c7a7 100644 --- a/src/encoders.py +++ b/src/encoders.py @@ -1,7 +1,7 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under a Microsoft Research License. -import tensorflow as tf +import tensorflow.compat.v1 as tf # type: ignore import numpy as np def xavier_init(fan_in, fan_out, constant=1): diff --git a/src/parameters.py b/src/parameters.py index 8499948..0b93bc8 100644 --- a/src/parameters.py +++ b/src/parameters.py @@ -2,8 +2,8 @@ # Licensed under a Microsoft Research License. 
import numpy as np -from distributions import TfKumaraswamy, TfLogNormal, TfNormal, TfTruncatedNormal, TfConstant -from utils import default_get_value +from .distributions import TfKumaraswamy, TfLogNormal, TfNormal, TfTruncatedNormal, TfConstant +from .utils import default_get_value class DistributionDescription(object): def __init__(self, name, class_type, defaults): diff --git a/src/plotting.py b/src/plotting.py index 988a38e..5127421 100644 --- a/src/plotting.py +++ b/src/plotting.py @@ -6,7 +6,6 @@ matplotlib.use('agg') import matplotlib.pyplot as pp import matplotlib.cm as cmx -import pdb import numpy as np import pandas as pd import src.utils as utils @@ -240,7 +239,7 @@ def xval_treatments(res, data, devices): std = np.sqrt(np.sum(device_IW*(device_PREDICT[j]**2 + device_STD[j]**2 ), 1) - mu**2) for ci, cvalues in enumerate(input_values): ax.errorbar( cvalues, mu, yerr=std, fmt=pred_mk, ms=ms, lw=1, mec=edges[ci], color=colors[ci], zorder=ci) - ax.semilogx( cvalues, device_OBS[j], 'k'+obs_mk, ms=ms, lw=1, color=edges[ci], zorder=ci+20) + ax.semilogx( cvalues, device_OBS[j], obs_mk, ms=ms, lw=1, color=edges[ci], zorder=ci+20) ax.set_ylim(-0.1,1.1) ax.tick_params(axis='both', which='major', labelsize=fs) ax.set_xticks(np.logspace(0,4,3)) @@ -567,7 +566,10 @@ def xval_variable_parameters(res, ncols=2): name = ps[j+i*ncols] for di in devices: locs = np.where(res.devices == di) - ax.errorbar(res.ids[locs], qs['%s.mu'%name][locs], 1 / qs['%s.prec'%name][locs], fmt='.', color=cdict[di]) + x = res.ids[locs] + y_mu = qs['%s.mu'%name][locs] + y_err = np.squeeze(1 / qs['%s.prec'%name][locs]) + ax.errorbar(x, y_mu, y_err, fmt='.', color=cdict[di]) ax.set_title(name) if i == (nrows-1): ax.set_xlabel('Data instance') diff --git a/src/procdata.py b/src/procdata.py index 415117a..2293012 100644 --- a/src/procdata.py +++ b/src/procdata.py @@ -3,7 +3,7 @@ import os -from collections import OrderedDict, namedtuple +from collections import OrderedDict from functools import reduce from typing import Any, Dict, List diff --git a/src/run_xval.py b/src/run_xval.py index 031ac0a..0a0ba23 100644 --- a/src/run_xval.py +++ b/src/run_xval.py @@ -5,7 +5,7 @@ import argparse import os import time -from typing import Any, Dict, List +from typing import Dict # Special imports for matplotlib because of "use" call # pylint: disable=wrong-import-order,wrong-import-position @@ -16,16 +16,16 @@ # Standard data science imports import numpy as np -import tensorflow.compat.v1 as tf -from tensorflow.compat.v1 import summary, set_random_seed, global_variables_initializer +import tensorflow.compat.v1 as tf # type: ignore # Local imports -import procdata -from parameters import Parameters -from plotting import plot_prediction_summary, xval_treatments, plot_weighted_theta, species_summary -from convenience import Decoder, Encoder, SessionVariables, LocalAndGlobal, Objective, Placeholders, TrainingLogData, TrainingStepper -from xval import XvalMerge -import utils +import models +import src.procdata as procdata +from src.parameters import Parameters +from src.plotting import plot_prediction_summary, plot_weighted_theta, species_summary +from src.convenience import Decoder, Encoder, SessionVariables, LocalAndGlobal, Objective, Placeholders, TrainingLogData, TrainingStepper +from src.xval import XvalMerge +import src.utils as utils class Runner: """A class to set up, train and evaluate a variation CRN model, holding out one fold @@ -92,7 +92,7 @@ def _fix_random_seed(self): Currently (2019-01-14) it doesn't do that, and adding a 
call to random.seed(seed) doesn't help either.""" seed = self.args.seed print("Setting: tf.set_random_seed({})".format(seed)) - set_random_seed(seed) + tf.set_random_seed(seed) print("Setting: np.random.seed({})".format(seed)) np.random.seed(seed) @@ -183,10 +183,10 @@ def set_up(self, data_settings, params): # Number of instances to put in a training batch. self.n_batch = min(self.params_dict['n_batch'], self.dataset_pair.n_train) - # This is already a model object because of the use of "!!python/object:... in the yaml file. - model = self.params_dict["model"] + # Look up which model class to use + model_class = models.LOOKUP[self.params_dict["model"]] # Set various attributes of the model - model.init_with_params(self.params_dict, self.procdata) + ode_model = model_class(self.params_dict, self.procdata) # Import priors from YAML parameters = Parameters() @@ -214,12 +214,12 @@ def set_up(self, data_settings, params): # DEFINE THE DECODER NN print("Set up decoder") - self.decoder = Decoder(self.params_dict, self.placeholders, self.dataset_pair.times, self.encoder, condition_on_device=self.decoder_condition_on_device) + self.decoder = Decoder(self.params_dict, ode_model, self.placeholders, self.dataset_pair.times, self.encoder, condition_on_device=self.decoder_condition_on_device) # DEFINE THE OBJECTIVE and GRADIENTS # likelihood p (x | theta) print("Set up objective") - self.objective = Objective(self.encoder, self.decoder, model, self.placeholders) + self.objective = Objective(self.encoder, self.decoder, ode_model, self.placeholders) # SET-UP tensorflow LEARNING/OPTIMIZER self.training_stepper = TrainingStepper(self.args.dreg, self.encoder, self.objective, self.params_dict) @@ -234,21 +234,21 @@ def set_up(self, data_settings, params): self_normed_iw = self.objective.normalized_iws[ts_to_vis, :] # not in log space utils.variable_summaries(unnormed_iw, 'IWS_unn_log', plot_histograms) utils.variable_summaries(self_normed_iw, 'IWS_normed', plot_histograms) - summary.scalar('IWS_normed/nonzeros', tf.count_nonzero(self_normed_iw)) + tf.summary.scalar('IWS_normed/nonzeros', tf.count_nonzero(self_normed_iw)) with tf.name_scope('ELBO'): - summary.scalar('elbo', self.objective.elbo) + tf.summary.scalar('elbo', self.objective.elbo) # log(P) and also a per-species breakdown log_p = tf.reduce_mean(tf.reduce_logsumexp(self.objective.log_p_observations, axis=1)) - summary.scalar('log_p', log_p) # [batch, 1] + tf.summary.scalar('log_p', log_p) # [batch, 1] for i,plot in enumerate(self.procdata.signals): log_p_by_species = tf.reduce_mean(tf.reduce_logsumexp(self.objective.log_p_observations_by_species[:,:,i], axis=1)) - summary.scalar('log_p_'+plot, log_p_by_species) + tf.summary.scalar('log_p_'+plot, log_p_by_species) # Priors logsumexp_log_p_theta = tf.reduce_logsumexp(self.encoder.log_p_theta, axis=1) - summary.scalar('log_prior', tf.reduce_mean(logsumexp_log_p_theta)) + tf.summary.scalar('log_prior', tf.reduce_mean(logsumexp_log_p_theta)) logsumexp_log_q_theta = tf.reduce_logsumexp(self.encoder.log_q_theta, axis=1) - summary.scalar('loq_q', tf.reduce_mean(logsumexp_log_q_theta)) + tf.summary.scalar('loq_q', tf.reduce_mean(logsumexp_log_q_theta)) def _create_session_variables(self): return SessionVariables([ @@ -293,7 +293,7 @@ def _plot_weighted_theta_figure(self, training_output, validation_output, valid_ pp.close(theta_fig) pp.close('all') - def _evaluate_elbo_and_plot(self, beta_val, epoch, eval_tensors, log_data, merged, sess, train_writer, valid_writer, saver): + def 
_evaluate_elbo_and_plot(self, epoch, eval_tensors, log_data, merged, sess, train_writer, valid_writer, saver): print("epoch %4d"%epoch, end='', flush=True) log_data.n_test += 1 test_start = time.time() @@ -348,10 +348,10 @@ def _run_batch(self, beta_val, epoch_start, i_batch, log_data): def _run_session(self): # summary.scalar('ESS', 1.0 / np.sum(np.square(ws), 1)) - merged = summary.merge_all() + merged = tf.summary.merge_all() held_out_name = self.args.heldout or '%d_of_%d' % (self.args.split, self.args.folds) - train_writer = summary.FileWriter(os.path.join(self.trainer.tb_log_dir, 'train_%s' % held_out_name)) - valid_writer = summary.FileWriter(os.path.join(self.trainer.tb_log_dir, 'valid_%s' % held_out_name)) + train_writer = tf.summary.FileWriter(os.path.join(self.trainer.tb_log_dir, 'train_%s' % held_out_name)) + valid_writer = tf.summary.FileWriter(os.path.join(self.trainer.tb_log_dir, 'valid_%s' % held_out_name)) eval_tensors = self._create_session_variables().as_list() saver = tf.train.Saver() print("----------------------------------------------") @@ -360,7 +360,7 @@ def _run_session(self): with tf.Session() as sess: self._fix_random_seed() # <-- force run to be deterministic given random seed # initialize variables in the graph - sess.run(global_variables_initializer()) + sess.run(tf.global_variables_initializer()) log_data = TrainingLogData() print("===========================") if self.args.heldout: @@ -376,7 +376,7 @@ def _run_session(self): log_data.total_train_time += time.time() - epoch_start # occasionally evaluation ELBO on train and val, using more IW samples if np.mod(epoch, self.args.test_epoch) == 0: - self._evaluate_elbo_and_plot(beta, epoch, eval_tensors, log_data, merged, sess, train_writer, + self._evaluate_elbo_and_plot(epoch, eval_tensors, log_data, merged, sess, train_writer, valid_writer, saver) train_writer.close() valid_writer.close() diff --git a/src/solvers.py b/src/solvers.py index e6c21e2..3beb9f3 100644 --- a/src/solvers.py +++ b/src/solvers.py @@ -1,10 +1,9 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under a Microsoft Research License. 
-import tensorflow as tf
+import tensorflow.compat.v1 as tf
 from tensorflow.compat.v1 import verify_tensor_all_finite
 import numpy as np
-import pdb
 
 def modified_euler_integrate( d_states_d_t, init_state, times ):
     init_state = verify_tensor_all_finite(init_state, "init_state NOT finite")
diff --git a/src/utils.py b/src/utils.py
index e3e38e0..a7b2127 100644
--- a/src/utils.py
+++ b/src/utils.py
@@ -8,8 +8,7 @@
 import shutil
 import yaml
 
-import tensorflow as tf
-from tensorflow.compat.v1 import summary
+import tensorflow.compat.v1 as tf # type: ignore
 
 def get_data_directory():
     """
@@ -41,12 +40,12 @@ def variable_summaries(var, name, plot_histograms=False):
     """ Attach summaries to a scalar node using Tensorboard """
     #print("- Attaching tensorboard summary for %s"%name)
     mean = tf.reduce_mean(var)
-    summary.scalar(name+'/mean', mean)
+    tf.summary.scalar(name+'/mean', mean)
     stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
-    summary.scalar(name+'/stddev', stddev)
-    summary.scalar(name+'/max', tf.reduce_max(var))
-    summary.scalar(name+'/min', tf.reduce_min(var))
-    if plot_histograms: summary.histogram(name+'/histogram', var)
+    tf.summary.scalar(name+'/stddev', stddev)
+    tf.summary.scalar(name+'/max', tf.reduce_max(var))
+    tf.summary.scalar(name+'/min', tf.reduce_min(var))
+    if plot_histograms: tf.summary.histogram(name+'/histogram', var)
 
 def make_summary_image_op(fig, tag, scope, image_format='png'):
     buf = fig_to_byte_buffer(fig, image_format=image_format)
@@ -63,7 +62,7 @@ def load_config_file(filename):
     if filename is None:
         return None
     with open(filename, 'r') as stream:
-        return yaml.unsafe_load(stream)
+        return yaml.safe_load(stream)
 
 def default_get_value(dct, key, default_value, verbose=False):
     if key in dct:
diff --git a/src/vi.py b/src/vi.py
index 02e8aa8..1c36c1a 100644
--- a/src/vi.py
+++ b/src/vi.py
@@ -1,7 +1,7 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under a Microsoft Research License.
 
-import tensorflow as tf +import tensorflow.compat.v1 as tf import numpy as np def gaussian_log_prob(x, mu, vr, log_vr): diff --git a/src/xval.py b/src/xval.py index a71a037..0af9f0d 100644 --- a/src/xval.py +++ b/src/xval.py @@ -3,12 +3,11 @@ import os import numpy as np -import tensorflow as tf -from tensorflow.compat.v1 import summary +import tensorflow.compat.v1 as tf # type: ignore -import procdata -import plotting -from utils import make_summary_image_op, Trainer +from .plotting import ( plot_prediction_summary, xval_treatments, species_summary, xval_fit_summary, + xval_global_parameters, xval_variable_parameters, xval_individual_2treatments, xval_individual) +from .utils import make_summary_image_op, Trainer import matplotlib.pyplot as pp # pylint:disable=wrong-import-order class XvalMerge(object): @@ -35,7 +34,7 @@ def __init__(self, args, data_settings): self.data_ids = [] self.devices = [] self.treatments = [] - self.trainer = trainer = Trainer(args, add_timestamp=True) + self.trainer = Trainer(args, add_timestamp=True) self.X_obs = [] # Attributes initialized elsewhere self.chunk_sizes = None @@ -154,7 +153,7 @@ def loadtxt(base): def make_writer(self, location=None): if location is None: location = self.trainer.tb_log_dir - self.xval_writer = summary.FileWriter(os.path.join(location, 'xval')) + self.xval_writer = tf.summary.FileWriter(os.path.join(location, 'xval')) def close_writer(self): self.xval_writer.close() @@ -167,7 +166,7 @@ def make_images(self, procdata): device_ids = list(range(len(procdata.device_names))) print("Making summary figure") - f1 = plotting.plot_prediction_summary(procdata, self.names, self.times, self.X_obs, self.X_post_sample, + f1 = plot_prediction_summary(procdata, self.names, self.times, self.X_obs, self.X_post_sample, self.precisions, self.devices, self.log_normalized_iws, '-') self.save_figs(f1,'xval_fit') plot_op1 = make_summary_image_op(f1, 'Summary', 'Summary') @@ -177,7 +176,7 @@ def make_images(self, procdata): if self.separated_inputs is True: print("Making treatment figure") - f2 = plotting.xval_treatments(self, procdata, device_ids) + f2 = xval_treatments(self, procdata, device_ids) self.save_figs(f2,'xval_treatments') plot_op2 = make_summary_image_op(f2, 'Treatment', 'Treatment') self.xval_writer.add_summary(tf.Summary(value=[plot_op2]), self.epoch) @@ -185,7 +184,7 @@ def make_images(self, procdata): self.xval_writer.flush() print("Making species figure") - f_species = plotting.species_summary(procdata, self.names, self.treatments, self.devices, self.times, self.X_sample, self.importance_weights, device_ids, fixYaxis = True) + f_species = species_summary(procdata, self.names, self.treatments, self.devices, self.times, self.X_sample, self.importance_weights, device_ids, fixYaxis = True) self.save_figs(f_species,'xval_species') plot_op_species = make_summary_image_op(f_species, 'Species', 'Species') self.xval_writer.add_summary(tf.Summary(value=[plot_op_species]), self.epoch) @@ -193,7 +192,7 @@ def make_images(self, procdata): self.xval_writer.flush() print("Making global parameters figure") - f_gparas = plotting.xval_global_parameters(self) + f_gparas = xval_global_parameters(self) if f_gparas is not None: self.save_figs(f_gparas,'xval_global_parameters') plot_op_gparas = make_summary_image_op(f_gparas, 'Parameters', 'Globals') @@ -202,7 +201,7 @@ def make_images(self, procdata): self.xval_writer.flush() print("Making variable parameters figure") - f_vparas = plotting.xval_variable_parameters(self) + f_vparas = 
xval_variable_parameters(self) if f_vparas is not None: self.save_figs(f_vparas,'xval_variable_parameters') plot_op_vparas = make_summary_image_op(f_vparas, 'Parameters', 'Variable') @@ -215,7 +214,7 @@ def make_images(self, procdata): for u in device_ids: print("- %s" % procdata.pretty_devices[u]) device = procdata.device_names[u] - f4 = plotting.xval_fit_summary(self, u, separatedInputs=self.separated_inputs) + f4 = xval_fit_summary(self, u, separatedInputs=self.separated_inputs) self.save_figs(f4, 'xval_summary_%s' % device) summaries.append(make_summary_image_op(f4, device, 'Device (Summary)')) pp.close(f4) @@ -228,9 +227,9 @@ def make_images(self, procdata): print("- %s" % procdata.pretty_devices[u]) device = procdata.device_names[u] if self.separated_inputs is True: - f5 = plotting.xval_individual_2treatments(self, u) + f5 = xval_individual_2treatments(self, u) else: - f5 = plotting.xval_individual(self, u) + f5 = xval_individual(self, u) self.save_figs(f5, 'xval_individual_%s' % device) indivs.append(make_summary_image_op(f5, device, 'Device (Individual)')) pp.close(f5) diff --git a/tests/test_conditional_encoder.py b/tests/test_conditional_encoder.py index ce5500e..ce5f0ad 100644 --- a/tests/test_conditional_encoder.py +++ b/tests/test_conditional_encoder.py @@ -2,78 +2,63 @@ # Licensed under a Microsoft Research License. import numpy as np -import tensorflow as tf -import os -import subprocess -import sys -import tempfile -import re +import tensorflow.compat.v1 as tf # type: ignore # Call tests in this file by running "pytest" on the directory containing it. For example: # cd ~/vi-hds # pytest tests -import models.dr_constant -import utils -import procdata -from convenience import LocalAndGlobal, Placeholders -from distributions import DotOperatorSamples -import encoders -from parameters import Parameters -from run_xval import Runner, create_parser +import src.utils as utils +from src.procdata import apply_defaults +from src.convenience import LocalAndGlobal, Placeholders +from src.encoders import ConditionalEncoder +from src.parameters import Parameters +from src.run_xval import Runner, create_parser -# Load a spec (YAML) -parser = create_parser(False) -args = parser.parse_args(['./specs/dr_constant_icml.yaml']) -spec = utils.load_config_file(args.yaml) # spec is a dict of dicts of dicts -para_settings = utils.apply_defaults(spec['params']) -data_settings = procdata.apply_defaults(spec["data"]) -model = para_settings['model'] -conditions = np.array([[1.0, 1.0], [2.0, 1.0], [3.0, 1.0]]).astype(np.float32) +def test_conditional_encoder(): -# Define a theta sample -n_batch = conditions.shape[0] -n_iwae = 10 + # Load a spec (YAML) + parser = create_parser(False) + args = parser.parse_args(['./specs/dr_constant_icml.yaml']) + spec = utils.load_config_file(args.yaml) # spec is a dict of dicts of dicts + para_settings = utils.apply_defaults(spec['params']) + data_settings = apply_defaults(spec["data"]) -# Set up model runner -trainer = utils.Trainer(args, add_timestamp=True) -self = Runner(args, 0, trainer) -self.params_dict = para_settings -self._prepare_data(data_settings) -self.n_batch = min(self.params_dict['n_batch'], self.dataset_pair.n_train) + # Set up model runner + trainer = utils.Trainer(args, add_timestamp=True) + self = Runner(args, 0, trainer) + self.params_dict = para_settings + self._prepare_data(data_settings) + self.n_batch = min(self.params_dict['n_batch'], self.dataset_pair.n_train) -# Set various attributes of the model -model = self.params_dict["model"] 
-model.init_with_params(self.params_dict, self.procdata) + # Import priors from YAML + parameters = Parameters() + parameters.load(self.params_dict) -# Import priors from YAML -parameters = Parameters() -parameters.load(self.params_dict) + print("----------------------------------------------") + if self.args.verbose: + print("parameters:") + parameters.pretty_print() + n_vals = LocalAndGlobal.from_list(parameters.get_parameter_counts()) + self.n_theta = n_vals.sum() -print("----------------------------------------------") -if self.args.verbose: - print("parameters:") - parameters.pretty_print() -n_vals = LocalAndGlobal.from_list(parameters.get_parameter_counts()) -self.n_theta = n_vals.sum() + self.placeholders = Placeholders(self.dataset_pair, n_vals) -self.placeholders = Placeholders(self.dataset_pair, n_vals) + # feed_dicts are used to supply placeholders, these are for the entire train/val dataset, there is a batch one below. + self._create_feed_dicts() -# feed_dicts are used to supply placeholders, these are for the entire train/val dataset, there is a batch one below. -self._create_feed_dicts() + # time-series of species differences: x_delta_obs is BATCH x (nTimes-1) x nSpecies + x_delta_obs = self.placeholders.x_obs[:, 1:, :] - self.placeholders.x_obs[:, :-1, :] -# time-series of species differences: x_delta_obs is BATCH x (nTimes-1) x nSpecies -x_delta_obs = self.placeholders.x_obs[:, 1:, :] - self.placeholders.x_obs[:, :-1, :] + # Define encoder + encode = ConditionalEncoder(parameters.params_dict) + approx_posterior_params = encode(x_delta_obs) -# Define encoder -encode = encoders.ConditionalEncoder(parameters.params_dict) -approx_posterior_params = encode(x_delta_obs) + # Run TF session to extract an ODE simulation using modified Euler and RK4 + sess = tf.Session() + sess.run(tf.global_variables_initializer()) + [xd, q] = sess.run([x_delta_obs, approx_posterior_params], feed_dict=self.train_feed_dict) + print("xd:", np.shape(xd)) + print("q:", np.shape(q)) -# Run TF session to extract an ODE simulation using modified Euler and RK4 -sess = tf.Session() -sess.run(tf.global_variables_initializer()) -[xd, q] = sess.run([x_delta_obs, approx_posterior_params], feed_dict=self.train_feed_dict) -print("xd:", np.shape(xd)) -print("q:", np.shape(q)) - -assert np.shape(q) == (234,50), 'Shape of encoder output' \ No newline at end of file + assert np.shape(q) == (234,50), 'Shape of encoder output' \ No newline at end of file diff --git a/tests/test_ode_solvers.py b/tests/test_ode_solvers.py index 8f62cc8..8e2ea93 100644 --- a/tests/test_ode_solvers.py +++ b/tests/test_ode_solvers.py @@ -2,79 +2,74 @@ # Licensed under a Microsoft Research License. import numpy as np -import tensorflow as tf -import os -import subprocess -import sys -import tempfile -import re +import tensorflow.compat.v1 as tf # type: ignore # Call tests in this file by running "pytest" on the directory containing it. 
For example: # cd ~/vi-hds # pytest tests -import models.dr_constant -import utils -import procdata -import distributions -from run_xval import Runner, create_parser +import models +import src.utils as utils +from src.procdata import apply_defaults +from src.distributions import DotOperatorSamples +from src.run_xval import Runner, create_parser -# Load a spec (YAML) -parser = create_parser(False) -args = parser.parse_args(['./specs/dr_constant_icml.yaml']) -spec = utils.load_config_file(args.yaml) # spec is a dict of dicts of dicts -para_settings = utils.apply_defaults(spec['params']) -data_settings = procdata.apply_defaults(spec["data"]) -model = para_settings['model'] +def test_ode_solvers(): -# Load the parameter priors -shared = dict([(k, np.exp(v['mu'])) for k, v in para_settings['shared'].items()]) -priors = dict() -priors.update(para_settings['global']) -priors.update(para_settings['global_conditioned']) -priors.update(para_settings['local']) + # Load a spec (YAML) + parser = create_parser(False) + args = parser.parse_args(['./specs/dr_constant_icml.yaml']) + spec = utils.load_config_file(args.yaml) # spec is a dict of dicts of dicts + para_settings = utils.apply_defaults(spec['params']) + data_settings = apply_defaults(spec["data"]) + model = para_settings['model'] -# Define a parameter sample that is the mode of each LogNormal prior -theta = distributions.DotOperatorSamples() -for k, v in priors.items(): - if k != "conditioning": - if 'mu' in v: - sample_value = np.exp(v['mu']) - else: - sample_value = shared[v['distribution']] - theta.add(k, np.tile(sample_value, [1,1]).astype(np.float32)) + # Load the parameter priors + shared = dict([(k, np.exp(v['mu'])) for k, v in para_settings['shared'].items()]) + priors = dict() + priors.update(para_settings['global']) + priors.update(para_settings['global_conditioned']) + priors.update(para_settings['local']) -# Add the constants separately -for k, v in para_settings['constant'].items(): - theta.add(k, np.tile(v, [1,1]).astype(np.float32)) + # Define a parameter sample that is the mode of each LogNormal prior + theta = DotOperatorSamples() + for k, v in priors.items(): + if k != "conditioning": + if 'mu' in v: + sample_value = np.exp(v['mu']) + else: + sample_value = shared[v['distribution']] + theta.add(k, np.tile(sample_value, [1,1]).astype(np.float32)) -# Set up model runner -trainer = utils.Trainer(args, add_timestamp=True) -self = Runner(args, 0, trainer) -self.params_dict = para_settings -self._prepare_data(data_settings) -self.n_batch = min(self.params_dict['n_batch'], self.dataset_pair.n_train) + # Add the constants separately + for k, v in para_settings['constant'].items(): + theta.add(k, np.tile(v, [1,1]).astype(np.float32)) -# Set various attributes of the model -model = self.params_dict["model"] -model.init_with_params(self.params_dict, self.procdata) + # Set up model runner + trainer = utils.Trainer(args, add_timestamp=True) + self = Runner(args, 0, trainer) + self.params_dict = para_settings + self._prepare_data(data_settings) + self.n_batch = min(self.params_dict['n_batch'], self.dataset_pair.n_train) -# Define simulation variables and run simulator -times = np.linspace(0.0, 20.0, 101).astype(np.float32) -conditions = np.array([[1.0, 1.0]]).astype(np.float32) -dev_1hot = np.expand_dims(np.zeros(7).astype(np.float32),0) -sol_rk4 = model.simulate(theta, times, conditions, dev_1hot, 'rk4')[0] -sol_mod = model.simulate(theta, times, conditions, dev_1hot, 'modeulerwhile')[0] + # Set various attributes of the model + model_class 
= models.LOOKUP[self.params_dict["model"]] + model = model_class(self.params_dict, self.procdata) -# Run TF session to extract an ODE simulation using modified Euler and RK4 -sess = tf.Session() -sess.run(tf.global_variables_initializer()) -[mod, rk4] = sess.run([sol_mod, sol_rk4]) -print(np.shape(mod)) + # Define simulation variables and run simulator + times = np.linspace(0.0, 20.0, 101).astype(np.float32) + conditions = np.array([[1.0, 1.0]]).astype(np.float32) + dev_1hot = np.expand_dims(np.zeros(7).astype(np.float32),0) + sol_rk4 = model.simulate(theta, times, conditions, dev_1hot, 'rk4')[0] + sol_mod = model.simulate(theta, times, conditions, dev_1hot, 'modeulerwhile')[0] -# Ensure that the relative error is no bigger than 5% -Y0 = mod[0][0][-1] -#print(Y0) -Y1 = rk4[0][0][-1] -#print(Y1) -assert np.nanmax(np.abs((Y0 - Y1) / Y0)) < 0.05, 'Difference between Modified Euler and RK4 solvers greater than 5%' \ No newline at end of file + # Run TF session to extract an ODE simulation using modified Euler and RK4 + sess = tf.Session() + sess.run(tf.global_variables_initializer()) + [mod, rk4] = sess.run([sol_mod, sol_rk4]) + print(np.shape(mod)) + + # Ensure that the relative error is no bigger than 5% + Y0 = mod[0][0][-1] + Y1 = rk4[0][0][-1] + assert np.nanmax(np.abs((Y0 - Y1) / Y0)) < 0.05, 'Difference between Modified Euler and RK4 solvers greater than 5%' \ No newline at end of file diff --git a/tests/test_run_xval_icml.py b/tests/test_run_xval_icml.py index 9e4104c..d0a9ca5 100644 --- a/tests/test_run_xval_icml.py +++ b/tests/test_run_xval_icml.py @@ -34,6 +34,8 @@ def pre_test(pattern, num_folds): assert sys.version_info[0] == 3, 'This test will only run on Python 3' results_dir = tempfile.mkdtemp() os.environ['INFERENCE_RESULTS_DIR'] = results_dir + # Switch off the GPU + os.environ['CUDA_VISIBLE_DEVICES'] = '-1' if num_folds > 1: cmd = ('python src/%s.py --folds=%d --experiment=TEST --epochs=2 --test_epoch=1 ' '--train_sample=3 --test_samples=3 specs/dr_blackbox_xval.yaml') % (pattern, num_folds) @@ -42,7 +44,10 @@ def pre_test(pattern, num_folds): '--train_sample=10 --test_samples=10 specs/dr_blackbox_xval.yaml') % pattern cmd_tokens = cmd.split() result = subprocess.run(cmd_tokens, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True) + if result.returncode != 0: + print(result.stderr) assert result.returncode == 0 + elbo_list = [] for line in result.stdout.split('\n'): print(line) @@ -89,9 +94,9 @@ def post_test(example_dir): assert len(matches) > 0, 'Cannot find file matching *_*_*.%s in %s' % (suffix, example_dir) -def test_run_xval_icml(): +def test_run_xval(): ''' - Tests that a relatively quick (1 minute or so) call of run_xval_icml.py, with two test epochs, + Tests that a relatively quick (1 minute or so) call of run_xval.py, with two test epochs, has a higher validation ELBO on the second test epoch than the first. ''' pre_test('run_xval', 1) @@ -102,4 +107,8 @@ def test_folds2(): fold as run_xval_icml.py does. This is slow - about five minutes. ''' example_dir = pre_test('call_run_xval', 2) - post_test(example_dir) \ No newline at end of file + post_test(example_dir) + +if __name__ == "__main__": + test_run_xval() + # test_folds2() \ No newline at end of file
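
Note: a minimal sketch of the new spec-loading flow, for reference. It assumes the repository root is on PYTHONPATH (per the updated README) and elides the ProcData construction; see Runner.set_up in src/run_xval.py for the real wiring (which also applies utils.apply_defaults to the params before use).

```python
import yaml

import models  # models/__init__.py defines LOOKUP

# safe_load parses plain YAML only: a '!!python/object:' tag is rejected,
# rather than silently instantiating an arbitrary Python class.
with open('specs/dr_constant_icml.yaml', 'r') as stream:
    spec = yaml.safe_load(stream)

params = spec['params']
model_class = models.LOOKUP[params['model']]  # e.g. 'dr_constant' -> DR_Constant

# The model is now constructed explicitly, after loading:
# ode_model = model_class(params, procdata)  # procdata: a src.procdata.ProcData
```

A side benefit of the table: an unknown model key fails fast with a KeyError at look-up time, which is easier to diagnose than a YAML constructor error.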