Merge pull request #53 from eth-cscs/release-0.5.4

Release 0.5.4
eth-cscs · Jan 8, 2019 · a1486b9 · a1486b9
2 parents 6d406cd + 27bf708
commit a1486b9
Show file tree

Hide file tree

Showing 11 changed files with 197 additions and 42 deletions.
diff --git a/README.md b/README.md
@@ -26,9 +26,9 @@ scientists by providing
 # Documentation
 For more information, check out the
 
-* [Documentation](http://abcpy.readthedocs.io/en/v0.5.3) 
-* [Examples](https://github.com/eth-cscs/abcpy/tree/v0.5.3/examples) directory and
-* [Reference](http://abcpy.readthedocs.io/en/v0.5.3/abcpy.html)
+* [Documentation](http://abcpy.readthedocs.io/en/v0.5.4) 
+* [Examples](https://github.com/eth-cscs/abcpy/tree/v0.5.4/examples) directory and
+* [Reference](http://abcpy.readthedocs.io/en/v0.5.4/abcpy.html)
 
 Further, we provide a
 [collection of models](https://github.com/eth-cscs/abcpy-models) for which ABCpy
@@ -54,7 +54,7 @@ finally CSCS (Swiss National Super Computing Center) for their generous support.
 
 There is a paper in the proceedings of the 2017 PASC conference. In case you use
 ABCpy for your publication, we would appreciate a citation. You can use
-[this](https://github.com/eth-cscs/abcpy/blob/v0.5.3/doc/literature/DuttaS-ABCpy-PASC-2017.bib)
+[this](https://github.com/eth-cscs/abcpy/blob/v0.5.4/doc/literature/DuttaS-ABCpy-PASC-2017.bib)
 BibTex reference.
 
 

diff --git a/VERSION b/VERSION
@@ -1 +1 @@
-0.5.3
+0.5.4
diff --git a/abcpy/approx_lhd.py b/abcpy/approx_lhd.py
@@ -91,11 +91,10 @@ def likelihood(self, y_obs, y_sim):
         # print("DEBUG: robust_precision_sim_det computation..")
         robust_precision_sim_det = np.linalg.det(robust_precision_sim)
         # print("DEBUG: combining.")
-        result = pow(np.sqrt((1/(2*np.pi))*robust_precision_sim_det),self.stat_obs.shape[0])\
-        *np.exp(np.sum(-0.5*np.sum(np.array(self.stat_obs-mean_sim)* \
-        np.array(np.matrix(robust_precision_sim)*np.matrix(self.stat_obs-mean_sim).T).T, axis = 1)))
-
-        return result
+        tmp1 = robust_precision_sim * np.array(self.stat_obs.reshape(-1,1) - mean_sim.reshape(-1,1)).T
+        tmp2 = np.exp(np.sum(-0.5*np.sum(np.array(self.stat_obs-mean_sim) * np.array(tmp1).T, axis = 1)))
+        tmp3 = pow(np.sqrt((1/(2*np.pi)) * robust_precision_sim_det),self.stat_obs.shape[0])
+        return tmp2 * tmp3
 
 
 class PenLogReg(Approx_likelihood, GraphTools):

diff --git a/abcpy/discretemodels.py b/abcpy/discretemodels.py
@@ -302,5 +302,97 @@ def pmf(self, input_values, x):
         """
 
         pmf = poisson(int(input_values[0])).pmf(x)
+        self.calculated_pmf = pmf
+        return pmf
+
+
+
+class DiscreteUniform(Discrete, ProbabilisticModel):
+    def __init__(self, parameters, name='DiscreteUniform'):
+        """This class implements a probabilistic model following a Discrete Uniform distribution.
+
+        Parameters
+        ----------
+        parameters: list
+             A list containing two entries, the upper and lower bound of the range.
+
+        name: string
+            The name that should be given to the probabilistic model in the journal file.
+        """
+
+        if not isinstance(parameters, list):
+            raise TypeError('Input for Discrete Uniform has to be of type list.')
+        if len(parameters) != 2:
+            raise ValueError('Input for Discrete Uniform has to be of length 2.')
+
+        self._dimension = 1
+        input_parameters = InputConnector.from_list(parameters)
+        super(DiscreteUniform, self).__init__(input_parameters, name)
+        self.visited = False
+
+    def _check_input(self, input_values):
+        # Check whether input has correct type or format
+        if len(input_values) != 2:
+            raise ValueError('Number of parameters of FloorField model must be 2.')
+
+        # Check whether input is from correct domain
+        lowerbound = input_values[0]  # Lower bound
+        upperbound = input_values[1]  # Upper bound
+
+        if not isinstance(lowerbound, (int, np.int64, np.int32, np.int16)) or not isinstance(upperbound, (int, np.int64, np.int32, np.int16)) or lowerbound >= upperbound:
+            return False
+        return True
+
+    def _check_output(self, parameters):
+        """
+        Checks parameter values given as fixed values. Returns False iff it is not an integer
+        """
+        if not isinstance(parameters[0], (int, np.int32, np.int64)):
+            return False
+        return True
+
+    def forward_simulate(self, input_values, k, rng=np.random.RandomState()):
+        """
+        Samples from the Discrete Uniform distribution associated with the probabilistic model.
 
+        Parameters
+        ----------
+        input_values: list
+            List of input parameters, in the same order as specified in the InputConnector passed to the init function
+        k: integer
+            The number of samples to be drawn.
+        rng: random number generator
+            The random number generator to be used.
+
+        Returns
+        -------
+        list: [np.ndarray]
+            A list containing the sampled values as np-array.
+        """
+
+        result = np.array(rng.randint(input_values[0], input_values[1], size=k, dtype=np.int64))
+        return [np.array([x]) for x in result]
+
+    def get_output_dimension(self):
+        return self._dimension
+
+    def pmf(self, input_values, x):
+        """Evaluates the probability mass function at point x.
+
+        Parameters
+        ----------
+        input_values: list
+            List of input parameters, in the same order as specified in the InputConnector passed to the init function
+        x: float
+            The point at which the pmf should be evaluated.
+
+        Returns
+        -------
+        float:
+            The pmf evaluated at point x.
+        """
+        upperbound, lowerbound = input_values[0], input_values[1]
+        pmf = 1. / (upperbound - lowerbound + 1)
+        self.calculated_pmf = pmf
         return pmf
+
diff --git a/abcpy/distances.py b/abcpy/distances.py
@@ -3,6 +3,7 @@
 import numpy as np
 from glmnet import LogitNet
 from sklearn import linear_model
+from scipy import stats
 
 
 class Distance(metaclass = ABCMeta):
@@ -211,8 +212,6 @@ def distance(self, d1, d2):
 
     def dist_max(self):
         return 1.0
-
-
 
 
 class LogReg(Distance):
@@ -257,7 +256,7 @@ def distance(self, d1, d2):
         training_set_labels = np.concatenate((label_s1, label_s2), axis=0).ravel()
 
         reg_inv = 1e5
-        log_reg_model = linear_model.LogisticRegression(C=reg_inv, penalty='l1')
+        log_reg_model = linear_model.LogisticRegression(C=reg_inv, penalty='l1', max_iter=1000, solver='liblinear')
         log_reg_model.fit(training_set_features, training_set_labels)
         score = log_reg_model.score(training_set_features, training_set_labels)
         distance = 2.0 * (score - 0.5)

diff --git a/abcpy/inferences.py b/abcpy/inferences.py
@@ -811,19 +811,17 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1
                         break
             # 2: calculate approximate lieklihood for new parameters
             self.logger.info("Calculate approximate likelihood")
-            new_parameters_pds = self.backend.parallelize(new_parameters)
-            approx_likelihood_new_parameters_and_counter_pds = self.backend.map(self._approx_lik_calc, new_parameters_pds)
-            self.logger.debug("collect approximate likelihood from pds")
-            approx_likelihood_new_parameters_and_counter = self.backend.collect(approx_likelihood_new_parameters_and_counter_pds)
-            approx_likelihood_new_parameters, counter = [list(t) for t in zip(*approx_likelihood_new_parameters_and_counter)]
-
-            approx_likelihood_new_parameters = np.array(approx_likelihood_new_parameters).reshape(-1,1)
+            merged_sim_data_parameter = self.flat_map(new_parameters, self.n_samples_per_param, self._simulate_data)
+            # Compute likelihood for each parameter value
+            approx_likelihood_new_parameters, counter = self.simple_map(merged_sim_data_parameter, self._approx_calc)
+            approx_likelihood_new_parameters = np.array(approx_likelihood_new_parameters).reshape(-1, 1)
 
             for count in counter:
                 self.simulation_counter+=count
 
             # 3: calculate new weights for new parameters
             self.logger.info("Calculating weights")
+            new_parameters_pds = self.backend.parallelize(new_parameters)
             new_weights_pds = self.backend.map(self._calculate_weight, new_parameters_pds)
             new_weights = np.array(self.backend.collect(new_weights_pds)).reshape(-1, 1)
 
@@ -874,38 +872,75 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1
 
         return journal
 
+    ## Simple_map and Flat_map: Python wrapper for nested parallelization
+    def simple_map(self, data, map_function):
+        data_pds = self.backend.parallelize(data)
+        result_pds = self.backend.map(map_function, data_pds)
+        result = self.backend.collect(result_pds)
+        main_result, counter = [list(t) for t in zip(*result)]
+        return main_result, counter
+    def flat_map(self, data, n_repeat, map_function):
+        repeated_data = np.repeat(data, n_repeat, axis=0)
+        repeated_data_pds = self.backend.parallelize(repeated_data)
+        repeated_data__result_pds = self.backend.map(map_function, repeated_data_pds)
+        repeated_data_result = self.backend.collect(repeated_data__result_pds)
+        repeated_data, result = [list(t) for t in zip(*repeated_data_result)]
+        merged_result_data = []
+        for ind in range(0, data.shape[0]):
+            merged_result_data.append([[[result[np.int(i)][0][0] \
+                                         for i in
+                                         np.where(np.mean(repeated_data == data[ind, :], axis=1) == 1)[0]]],
+                                       data[ind, :]])
+        return merged_result_data
+
     # define helper functions for map step
-    def _approx_lik_calc(self, theta):
+    def _simulate_data(self, theta):
         """
-        Compute likelihood for new parameters using approximate likelihood function
-
+        Simulate n_sample_per_param many datasets for new parameter
         Parameters
         ----------
         theta: numpy.ndarray
             1xp matrix containing the model parameters, where p is the number of parameters
+        Returns
+        -------
+        (theta, sim_data)
+            tehta and simulate data
+        """
+
+        # Simulate the fake data from the model given the parameter value theta
+        # print("DEBUG: Simulate model for parameter " + str(theta))
+        self.set_parameters(theta)
+        y_sim = self.simulate(1, self.rng)
+
+        return (theta, y_sim)
 
+    def _approx_calc(self, sim_data_parameter):
+        """
+        Compute likelihood for new parameters using approximate likelihood function
+        Parameters
+        ----------
+        sim_data_parameter: list
+            First element is the parameter and the second element is the simulated data
         Returns
         -------
         float
             The approximated likelihood function
         """
+        # Extract data and parameter
+        y_sim, theta = sim_data_parameter[0], sim_data_parameter[1]
 
-        # Simulate the fake data from the model given the parameter value theta
-        # print("DEBUG: Simulate model for parameter " + str(theta))
-        y_sim = self.simulate(self.n_samples_per_param, self.rng)
         # print("DEBUG: Extracting observation.")
         obs = self.accepted_parameters_manager.observations_bds.value()
         # print("DEBUG: Computing likelihood...")
 
-
         total_pdf_at_theta = 1.
 
         lhd = self.likfun.likelihood(obs, y_sim)
 
         # print("DEBUG: Likelihood is :" + str(lhd))
         pdf_at_theta = self.pdf_of_prior(self.model, theta)
 
-        total_pdf_at_theta*=(pdf_at_theta*lhd)
+        total_pdf_at_theta *= (pdf_at_theta * lhd)
 
         # print("DEBUG: prior pdf evaluated at theta is :" + str(pdf_at_theta))
         return (total_pdf_at_theta, 1)
@@ -1553,7 +1588,7 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1
             seed_arr = self.rng.randint(0, np.iinfo(np.uint32).max, size=int(n_samples / temp_chain_length),
                                         dtype=np.uint32)
             rng_arr = np.array([np.random.RandomState(seed) for seed in seed_arr])
-            index_arr = np.linspace(0, n_samples / temp_chain_length - 1, n_samples / temp_chain_length).astype(
+            index_arr = np.linspace(0, n_samples // temp_chain_length - 1, n_samples // temp_chain_length).astype(
                 int).reshape(int(n_samples / temp_chain_length), )
             rng_and_index_arr = np.column_stack((rng_arr, index_arr))
             rng_and_index_pds = self.backend.parallelize(rng_and_index_arr)

diff --git a/doc/source/DEVELOP.rst b/doc/source/DEVELOP.rst
@@ -15,15 +15,16 @@ new version `M.m.b':
 1. Create a release branch `release-M.m.b`
 2. Adapt `VERSION` file in the repos root directory: `echo M.m.b > VERSION`
 3. Adapt `README.md` file: adapt links to correct version of `User Documentation` and `Reference`
-4. Merge all desired feature branches into the release branch
-5. Create a pull/ merge request: release branch -> master
+4. Adapt `doc/source/DEVELOP.rst` file: to install correct version of ABCpy
+5. Merge all desired feature branches into the release branch
+6. Create a pull/ merge request: release branch -> master
 
 After a successful merge:
 
-5. Create tag vM.m.b (`git tag vM.m.b`)
-6. Retag tag `stable` to the current version
-7. Push the tag (`git push --tags`)
-8. Create a release in GitHub
+7. Create tag vM.m.b (`git tag vM.m.b`)
+8. Retag tag `stable` to the current version
+9. Push the tag (`git push --tags`)
+10. Create a release in GitHub
 
 The new tag on master will signal Travis to deploy a new package to Pypi while
 the GitHub release is just for user documentation.
diff --git a/doc/source/installation.rst b/doc/source/installation.rst
@@ -34,7 +34,7 @@ To create a package and install it, do
 ::
 
    make package
-   pip3 install build/dist/abcpy-0.5.1-py3-none-any.whl
+   pip3 install build/dist/abcpy-0.5.4-py3-none-any.whl
 
 Note that ABCpy requires Python3.
 

diff --git a/tests/discretemodels_tests.py b/tests/discretemodels_tests.py
@@ -18,6 +18,10 @@ class PoissonAPITests(AbstractAPIImplementationTests, unittest.TestCase):
     model_types = [Poisson]
     model_inputs = [[3]]
 
+class DiscreteUniformTests(AbstractAPIImplementationTests, unittest.TestCase):
+    model_types = [DiscreteUniform]
+    model_inputs = [[10, 20]]
+
 class CheckParametersAtInitializationTests(unittest.TestCase):
     """Tests that no probabilistic model with invalid parameters can be initialized."""
 
@@ -45,6 +49,14 @@ def test_Poisson(self):
         with self.assertRaises(ValueError):
             Poisson([2, 3])
 
+    def test_DiscreteUniform(self):
+        with self.assertRaises(TypeError):
+            DiscreteUniform(np.array([1, 2, 3]))
+
+        with self.assertRaises(ValueError):
+            DiscreteUniform([2, 3, 4])
+
+
 class DimensionTests(unittest.TestCase):
     """Tests whether the dimensions of all discrete models are defined in the correct way."""
 
@@ -60,6 +72,10 @@ def test_Poisson(self):
         Po = Poisson([3])
         self.assertTrue(Po.get_output_dimension()==1)
 
+    def test_DiscreteUniform(self):
+        Du = DiscreteUniform([10, 20])
+        self.assertTrue(Du.get_output_dimension()==1)
+
 
 class SampleFromDistributionTests(unittest.TestCase):
     """Tests the return value of forward_simulate for all discrete distributions."""
@@ -81,6 +97,13 @@ def test_Poisson(self):
         self.assertTrue(isinstance(samples, list))
         self.assertTrue(len(samples) == 3)
 
+    def test_DiscreteUniform(self):
+        Du = DiscreteUniform([10, 20])
+        samples = Du.forward_simulate(Du.get_input_values(), 3)
+        self.assertTrue(isinstance(samples, list))
+        self.assertTrue(len(samples) == 3)
+
+
 class CheckParametersBeforeSamplingTests(unittest.TestCase):
     """Tests whether False will be returned if the input parameters of _check_parameters_before_sampling are not
     accepted."""
@@ -104,5 +127,12 @@ def test_Poisson(self):
         self.assertFalse(Po._check_input([3, 5]))
         self.assertFalse(Po._check_input([-1]))
 
+    def test_DiscreteUniform(self):
+        Du = DiscreteUniform([10, 20])
+        self.assertFalse(Du._check_input([3.0, 5]))
+        self.assertFalse(Du._check_input([2, 6.0]))
+        self.assertFalse(Du._check_input([5, 2]))
+
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/tests/distances_tests.py b/tests/distances_tests.py
@@ -78,8 +78,7 @@ def test_distance(self):
         self.assertEqual(self.distancefunc.distance(d1,d1), 0.0)
 
     def test_dist_max(self):
-        self.assertTrue(self.distancefunc.dist_max() == 1.0)        
-
+        self.assertTrue(self.distancefunc.dist_max() == 1.0)
 
 if __name__ == '__main__':
     unittest.main()