diff --git a/README.md b/README.md index ec75eca6..df39d5a6 100644 --- a/README.md +++ b/README.md @@ -26,9 +26,9 @@ scientists by providing # Documentation For more information, check out the -* [Documentation](http://abcpy.readthedocs.io/en/v0.5.3) -* [Examples](https://github.com/eth-cscs/abcpy/tree/v0.5.3/examples) directory and -* [Reference](http://abcpy.readthedocs.io/en/v0.5.3/abcpy.html) +* [Documentation](http://abcpy.readthedocs.io/en/v0.5.4) +* [Examples](https://github.com/eth-cscs/abcpy/tree/v0.5.4/examples) directory and +* [Reference](http://abcpy.readthedocs.io/en/v0.5.4/abcpy.html) Further, we provide a [collection of models](https://github.com/eth-cscs/abcpy-models) for which ABCpy @@ -54,7 +54,7 @@ finally CSCS (Swiss National Super Computing Center) for their generous support. There is a paper in the proceedings of the 2017 PASC conference. In case you use ABCpy for your publication, we would appreciate a citation. You can use -[this](https://github.com/eth-cscs/abcpy/blob/v0.5.3/doc/literature/DuttaS-ABCpy-PASC-2017.bib) +[this](https://github.com/eth-cscs/abcpy/blob/v0.5.4/doc/literature/DuttaS-ABCpy-PASC-2017.bib) BibTex reference. diff --git a/VERSION b/VERSION index be14282b..7d856835 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.5.3 +0.5.4 diff --git a/abcpy/approx_lhd.py b/abcpy/approx_lhd.py index 46a000fa..e63d6100 100644 --- a/abcpy/approx_lhd.py +++ b/abcpy/approx_lhd.py @@ -91,11 +91,10 @@ def likelihood(self, y_obs, y_sim): # print("DEBUG: robust_precision_sim_det computation..") robust_precision_sim_det = np.linalg.det(robust_precision_sim) # print("DEBUG: combining.") - result = pow(np.sqrt((1/(2*np.pi))*robust_precision_sim_det),self.stat_obs.shape[0])\ - *np.exp(np.sum(-0.5*np.sum(np.array(self.stat_obs-mean_sim)* \ - np.array(np.matrix(robust_precision_sim)*np.matrix(self.stat_obs-mean_sim).T).T, axis = 1))) - - return result + tmp1 = robust_precision_sim * np.array(self.stat_obs.reshape(-1,1) - mean_sim.reshape(-1,1)).T + tmp2 = np.exp(np.sum(-0.5*np.sum(np.array(self.stat_obs-mean_sim) * np.array(tmp1).T, axis = 1))) + tmp3 = pow(np.sqrt((1/(2*np.pi)) * robust_precision_sim_det),self.stat_obs.shape[0]) + return tmp2 * tmp3 class PenLogReg(Approx_likelihood, GraphTools): diff --git a/abcpy/discretemodels.py b/abcpy/discretemodels.py index 81c12cf4..41431ad4 100644 --- a/abcpy/discretemodels.py +++ b/abcpy/discretemodels.py @@ -302,5 +302,97 @@ def pmf(self, input_values, x): """ pmf = poisson(int(input_values[0])).pmf(x) + self.calculated_pmf = pmf + return pmf + + + +class DiscreteUniform(Discrete, ProbabilisticModel): + def __init__(self, parameters, name='DiscreteUniform'): + """This class implements a probabilistic model following a Discrete Uniform distribution. + + Parameters + ---------- + parameters: list + A list containing two entries, the upper and lower bound of the range. + + name: string + The name that should be given to the probabilistic model in the journal file. + """ + + if not isinstance(parameters, list): + raise TypeError('Input for Discrete Uniform has to be of type list.') + if len(parameters) != 2: + raise ValueError('Input for Discrete Uniform has to be of length 2.') + + self._dimension = 1 + input_parameters = InputConnector.from_list(parameters) + super(DiscreteUniform, self).__init__(input_parameters, name) + self.visited = False + + def _check_input(self, input_values): + # Check whether input has correct type or format + if len(input_values) != 2: + raise ValueError('Number of parameters of FloorField model must be 2.') + + # Check whether input is from correct domain + lowerbound = input_values[0] # Lower bound + upperbound = input_values[1] # Upper bound + + if not isinstance(lowerbound, (int, np.int64, np.int32, np.int16)) or not isinstance(upperbound, (int, np.int64, np.int32, np.int16)) or lowerbound >= upperbound: + return False + return True + + def _check_output(self, parameters): + """ + Checks parameter values given as fixed values. Returns False iff it is not an integer + """ + if not isinstance(parameters[0], (int, np.int32, np.int64)): + return False + return True + + def forward_simulate(self, input_values, k, rng=np.random.RandomState()): + """ + Samples from the Discrete Uniform distribution associated with the probabilistic model. + Parameters + ---------- + input_values: list + List of input parameters, in the same order as specified in the InputConnector passed to the init function + k: integer + The number of samples to be drawn. + rng: random number generator + The random number generator to be used. + + Returns + ------- + list: [np.ndarray] + A list containing the sampled values as np-array. + """ + + result = np.array(rng.randint(input_values[0], input_values[1], size=k, dtype=np.int64)) + return [np.array([x]) for x in result] + + def get_output_dimension(self): + return self._dimension + + def pmf(self, input_values, x): + """Evaluates the probability mass function at point x. + + Parameters + ---------- + input_values: list + List of input parameters, in the same order as specified in the InputConnector passed to the init function + x: float + The point at which the pmf should be evaluated. + + Returns + ------- + float: + The pmf evaluated at point x. + """ + upperbound, lowerbound = input_values[0], input_values[1] + pmf = 1. / (upperbound - lowerbound + 1) + self.calculated_pmf = pmf return pmf + diff --git a/abcpy/distances.py b/abcpy/distances.py index 8c2df7ed..73ca3bfa 100644 --- a/abcpy/distances.py +++ b/abcpy/distances.py @@ -3,6 +3,7 @@ import numpy as np from glmnet import LogitNet from sklearn import linear_model +from scipy import stats class Distance(metaclass = ABCMeta): @@ -211,8 +212,6 @@ def distance(self, d1, d2): def dist_max(self): return 1.0 - - class LogReg(Distance): @@ -257,7 +256,7 @@ def distance(self, d1, d2): training_set_labels = np.concatenate((label_s1, label_s2), axis=0).ravel() reg_inv = 1e5 - log_reg_model = linear_model.LogisticRegression(C=reg_inv, penalty='l1') + log_reg_model = linear_model.LogisticRegression(C=reg_inv, penalty='l1', max_iter=1000, solver='liblinear') log_reg_model.fit(training_set_features, training_set_labels) score = log_reg_model.score(training_set_features, training_set_labels) distance = 2.0 * (score - 0.5) diff --git a/abcpy/inferences.py b/abcpy/inferences.py index 0f54a3fc..28c8dbb8 100644 --- a/abcpy/inferences.py +++ b/abcpy/inferences.py @@ -811,19 +811,17 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 break # 2: calculate approximate lieklihood for new parameters self.logger.info("Calculate approximate likelihood") - new_parameters_pds = self.backend.parallelize(new_parameters) - approx_likelihood_new_parameters_and_counter_pds = self.backend.map(self._approx_lik_calc, new_parameters_pds) - self.logger.debug("collect approximate likelihood from pds") - approx_likelihood_new_parameters_and_counter = self.backend.collect(approx_likelihood_new_parameters_and_counter_pds) - approx_likelihood_new_parameters, counter = [list(t) for t in zip(*approx_likelihood_new_parameters_and_counter)] - - approx_likelihood_new_parameters = np.array(approx_likelihood_new_parameters).reshape(-1,1) + merged_sim_data_parameter = self.flat_map(new_parameters, self.n_samples_per_param, self._simulate_data) + # Compute likelihood for each parameter value + approx_likelihood_new_parameters, counter = self.simple_map(merged_sim_data_parameter, self._approx_calc) + approx_likelihood_new_parameters = np.array(approx_likelihood_new_parameters).reshape(-1, 1) for count in counter: self.simulation_counter+=count # 3: calculate new weights for new parameters self.logger.info("Calculating weights") + new_parameters_pds = self.backend.parallelize(new_parameters) new_weights_pds = self.backend.map(self._calculate_weight, new_parameters_pds) new_weights = np.array(self.backend.collect(new_weights_pds)).reshape(-1, 1) @@ -874,30 +872,67 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 return journal + ## Simple_map and Flat_map: Python wrapper for nested parallelization + def simple_map(self, data, map_function): + data_pds = self.backend.parallelize(data) + result_pds = self.backend.map(map_function, data_pds) + result = self.backend.collect(result_pds) + main_result, counter = [list(t) for t in zip(*result)] + return main_result, counter + def flat_map(self, data, n_repeat, map_function): + repeated_data = np.repeat(data, n_repeat, axis=0) + repeated_data_pds = self.backend.parallelize(repeated_data) + repeated_data__result_pds = self.backend.map(map_function, repeated_data_pds) + repeated_data_result = self.backend.collect(repeated_data__result_pds) + repeated_data, result = [list(t) for t in zip(*repeated_data_result)] + merged_result_data = [] + for ind in range(0, data.shape[0]): + merged_result_data.append([[[result[np.int(i)][0][0] \ + for i in + np.where(np.mean(repeated_data == data[ind, :], axis=1) == 1)[0]]], + data[ind, :]]) + return merged_result_data + # define helper functions for map step - def _approx_lik_calc(self, theta): + def _simulate_data(self, theta): """ - Compute likelihood for new parameters using approximate likelihood function - + Simulate n_sample_per_param many datasets for new parameter Parameters ---------- theta: numpy.ndarray 1xp matrix containing the model parameters, where p is the number of parameters + Returns + ------- + (theta, sim_data) + tehta and simulate data + """ + + # Simulate the fake data from the model given the parameter value theta + # print("DEBUG: Simulate model for parameter " + str(theta)) + self.set_parameters(theta) + y_sim = self.simulate(1, self.rng) + + return (theta, y_sim) + def _approx_calc(self, sim_data_parameter): + """ + Compute likelihood for new parameters using approximate likelihood function + Parameters + ---------- + sim_data_parameter: list + First element is the parameter and the second element is the simulated data Returns ------- float The approximated likelihood function """ + # Extract data and parameter + y_sim, theta = sim_data_parameter[0], sim_data_parameter[1] - # Simulate the fake data from the model given the parameter value theta - # print("DEBUG: Simulate model for parameter " + str(theta)) - y_sim = self.simulate(self.n_samples_per_param, self.rng) # print("DEBUG: Extracting observation.") obs = self.accepted_parameters_manager.observations_bds.value() # print("DEBUG: Computing likelihood...") - total_pdf_at_theta = 1. lhd = self.likfun.likelihood(obs, y_sim) @@ -905,7 +940,7 @@ def _approx_lik_calc(self, theta): # print("DEBUG: Likelihood is :" + str(lhd)) pdf_at_theta = self.pdf_of_prior(self.model, theta) - total_pdf_at_theta*=(pdf_at_theta*lhd) + total_pdf_at_theta *= (pdf_at_theta * lhd) # print("DEBUG: prior pdf evaluated at theta is :" + str(pdf_at_theta)) return (total_pdf_at_theta, 1) @@ -1553,7 +1588,7 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 seed_arr = self.rng.randint(0, np.iinfo(np.uint32).max, size=int(n_samples / temp_chain_length), dtype=np.uint32) rng_arr = np.array([np.random.RandomState(seed) for seed in seed_arr]) - index_arr = np.linspace(0, n_samples / temp_chain_length - 1, n_samples / temp_chain_length).astype( + index_arr = np.linspace(0, n_samples // temp_chain_length - 1, n_samples // temp_chain_length).astype( int).reshape(int(n_samples / temp_chain_length), ) rng_and_index_arr = np.column_stack((rng_arr, index_arr)) rng_and_index_pds = self.backend.parallelize(rng_and_index_arr) diff --git a/doc/source/DEVELOP.rst b/doc/source/DEVELOP.rst index 03d7ead1..57afac1a 100644 --- a/doc/source/DEVELOP.rst +++ b/doc/source/DEVELOP.rst @@ -15,15 +15,16 @@ new version `M.m.b': 1. Create a release branch `release-M.m.b` 2. Adapt `VERSION` file in the repos root directory: `echo M.m.b > VERSION` 3. Adapt `README.md` file: adapt links to correct version of `User Documentation` and `Reference` -4. Merge all desired feature branches into the release branch -5. Create a pull/ merge request: release branch -> master +4. Adapt `doc/source/DEVELOP.rst` file: to install correct version of ABCpy +5. Merge all desired feature branches into the release branch +6. Create a pull/ merge request: release branch -> master After a successful merge: -5. Create tag vM.m.b (`git tag vM.m.b`) -6. Retag tag `stable` to the current version -7. Push the tag (`git push --tags`) -8. Create a release in GitHub +7. Create tag vM.m.b (`git tag vM.m.b`) +8. Retag tag `stable` to the current version +9. Push the tag (`git push --tags`) +10. Create a release in GitHub The new tag on master will signal Travis to deploy a new package to Pypi while the GitHub release is just for user documentation. diff --git a/doc/source/installation.rst b/doc/source/installation.rst index 8e42e00f..2e5f0293 100644 --- a/doc/source/installation.rst +++ b/doc/source/installation.rst @@ -34,7 +34,7 @@ To create a package and install it, do :: make package - pip3 install build/dist/abcpy-0.5.1-py3-none-any.whl + pip3 install build/dist/abcpy-0.5.4-py3-none-any.whl Note that ABCpy requires Python3. diff --git a/tests/discretemodels_tests.py b/tests/discretemodels_tests.py index 0afbfca6..78082df0 100644 --- a/tests/discretemodels_tests.py +++ b/tests/discretemodels_tests.py @@ -18,6 +18,10 @@ class PoissonAPITests(AbstractAPIImplementationTests, unittest.TestCase): model_types = [Poisson] model_inputs = [[3]] +class DiscreteUniformTests(AbstractAPIImplementationTests, unittest.TestCase): + model_types = [DiscreteUniform] + model_inputs = [[10, 20]] + class CheckParametersAtInitializationTests(unittest.TestCase): """Tests that no probabilistic model with invalid parameters can be initialized.""" @@ -45,6 +49,14 @@ def test_Poisson(self): with self.assertRaises(ValueError): Poisson([2, 3]) + def test_DiscreteUniform(self): + with self.assertRaises(TypeError): + DiscreteUniform(np.array([1, 2, 3])) + + with self.assertRaises(ValueError): + DiscreteUniform([2, 3, 4]) + + class DimensionTests(unittest.TestCase): """Tests whether the dimensions of all discrete models are defined in the correct way.""" @@ -60,6 +72,10 @@ def test_Poisson(self): Po = Poisson([3]) self.assertTrue(Po.get_output_dimension()==1) + def test_DiscreteUniform(self): + Du = DiscreteUniform([10, 20]) + self.assertTrue(Du.get_output_dimension()==1) + class SampleFromDistributionTests(unittest.TestCase): """Tests the return value of forward_simulate for all discrete distributions.""" @@ -81,6 +97,13 @@ def test_Poisson(self): self.assertTrue(isinstance(samples, list)) self.assertTrue(len(samples) == 3) + def test_DiscreteUniform(self): + Du = DiscreteUniform([10, 20]) + samples = Du.forward_simulate(Du.get_input_values(), 3) + self.assertTrue(isinstance(samples, list)) + self.assertTrue(len(samples) == 3) + + class CheckParametersBeforeSamplingTests(unittest.TestCase): """Tests whether False will be returned if the input parameters of _check_parameters_before_sampling are not accepted.""" @@ -104,5 +127,12 @@ def test_Poisson(self): self.assertFalse(Po._check_input([3, 5])) self.assertFalse(Po._check_input([-1])) + def test_DiscreteUniform(self): + Du = DiscreteUniform([10, 20]) + self.assertFalse(Du._check_input([3.0, 5])) + self.assertFalse(Du._check_input([2, 6.0])) + self.assertFalse(Du._check_input([5, 2])) + + if __name__ == '__main__': unittest.main() diff --git a/tests/distances_tests.py b/tests/distances_tests.py index 37a43e55..ab3ed11d 100644 --- a/tests/distances_tests.py +++ b/tests/distances_tests.py @@ -78,8 +78,7 @@ def test_distance(self): self.assertEqual(self.distancefunc.distance(d1,d1), 0.0) def test_dist_max(self): - self.assertTrue(self.distancefunc.dist_max() == 1.0) - + self.assertTrue(self.distancefunc.dist_max() == 1.0) if __name__ == '__main__': unittest.main() diff --git a/tests/inferences_tests.py b/tests/inferences_tests.py index 3516e1f8..4308324d 100644 --- a/tests/inferences_tests.py +++ b/tests/inferences_tests.py @@ -91,8 +91,8 @@ def test_sample(self): self.assertEqual(mu_sample_shape, (10,1)) self.assertEqual(sigma_sample_shape, (10,1)) self.assertEqual(weights_sample_shape, (10,1)) - self.assertLess(abs(mu_post_mean - (-3.55548377)), 1e-3) - self.assertLess(abs(sigma_post_mean - 5.87147492), 1e-3) + self.assertLess(abs(mu_post_mean - (-3.402868)), 1e-3) + self.assertLess(abs(sigma_post_mean - 6.212), 1e-3) self.assertFalse(journal.number_of_simulations == 0) @@ -111,8 +111,8 @@ def test_sample(self): self.assertEqual(mu_sample_shape, (10,1)) self.assertEqual(sigma_sample_shape, (10,1)) self.assertEqual(weights_sample_shape, (10,1)) - self.assertLess(abs(mu_post_mean - (-3.88512394) ), 1e-3) - self.assertLess(abs(sigma_post_mean - 2.90352432), 1e-3) + self.assertLess(abs(mu_post_mean - (-3.03325763) ), 1e-3) + self.assertLess(abs(sigma_post_mean - 6.92124735), 1e-3) self.assertFalse(journal.number_of_simulations == 0)