minor edits
jeanollion committed Feb 6, 2021
1 parent afc7985 commit 0ce2926
Showing 3 changed files with 7 additions and 11 deletions.
8 changes: 2 additions & 6 deletions README.md
@@ -44,15 +44,11 @@ train_it = MultiChannelIterator(dataset_file_path = file_path, channel_keywords
test_it = MultiChannelIterator(dataset_file_path = file_path, channel_keywords = ["/raw", "/labels"], group_keyword="test")
```
# Image formats
- These iterators use an object of class `DatasetIO` to access the data.
- There is currently an implementation of `DatasetIO` for .h5 files (`H5pyIO`), as well as for datasets composed of multiple image files supported by PILLOW (`MultipleFileIO`).
- One can also concatenate datasets from different files:
    - if a dataset is split into several files that contain the same channels, use `ConcatenateDatasetIO`
    - if a dataset contains channels in different files, use `MultipleDatasetIO`

# Demo
See this notebook for a demo: [![](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1J-UPivwyNTpyLhOMfzhfG0pIl6gDD9I5)

# Generate Dataset with BACMMAN
.h5 datasets can be generated directly from [BACMMAN software](https://github.com/jeanollion/bacmman).
See [this tutorial](https://github.com/jeanollion/bacmman/wiki/FineTune-DistNet) for instance.
3 changes: 0 additions & 3 deletions dataset_iterator/helpers.py
@@ -63,9 +63,7 @@ def get_histogram_bins_IPR(histogram, bins, n_bins, percentiles=[25, 75], min_bi
assert bin_range_percentiles>=0 and bin_range_percentiles<=100, "invalid percentile value"
p2 = 100 - bin_range_percentiles
bin_range_percentiles = [min(p2, bin_range_percentiles), max(p2, bin_range_percentiles)]

pmin, pmax = get_percentile(histogram, bins, percentiles)
print(pmin, pmax)
bin_size = (pmax - pmin) / n_bins
if min_bin_size is not None and min_bin_size>0:
bin_size = max(min_bin_size, bin_size)
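The hunk above derives a histogram bin size from an inter-percentile range, optionally clamped to a minimum width. A self-contained numpy sketch of that logic, with a simplified percentile helper standing in for the library's `get_percentile` (whose exact signature is not shown here), could look like:

```python
import numpy as np

def percentiles_from_histogram(histogram, bins, percentiles):
    """Approximate percentile values from histogram counts and bin edges.
    Simplified stand-in for the library's get_percentile helper."""
    cdf = np.cumsum(histogram) / np.sum(histogram)   # cumulative fraction per bin
    centers = (bins[:-1] + bins[1:]) / 2.0           # representative value per bin
    return [centers[np.searchsorted(cdf, p / 100.0)] for p in percentiles]

def bin_size_from_ipr(histogram, bins, n_bins, percentiles=(25, 75), min_bin_size=None):
    """Bin size = inter-percentile range / n_bins, clamped to min_bin_size."""
    pmin, pmax = percentiles_from_histogram(histogram, bins, percentiles)
    bin_size = (pmax - pmin) / n_bins
    if min_bin_size is not None and min_bin_size > 0:
        bin_size = max(min_bin_size, bin_size)  # enforce a lower bound on bin width
    return bin_size
```

For a uniform histogram over [0, 1], the 25–75 inter-percentile range is 0.5, so `n_bins=10` yields a bin size of 0.05 unless a larger `min_bin_size` overrides it.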
@@ -107,7 +105,6 @@ def get_mean_sd(dataset, channel_keyword, group_keyword=None, per_channel=True):
sum_im = np.zeros(shape=(ds_size, n_channels), dtype=np.float64)
sum2_im = np.zeros(shape=(ds_size, n_channels), dtype=np.float64)
for i in range(ds_size):
#print("computing mean / sd : image: {}/{}".format(i, DS_SIZE[dataset_idx]))
image = it[i]
for c in range(n_channels):
sum_im[i,c] = np.sum(image[...,c])
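The loop above accumulates per-image, per-channel sums; mean and standard deviation then follow from the sum and the sum of squares via sd² = E[x²] − E[x]². A simplified self-contained sketch of that computation, operating on an in-memory array of shape `(n_images, ..., n_channels)` rather than the iterator, might be:

```python
import numpy as np

def mean_sd_per_channel(images):
    """Per-channel mean and sd from running sums, as in the helper above:
    accumulate sum and sum of squares per image, then sd^2 = E[x^2] - E[x]^2."""
    n_images = images.shape[0]
    n_channels = images.shape[-1]
    sum_im = np.zeros((n_images, n_channels), dtype=np.float64)
    sum2_im = np.zeros((n_images, n_channels), dtype=np.float64)
    n_pix = np.prod(images.shape[1:-1])  # pixels per image, per channel
    for i in range(n_images):
        for c in range(n_channels):
            sum_im[i, c] = np.sum(images[i, ..., c])
            sum2_im[i, c] = np.sum(images[i, ..., c] ** 2)
    total = n_images * n_pix
    mean = sum_im.sum(axis=0) / total
    sd = np.sqrt(sum2_im.sum(axis=0) / total - mean ** 2)
    return mean, sd
```

Accumulating sums instead of storing all pixels keeps memory bounded, which matters when the dataset is streamed image-by-image from an iterator.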
7 changes: 5 additions & 2 deletions setup.py
@@ -5,13 +5,16 @@

setuptools.setup(
name="dataset_iterator",
version="0.0.1",
packages = ['dataset_iterator'],
version="0.1",
author="Jean Ollion",
author_email="jean.ollion@polytechnique.org",
description="keras data iterator for images contained in dataset files such as hdf5",
description="data iterator for images contained in dataset files such as hdf5 or PIL readable files. Images can be contained in several files. Based on tensorflow.keras.preprocessing.image.Iterator",
long_description=long_description,
long_description_content_type="text/markdown",
url="https://github.com/jeanollion/dataset_iterator.git",
download_url = 'https://github.com/jeanollion/dataset_iterator/archive/v_01.tar.gz',
keywords = ['Iterator', 'Dataset', 'Image', 'Numpy'],
packages=setuptools.find_packages(),
classifiers=[
"Programming Language :: Python :: 3",