Merge pull request #90 from JannisHoch/dev

v0.0.4
JannisHoch · Sep 18, 2020 · ccd5e09 · ccd5e09
2 parents 0b176f9 + 8536cf4
commit ccd5e09
Show file tree

Hide file tree

Showing 103 changed files with 14,423 additions and 196,898 deletions.
diff --git a/.gitignore b/.gitignore
@@ -132,4 +132,13 @@ dmypy.json
 OUT*/
 
 #latest UCDP file too large for commit
-*ged201*
+*ged201*
+
+# sphinx
+generated*
+
+# article related stuff
+_paper/*
+
+# converted htmls
+example/*.html
diff --git a/.STALL.travis.yml → .travis.yml b/.STALL.travis.yml → .travis.yml
@@ -2,7 +2,6 @@
 
 language: python
 python:
-  - 3.8
   - 3.7
   - 3.6
 

diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -1,3 +1,4 @@
 {
-    "restructuredtext.confPath": "${workspaceFolder}\\docs"
+    "restructuredtext.confPath": "${workspaceFolder}\\docs",
+    "python.pythonPath": "C:\\Users\\Icke\\anaconda3\\envs\\conflict_model\\python.exe"
 }
diff --git a/HISTORY.rst b/HISTORY.rst
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2020 Jannis M. Hoch
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.rst b/README.rst
@@ -2,18 +2,30 @@
 Overview
 ===============
 
-conflict_model
+The conflict_model
 ------------------
-(Machine learning) model for mapping environmental drivers of conflict risk
+(Machine learning) model for mapping environmental drivers of conflict risk.
 
 .. image:: https://travis-ci.com/JannisHoch/conflict_model.svg?token=BnX1oxxHRbyd1dPyXAp2&branch=dev
     :target: https://travis-ci.com/JannisHoch/conflict_model
 
+.. image:: https://img.shields.io/badge/License-MIT-blue.svg
+    :target: https://github.com/JannisHoch/conflict_model/blob/dev/LICENSE
 
-installation
+.. image:: https://readthedocs.org/projects/conflict-model/badge/?version=dev
+    :target: https://conflict-model.readthedocs.io/en/dev/?badge=dev
+
+.. image:: https://img.shields.io/github/v/release/JannisHoch/conflict_model
+    :target: https://github.com/JannisHoch/conflict_model/releases/tag/v0.0.3
+
+.. image:: https://badges.frapsoft.com/os/v2/open-source.svg?v=103
+    :target: https://github.com/ellerbrock/open-source-badges/
+
+Installation
 ----------------
 
-To install the conflict model, first clone the code from GitHub. It is advised to create an individual python environment first. Then go to the model folder and install the model.
+To install the conflict model, first clone the code from GitHub. It is advised to create an individual python environment first. 
+You can then install the model package into this environment.
 
 .. code-block:: console
 
@@ -23,10 +35,10 @@ To install the conflict model, first clone the code from GitHub. It is advised t
     $ conda activate conflict_model
     $ python setup.py develop
 
-execution
+Execution
 ----------------
 
-example notebook
+Example notebook
 ^^^^^^^^^^^^^^^^^^
 
 To run the example jupyter notebook, follow these instructions
@@ -38,43 +50,26 @@ To run the example jupyter notebook, follow these instructions
 
 This automatically executes the notebook and converts it to a html-file, also stored in the example folder.
 
-.. note:: It is of course also possible to execute the notebook cell by cell using jupyter notebook
+It is of course also possible to execute the notebook cell by cell using jupyter notebook.
 
-with runner script
+Runner script
 ^^^^^^^^^^^^^^^^^^
 
-To run the model from command line, a command line script is provided. In the most basic version, all data is taken from the settings-file.
+To run the model from command line, a command line script is provided. 
+All data and settings are retrieved from the settings-file, which needs to be provided as a command line argument.
 
 .. code-block:: console
 
     $ cd path/to/conflict_model/scripts
     $ python runner.py path/to/conflict_model/data/run_setting.cfg
 
-If output is to be stored in an output map, this currently needs to be specified in the runner scipt explictely (-so option).
-By default, output is stored to the output directory specified in the settings-file. Alternatively, this can be provided via command line too (-o option).
-For some minimal verbose output, please specify this using the -v option.
-
-.. code-block:: console
-
-    $ python runner.py -o path/to/output/folder path/to/conflict_model/data/run_setting.cfg -so -v
-
-.. note:: for convenience, there is a 'run_script.sh' file executing just this command.
+By default, output is stored to the output directory specified in the settings-file. 
 
-For help, try this if you are in the scripts folder:
-
-.. code-block:: console
-
-    $ python runner.py --help
-
-authors
+Authors
 ----------------
 
 * Jannis M. Hoch (Utrecht University)
 * Sophie de Bruin (Utrecht University, PBL)
 * Niko Wanders (Utrecht University)
 
-corrosponding author: Jannis M. Hoch (j.m.hoch@uu.nl)
-
-license
-----------------
-tba
+Corresponding author: Jannis M. Hoch (j.m.hoch@uu.nl)
diff --git a/conflict_model/__init__.py b/conflict_model/__init__.py
@@ -1,4 +1,4 @@
-"""Top-level package for pcrglobwb_utils."""
+"""Top-level package."""
 
 from . import selection
 from . import utils
@@ -13,4 +13,4 @@
 
 __author__ = """Jannis M. Hoch, Niko Wanders, Sophie de Bruin"""
 __email__ = 'j.m.hoch@uu.nl'
-__version__ = '0.0.3'
+__version__ = '0.0.4'
diff --git a/conflict_model/conflict.py b/conflict_model/conflict.py
@@ -51,17 +51,17 @@ def conflict_in_year_bool(conflict_gdf, extent_gdf, config, sim_year):
     return list_out
 
 def get_poly_ID(extent_gdf): 
-    """Extracts geometry information for each polygon from geodataframe and saves in list.
+    """Extracts and returns a list with unique identifiers for each polygon used in the model. The identifiers are currently limited to 'name' or 'watprovID'.
 
     Args:
-        extent_gdf ([type]): [description]
+        extent_gdf (geo-dataframe): geo-dataframe containing one or more polygons.
 
     Raises:
-        AssertionError: [description]
+        AssertionError: error raised if length of output list does not match length of input geo-dataframe.
 
     Returns:
-        [type]: [description]
-    """    
+        list: list containing a unique identifier extracted from geo-dataframe for each polygon used in the model.
+    """  
 
     # initiate empty list
     list_ID = []
@@ -81,16 +81,16 @@ def get_poly_ID(extent_gdf):
     return list_ID
 
 def get_poly_geometry(extent_gdf): 
-    """Extracts geometry information for each polygon from geodataframe and saves in list.
+    """Extracts geometry information for each polygon from geodataframe and saves to list. The geometry column in geodataframe must be named 'geometry'.
 
     Args:
-        extent_gdf ([type]): [description]
+        extent_gdf (geo-dataframe): geo-dataframe containing one or more polygons with geometry information.
 
     Raises:
-        AssertionError: [description]
+        AssertionError: error raised if length of output list does not match length of input geo-dataframe.
 
     Returns:
-        [type]: [description]
+        list: list containing the geometry information extracted from geo-dataframe for each polygon used in the model.
     """    
 
     print('listing the geometry of all geographical units')
@@ -110,13 +110,13 @@ def get_poly_geometry(extent_gdf):
     return list_geometry
 
 def split_conflict_geom_data(X):
-    """[summary]
+    """Separates the unique identifier and geometry information from the variable-containing X-array.
 
     Args:
-        X ([type]): [description]
+        X (array): variable-containing X-array.
 
     Returns:
-        [type]: [description]
+        arrays: separate arrays with ID, geometry, and actual data.
     """    
 
     X_ID = X[:, 0]
@@ -126,25 +126,24 @@ def split_conflict_geom_data(X):
     return X_ID, X_geom, X_data
 
 def get_pred_conflict_geometry(X_test_ID, X_test_geom, y_test, y_pred):
-    """[summary]
+    """Stacks together the arrays with unique identifier, geometry, test data, and predicted data into a dataframe. 
+    Contains therefore only the data points used in the test-sample, not in the training-sample. 
+    Additionally computes whether a correct prediction was made in column 'correct_pred'.
 
     Args:
-        X_test_ID ([type]): [description]
-        X_test_geom ([type]): [description]
-        y_test ([type]): [description]
-        y_pred ([type]): [description]
+        X_test_ID (list): list containing the unique identifier per data point.
+        X_test_geom (list): list containing the geometry per data point.
+        y_test (list): list containing test-data.
+        y_pred (list): list containing predictions.
 
     Returns:
-        [type]: [description]
+        dataframe: dataframe with each input list as column plus computed 'correct_pred'.
     """   
 
     arr = np.column_stack((X_test_ID, X_test_geom, y_test, y_pred))
 
     df = pd.DataFrame(arr, columns=['ID', 'geometry', 'y_test', 'y_pred'])
 
-    #TODO: think this through properly
-    # df['conflict_hit'] = np.where((df['y_test'] == 1) & (df['y_pred'] ==1), 1, np.nan)
-
-    df['overall_hit'] = np.where(df['y_test'] == df['y_pred'], 1, 0)
+    df['correct_pred'] = np.where(df['y_test'] == df['y_pred'], 1, 0)
 
     return df
diff --git a/conflict_model/data.py b/conflict_model/data.py
@@ -6,14 +6,16 @@
 
 
 def initiate_XY_data(config):
-    """[summary]
+    """Initiates an empty dictionary to contain the XY-data for each polygon. 
+    By default, the first column is for the polygon ID, the second for polygon geometry, and the last for binary conflict data (i.e. the Y-data).
+    Every column in between corresponds to the variables provided in the cfg-file (i.e. the X-data).
 
     Args:
-        config ([type]): [description]
+        config (ConfigParser-object): object containing the parsed configuration-settings of the model.
 
     Returns:
-        [type]: [description]
-    """    
+        dict: empty dictionary to hold the variable values (X) and binary conflict data (Y) plus meta-data.
+    """
 
     XY = {}
     XY['poly_ID'] = pd.Series()
@@ -26,20 +28,22 @@ def initiate_XY_data(config):
 
     return XY
 
-def fill_XY(XY, config, conflict_gdf, extent_active_polys_gdf):
-    """[summary]
+def fill_XY(XY, config, conflict_gdf, polygon_gdf):
+    """Fills the XY-dictionary with data for each variable and conflict for each polygon for each simulation year. 
+    The number of rows should therefore equal the number of simulation years times the number of polygons.
+    At end of last simulation year, the dictionary is converted to a numpy-array.
 
     Args:
-        XY ([type]): [description]
-        config ([type]): [description]
-        conflict_gdf ([type]): [description]
-        extent_active_polys_gdf ([type]): [description]
+        XY (dict): initiated, i.e. empty, XY-dictionary
+        config (ConfigParser-object): object containing the parsed configuration-settings of the model.
+        conflict_gdf (geo-dataframe): geo-dataframe containing the selected conflicts.
+        polygon_gdf (geo-dataframe): geo-dataframe containing the selected polygons.
 
     Raises:
-        Warning: [description]
+        Warning: a warning is raised if the datetime-format of the netCDF-file does not match conventions and/or supported formats.
 
     Returns:
-        array: [description]
+        array: filled array containing the variable values (X) and binary conflict data (Y) plus meta-data.
     """    
 
     if config.getboolean('general', 'verbose'): print('reading data for period from', str(config.getint('settings', 'y_start')), 'to', str(config.getint('settings', 'y_end')) + os.linesep)
@@ -55,21 +59,21 @@ def fill_XY(XY, config, conflict_gdf, extent_active_polys_gdf):
             if key == 'conflict':
 
                 data_series = value
-                data_list = conflict.conflict_in_year_bool(conflict_gdf, extent_active_polys_gdf, config, sim_year)
+                data_list = conflict.conflict_in_year_bool(conflict_gdf, polygon_gdf, config, sim_year)
                 data_series = data_series.append(pd.Series(data_list), ignore_index=True)
                 XY[key] = data_series
 
             elif key == 'poly_ID':
 
                 data_series = value
-                data_list = conflict.get_poly_ID(extent_active_polys_gdf)
+                data_list = conflict.get_poly_ID(polygon_gdf)
                 data_series = data_series.append(pd.Series(data_list), ignore_index=True)
                 XY[key] = data_series
 
             elif key == 'poly_geometry':
 
                 data_series = value
-                data_list = conflict.get_poly_geometry(extent_active_polys_gdf)
+                data_list = conflict.get_poly_geometry(polygon_gdf)
                 data_series = data_series.append(pd.Series(data_list), ignore_index=True)
                 XY[key] = data_series
 
@@ -79,13 +83,13 @@ def fill_XY(XY, config, conflict_gdf, extent_active_polys_gdf):
 
                 if (np.dtype(nc_ds.time) == np.float32) or (np.dtype(nc_ds.time) == np.float64):
                     data_series = value
-                    data_list = variables.nc_with_float_timestamp(extent_active_polys_gdf, config, key, sim_year)
+                    data_list = variables.nc_with_float_timestamp(polygon_gdf, config, key, sim_year)
                     data_series = data_series.append(pd.Series(data_list), ignore_index=True)
                     XY[key] = data_series
 
                 elif np.dtype(nc_ds.time) == 'datetime64[ns]':
                     data_series = value
-                    data_list = variables.nc_with_continous_datetime_timestamp(extent_active_polys_gdf, config, key, sim_year)
+                    data_list = variables.nc_with_continous_datetime_timestamp(polygon_gdf, config, key, sim_year)
                     data_series = data_series.append(pd.Series(data_list), ignore_index=True)
                     XY[key] = data_series
 
@@ -97,13 +101,14 @@ def fill_XY(XY, config, conflict_gdf, extent_active_polys_gdf):
     return pd.DataFrame.from_dict(XY).to_numpy()
 
 def split_XY_data(XY, config):
-    """[summary]
+    """Separates the XY-array into array containing information about variable values (X-array) and conflict data (Y-array).
+    Thereby, the X-array also contains the information about unique identifier and polygon geometry.
 
     Args:
-        XY (array): [description]
+        XY (array): array containing variable values and conflict data
 
     Returns:
-        [type]: [description]
+        arrays: two separate arrays, the X-array and Y-array
     """    
 
     XY = pd.DataFrame(XY)