-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #78 from JannisHoch/dev
version 0.0.3
- Loading branch information
Showing
29 changed files
with
1,843 additions
and
3,871 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -129,4 +129,7 @@ dmypy.json | |
.pyre/ | ||
|
||
#output folders | ||
OUT*/ | ||
OUT*/ | ||
|
||
#latest UCDP file too large for commit | ||
*ged201* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,150 @@ | ||
import geopandas as gpd | ||
import pandas as pd | ||
import numpy as np | ||
import matplotlib.pyplot as plt | ||
import os, sys | ||
|
||
def conflict_in_year_bool(conflict_gdf, extent_gdf, config, sim_year):
    """Flags for each polygon whether at least one conflict was recorded in a given year.

    The conflict entries of ``sim_year`` are spatially joined with the polygons.
    Every polygon whose identifier appears in the joined data gets a 1, all
    other polygons get a 0.

    Args:
        conflict_gdf (geodataframe): geo-dataframe containing georeferenced information of conflict (tested with PRIO/UCDP data). The columns 'year' and 'best' are read.
        extent_gdf (geodataframe): geo-dataframe containing one or more polygons with geometry information for which values are extracted. Polygons are identified by the column 'watprovID' or, if that column is absent, by 'name'.
        config (config): parsed configuration settings of run (not used by this function).
        sim_year (int): year for which data is extracted.

    Raises:
        KeyError: if neither 'watprovID' nor 'name' is available as identifier column.

    Returns:
        list: list containing 0/1 per polygon depending on conflict occurrence, in the row order of extent_gdf.
    """

    # select the entries which occurred in this year
    conflicts_in_year = conflict_gdf.loc[conflict_gdf.year == sim_year]

    # merge the dataframes with polygons and conflict information, creating a sub-set of polygons/regions
    data_merged = gpd.sjoin(conflicts_in_year, extent_gdf)

    # pick the identifier column explicitly instead of relying on a bare except,
    # which would also hide unrelated errors (and even KeyboardInterrupt)
    merged_id_column = 'watprovID' if 'watprovID' in data_merged.columns else 'name'

    # determine the aggregated amount of fatalities in one region (e.g. water province)
    fatalities_per_watProv = (
        data_merged['best']
        .groupby(data_merged[merged_id_column])
        .sum()
        .to_frame()
        .rename(columns={"best": 'total_fatalities'})
    )

    # a region that is present in the aggregated sub-set had at least one conflict entry;
    # a set gives O(1) membership tests instead of scanning the index for every polygon
    regions_with_conflict = set(fatalities_per_watProv.index)

    extent_id_column = 'watprovID' if 'watprovID' in extent_gdf.columns else 'name'
    list_out = [int(region in regions_with_conflict) for region in extent_gdf[extent_id_column]]

    return list_out
|
||
def get_poly_ID(extent_gdf):
    """Extracts the identifier of each polygon from geodataframe and saves in list.

    The column 'name' is used if it exists, otherwise the column 'watprovID'.
    NOTE(review): conflict_in_year_bool checks 'watprovID' first and 'name'
    second - confirm that the differing order is intended for dataframes
    containing both columns.

    Args:
        extent_gdf (geodataframe): geo-dataframe containing one or more polygons.

    Raises:
        KeyError: if neither 'name' nor 'watprovID' is a column of extent_gdf.

    Returns:
        list: identifier of each polygon, in the row order of extent_gdf.
    """

    # choose the identifier column once and explicitly; the former bare except
    # per row would also have swallowed unrelated errors
    id_column = 'name' if 'name' in extent_gdf.columns else 'watprovID'

    # reading the whole column keeps row order and avoids building a row object per polygon
    list_ID = extent_gdf[id_column].tolist()

    return list_ID
|
||
def get_poly_geometry(extent_gdf):
    """Extracts geometry information for each polygon from geodataframe and saves in list.

    Args:
        extent_gdf (geodataframe): geo-dataframe containing one or more polygons; the column 'geometry' is read.

    Raises:
        KeyError: if extent_gdf has no column 'geometry'.

    Returns:
        list: entry of the 'geometry' column for each polygon, in the row order of extent_gdf.
    """

    print('listing the geometry of all geographical units')

    # read the column in one go rather than materialising every row via iloc;
    # the resulting list has by construction one entry per polygon
    list_geometry = extent_gdf['geometry'].tolist()

    return list_geometry
|
||
def split_conflict_geom_data(X):
    """Separates a 2D array into its first column, its second column and all remaining columns.

    Args:
        X (array): two-dimensional array. Judging by the returned names, column 0 presumably holds polygon IDs and column 1 geometries - verify against caller.

    Returns:
        tuple: (X[:, 0], X[:, 1], X[:, 2:]), i.e. ID column, geometry column and the remaining data columns.
    """

    # plain slicing only; nothing is copied or converted here
    first_column, second_column = X[:, 0], X[:, 1]
    remaining_columns = X[:, 2:]

    return first_column, second_column, remaining_columns
|
||
def get_pred_conflict_geometry(X_test_ID, X_test_geom, y_test, y_pred):
    """Combines IDs, geometries, observed and predicted values in one dataframe and marks matching predictions.

    Args:
        X_test_ID (array-like): one identifier per data point.
        X_test_geom (array-like): one geometry per data point.
        y_test (array-like): one reference value per data point.
        y_pred (array-like): one predicted value per data point.

    Returns:
        dataframe: columns 'ID', 'geometry', 'y_test', 'y_pred' plus 'overall_hit', which is 1 where y_test equals y_pred and 0 elsewhere.
    """

    column_names = ['ID', 'geometry', 'y_test', 'y_pred']
    stacked = np.column_stack((X_test_ID, X_test_geom, y_test, y_pred))
    df = pd.DataFrame(stacked, columns=column_names)

    #TODO: think this through properly
    # df['conflict_hit'] = np.where((df['y_test'] == 1) & (df['y_pred'] ==1), 1, np.nan)

    prediction_matches = df['y_test'] == df['y_pred']
    df['overall_hit'] = np.where(prediction_matches, 1, 0)

    return df
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
from conflict_model import conflict, variables | ||
import numpy as np | ||
import xarray as xr | ||
import pandas as pd | ||
import os, sys | ||
|
||
|
||
def initiate_XY_data(config):
    """Creates the dictionary of empty series in which all sample data is collected.

    The dictionary contains, in this order, the keys 'poly_ID' and
    'poly_geometry', one key per option in the config section 'env_vars',
    and finally 'conflict'.

    Args:
        config (config): parsed configuration settings of run. The section 'env_vars' and the option 'verbose' of section 'general' are read.

    Returns:
        dict: empty pandas series per key; dtype object for 'poly_ID' and 'poly_geometry', float for the 'env_vars' entries, int for 'conflict'.
    """

    # an explicit dtype avoids the deprecation warning of a dtype-less empty
    # Series and makes the result independent of the pandas version
    XY = {
        'poly_ID': pd.Series(dtype=object),
        'poly_geometry': pd.Series(dtype=object),
    }

    for var_name, _ in config.items('env_vars'):
        XY[str(var_name)] = pd.Series(dtype=float)

    # 'conflict' is added last on purpose: split_XY_data treats the last column as target
    XY['conflict'] = pd.Series(dtype=int)

    if config.getboolean('general', 'verbose'): print('{}'.format(XY) + os.linesep)

    return XY
|
||
def _append_to_series(series, values):
    """Returns a new series holding the entries of series followed by values, re-indexed from 0."""
    # pd.concat replaces Series.append, which was removed in pandas 2.0
    return pd.concat([series, pd.Series(values)], ignore_index=True)


def fill_XY(XY, config, conflict_gdf, extent_active_polys_gdf):
    """Fills the series of the XY-dictionary with one value per polygon and simulation year.

    For every year in np.arange(y_start, y_end) and every key of XY, a list of
    values is obtained and appended to the series stored under that key:
    'conflict' via conflict.conflict_in_year_bool, 'poly_ID' via
    conflict.get_poly_ID, 'poly_geometry' via conflict.get_poly_geometry. Any
    other key is treated as an option of the config section 'env_vars' pointing
    to a nc-file in 'input_dir'; depending on the dtype of its time variable
    the values are read with variables.nc_with_float_timestamp or
    variables.nc_with_continous_datetime_timestamp.

    NOTE(review): np.arange excludes y_end, so the last year processed is
    y_end - 1 although the verbose message reads 'to y_end' - confirm intent.

    Args:
        XY (dict): dictionary of pandas series, e.g. as returned by initiate_XY_data. Its entries are replaced in place.
        config (config): parsed configuration settings of run.
        conflict_gdf (geodataframe): geo-dataframe containing the conflict data.
        extent_active_polys_gdf (geodataframe): geo-dataframe containing the polygons for which values are extracted.

    Raises:
        Warning: if the time variable of a nc-file is neither float32/float64 nor datetime64[ns].

    Returns:
        array: all series of XY combined column-wise, in the key order of XY.
    """

    verbose = config.getboolean('general', 'verbose')
    y_start = config.getint('settings', 'y_start')
    y_end = config.getint('settings', 'y_end')

    if verbose: print('reading data for period from', str(y_start), 'to', str(y_end) + os.linesep)

    # go through all simulation years as specified in config-file
    for sim_year in np.arange(y_start, y_end, 1):

        if verbose: print(os.linesep + 'entering year {}'.format(sim_year) + os.linesep)

        # go through all keys in dictionary; only values of existing keys are
        # replaced, so iterating over the dictionary while updating it is safe
        for key, value in XY.items():

            if key == 'conflict':
                data_list = conflict.conflict_in_year_bool(conflict_gdf, extent_active_polys_gdf, config, sim_year)

            elif key == 'poly_ID':
                data_list = conflict.get_poly_ID(extent_active_polys_gdf)

            elif key == 'poly_geometry':
                data_list = conflict.get_poly_geometry(extent_active_polys_gdf)

            else:
                nc_path = os.path.join(config.get('general', 'input_dir'), config.get('env_vars', key))

                # the file is only opened to inspect the dtype of its time
                # variable; the context manager closes it again right away
                with xr.open_dataset(nc_path) as nc_ds:
                    time_dtype = nc_ds.time.dtype

                if time_dtype == np.float32 or time_dtype == np.float64:
                    data_list = variables.nc_with_float_timestamp(extent_active_polys_gdf, config, key, sim_year)

                elif time_dtype == 'datetime64[ns]':
                    data_list = variables.nc_with_continous_datetime_timestamp(extent_active_polys_gdf, config, key, sim_year)

                else:
                    # report the offending file; the previously used name was undefined
                    raise Warning('this nc-file does have a different dtype for the time variable than currently supported: {}'.format(nc_path))

            XY[key] = _append_to_series(value, data_list)

    if verbose: print('...reading data DONE' + os.linesep)

    return pd.DataFrame.from_dict(XY).to_numpy()
|
||
def split_XY_data(XY, config):
    """Removes all data points with missing values and separates the target from the other columns.

    Args:
        XY (array): two-dimensional array with one row per data point; the last column is taken as target (conflict).
        config (config): parsed configuration settings of run; only option 'verbose' of section 'general' is read.

    Returns:
        tuple: X (array with all columns but the last) and Y (last column cast to int), both without the rows that contained missing values.
    """

    verbose = config.getboolean('general', 'verbose')

    XY = pd.DataFrame(XY)
    if verbose: print('number of data points including missing values:', len(XY))

    XY = XY.dropna()
    if verbose: print('number of data points excluding missing values:', len(XY))

    XY = XY.to_numpy()
    X = XY[:, :-1] # since conflict is the last column, we know that all previous columns must be variable values
    Y = XY[:, -1].astype(int)

    if verbose:
        n_conflicts = int(np.count_nonzero(Y))
        # if every row was dropped there is nothing to divide by; report 0 percent instead of failing
        fraction_Y_1 = 100 * n_conflicts / len(Y) if len(Y) > 0 else 0.0
        print('from this, {0} points are equal to 1, i.e. represent conflict occurence. This is a fraction of {1} percent.'.format(n_conflicts, round(fraction_Y_1, 2)))

    return X, Y
Oops, something went wrong.