From c63c369ccb41c57527d1760f697abecc39352d34 Mon Sep 17 00:00:00 2001 From: eli knaap Date: Tue, 7 Mar 2023 11:36:32 -0800 Subject: [PATCH 1/4] add vio plots --- docs/api.rst | 1 + environment.yml | 5 +-- geosnap/io/util.py | 3 +- geosnap/tests/test_plots.py | 9 +++- geosnap/visualize/__init__.py | 3 +- geosnap/visualize/descriptives.py | 73 +++++++++++++++++++++++++++++++ geosnap/visualize/transitions.py | 2 +- 7 files changed, 87 insertions(+), 9 deletions(-) create mode 100644 geosnap/visualize/descriptives.py diff --git a/docs/api.rst b/docs/api.rst index 471fab1b..e936e198 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -196,6 +196,7 @@ Visualize Module visualize.plot_timeseries visualize.plot_transition_matrix visualize.plot_transition_graphs + visualize.plot_violins_by_cluster .. _util_api: diff --git a/environment.yml b/environment.yml index 01916939..0f322cd3 100644 --- a/environment.yml +++ b/environment.yml @@ -8,7 +8,7 @@ dependencies: - libpysal - cenpy - geopandas >=0.9 - - matplotlib + - matplotlib <=3.3.4 # workaround for proplot missing pin upstream - scikit-learn - seaborn - pyarrow >=0.14.1 @@ -28,6 +28,3 @@ dependencies: - versioneer - pyproj >=3 - pandana - - pip - - pip: - - -e . # install local geosnap package in editable mode diff --git a/geosnap/io/util.py b/geosnap/io/util.py index 9736963a..48020a39 100644 --- a/geosnap/io/util.py +++ b/geosnap/io/util.py @@ -40,10 +40,9 @@ def get_census_gdb(years=None, geom_level="blockgroup", output_dir="."): fn = f"{year}_ACS_5YR_{geom_level.capitalize()}.gdb.zip" out_fn = f"ACS_{year}_5YR_{levels[geom_level].upper()}.gdb.zip" pth = pathlib.PurePath(output_dir, out_fn) - url = f"ftp://ftp2.census.gov/geo/tiger/TIGER_DP/{year}ACS/{fn}" + url = f"https://www2.census.gov/geo/tiger/TIGER_DP/{year}ACS/{fn}" download(url, pth) - def reformat_acs_vars(col): """Convert variable names to the same format used by the Census Detailed Tables API. 
diff --git a/geosnap/tests/test_plots.py b/geosnap/tests/test_plots.py index 6fb08c40..ecdfccd5 100644 --- a/geosnap/tests/test_plots.py +++ b/geosnap/tests/test_plots.py @@ -13,6 +13,7 @@ plot_timeseries, plot_transition_graphs, plot_transition_matrix, + plot_violins_by_cluster ) @@ -71,9 +72,15 @@ def test_graphs(): def test_animation(): if not os.path.exists('geosnap/tests/images'): os.mkdir('geosnap/tests/images') - animate_timeseries(dc_df, column='ward', categorical=True, filename='geosnap/tests/images/animation.gif') + animate_timeseries(dc_df, column='ward', categorical=True, filename='geosnap/tests/images/animation.gif', dpi=50) assert 'animation.gif' in os.listdir('geosnap/tests/images') +def test_violins(): + if not os.path.exists('geosnap/tests/images'): + os.mkdir('geosnap/tests/images') + plot_violins_by_cluster(dc_df, cluster_col='ward', columns=columns, savefig='geosnap/tests/images/violins.png', dpi=50) + assert 'violins.png' in os.listdir('geosnap/tests/images') + def test_boundary_silplot(): p = region_mod[1990].plot_boundary_silhouette(dpi=50,) assert isinstance(p, proplot.gridspec.SubplotGrid diff --git a/geosnap/visualize/__init__.py b/geosnap/visualize/__init__.py index cd587760..efb979c2 100644 --- a/geosnap/visualize/__init__.py +++ b/geosnap/visualize/__init__.py @@ -1,4 +1,5 @@ from .seq import * from .commviz import explore from .transitions import * -from .mapping import * \ No newline at end of file +from .mapping import * +from .descriptives import * diff --git a/geosnap/visualize/descriptives.py b/geosnap/visualize/descriptives.py new file mode 100644 index 00000000..e8ec1bf3 --- /dev/null +++ b/geosnap/visualize/descriptives.py @@ -0,0 +1,73 @@ +"""Tools for describing and exploring cluster/class composition.""" + +import matplotlib.pyplot as plt +import numpy as np +import seaborn as sns + + +def plot_violins_by_cluster( + df, + columns, + cluster_col, + violin_kwargs=None, + figsize=(12, 8), + nrows=None, + ncols=None, + 
titles=None, + savefig=None, + dpi=200 +): + """Create matrix of violin plots categorized by a discrete class variable + + Parameters + ---------- + df : pandas.DataFrame or geopandas.GeoDataFrame + dataframe with columns to plot as violins and a column of class labels + columns : list-like + list of columns to plot as violins + cluster_col : str + name of the column in the dataframe that holds class labels + violin_kwargs : dict, optional + additional keyword arguments passed to seaborn.violinplot + figsize : tuple, optional + size of output figure, by default (12, 8) + nrows : int, optional + number of rows in the violin (nrows * ncols must equal len(columns)), by default None + ncols : int, optional + number of columns in the violin (nrows * ncols must equal len(columns)), by default None + If both ncols and nrows are none, they will be set to the minimum bounding square + titles : list, optional + list of titles to set on each subplot. If None (default) the title of each axes + will be set to the name of the column being plotted + savefig : str, optional + If provided, the figure will be saved at this path + dpi : int, optional + dpi of resulting figure when using `savefig`, by default 200 + + Returns + ------- + matplotlib.axes.Axes + a matplotlib Axes object with a subplot for each column + """ + if nrows is None and ncols is None: + sqcols = int(np.ceil(np.sqrt(len(columns)))) + ncols = sqcols + nrows = sqcols + if violin_kwargs is None: + violin_kwargs = dict() + fig, ax = plt.subplots(nrows, ncols, figsize=figsize) + ax = ax.flatten() + for i, col in enumerate(columns): + sns.violinplot(data=df, y=col, x=df[cluster_col], ax=ax[i], **violin_kwargs) + if titles: + ax[i].set_title(titles[i]) + else: + ax[i].set_title(col) + # pop off any unused axes + for i in range(len(ax)): + if i > len(columns): + ax[i].remove() + plt.tight_layout() + if savefig: + plt.savefig(savefig, dpi=dpi) + return ax diff --git a/geosnap/visualize/transitions.py 
b/geosnap/visualize/transitions.py index 3317f27f..889037a2 100644 --- a/geosnap/visualize/transitions.py +++ b/geosnap/visualize/transitions.py @@ -91,7 +91,7 @@ def plot_transition_matrix( ) else: sm = transition_model - if not n_rows and not n_cols: + if n_rows is None and n_cols is None: sqcols = int(np.ceil(np.sqrt(len(sm.classes) + 1))) n_cols = sqcols n_rows = sqcols From 4d316b35c9bbba853ccd0e9e75a11056807ddc9d Mon Sep 17 00:00:00 2001 From: eli knaap Date: Tue, 7 Mar 2023 11:40:13 -0800 Subject: [PATCH 2/4] remove old commviz --- codecov.yml | 2 - geosnap/visualize/commviz.py | 384 ----------------------------------- 2 files changed, 386 deletions(-) delete mode 100644 geosnap/visualize/commviz.py diff --git a/codecov.yml b/codecov.yml index 79cb3f60..a8ddc0f3 100644 --- a/codecov.yml +++ b/codecov.yml @@ -17,8 +17,6 @@ coverage: - "tools/*" - "geosnap/tests/*" - "geosnap/util/*" - - "geosnap/visualize/commviz.py" - - "geosnap/visualize/viz.py" - "geosnap/io/util.py" - "geosnap/_version.py" comment: diff --git a/geosnap/visualize/commviz.py b/geosnap/visualize/commviz.py deleted file mode 100644 index e6751e64..00000000 --- a/geosnap/visualize/commviz.py +++ /dev/null @@ -1,384 +0,0 @@ -from mapclassify import * - - -def explore(data="census"): - """Launch an interactive visualization portal. - - This function launches an interactive dataset explorer based on plotly's `dash` - Currently it is still experimental, but it provides a set of interactive widgets - and maps that allow users to rapidly create metropolitan-scale datasets and choropleth - webmaps using a variety of census data. - - Parameters - ---------- - data : str - Which dataset to explore. Options include "census, "ltdb", and "ncdb" (the default is "census"). - - Returns - ------- - None - Launches a web-browser with the interactive visualization. 
- - """ - mem = {} - mem["last_metro"] = "" - mem["last_comm"] = "" - - import dash - import dash_core_components as dcc - import dash_html_components as html - import dash_bootstrap_components as dbc - import webbrowser - import palettable - import json - from geosnap import Community, datasets - - mem["data"] = data - - mapbox_access_token = ( - "pk.eyJ1Ijoia25hYXB0aW1lIiwiYSI6ImlQeUJxazgifQ.35yYbOewGVVf7OkcM64obQ" - ) - external_stylesheets = [dbc.themes.JOURNAL] - - opts = [] - for colname in datasets.codebook().variable: - val = colname - if colname.startswith("n_"): - colname = colname[1:] - elif colname.startswith("p_"): - colname = colname[1:] - colname = colname + " (%)" - colname = colname.replace("_", " ") - colname = colname.title() - opts.append({"label": colname, "value": val}) - - # opts = [{'label': col.title(), 'value': col} for col in dictionary.variable] - - k_opts = [{"label": str(k), "value": k} for k in range(3, 11)] - data_type = ["sequential", "diverging", "qualitative"] - data_opts = [{"label": scheme, "value": scheme.title()} for scheme in data_type] - - scheme_dispatch = { - "Equal Interval": EqualInterval, - "Fisher Jenks": FisherJenks, - "Head-Tail Breaks": HeadTailBreaks, - "Jenks Caspall": JenksCaspall, - "Max-P Classifier": MaxP, - "Maximum Breaks": MaximumBreaks, - "Natural Breaks": NaturalBreaks, - "Quantiles": Quantiles, - "Percentiles": Percentiles, - } - - sequential = [ - "Blues", - "BuGn", - "BuPu", - "GnBu", - "Greens", - "Greys", - "OrRd", - "Oranges", - "PuBu", - "PuBuGn", - "PuRd", - "Purples", - "RdPu", - "Reds", - "YlGn", - "YlGnBu", - "YlOrBr", - "YlOrRd", - ] - diverging = [ - "BrBG", - "PRGn", - "PiYG", - "PuOr", - "RdBu", - "RdGy", - "RdYlBu", - "RdYlGn", - "Spectral", - ] - qualitative = [ - "Accent", - "Dark2", - "Paired", - "Pastel1", - "Pastel2", - "Set1", - "Set2", - "Set3", - ] - - cmaps = sequential + diverging + qualitative - - cmap_opts = [{"label": cmap, "value": cmap} for cmap in cmaps] - - scheme_opts = 
[{"label": str(v), "value": v} for v in scheme_dispatch] - - metro_opts = [ - {"label": str(metro["name"]), "value": metro["geoid"]} - for _, metro in datasets.msas().iterrows() - ] - - precomputed_color_ranges = palettable.colorbrewer.sequential.Blues_6.hex_colors - - trace = dict(type="scattermapbox", autocolorscale=True, name="metro") - - navbar = dbc.NavbarSimple( - children=[ - dbc.NavItem(dbc.NavLink("geosnap", href="http://spatial.ucr.edu")), - dbc.DropdownMenu( - nav=True, - in_navbar=True, - label="Menu", - children=[ - dbc.DropdownMenuItem("Explore Variables"), - dbc.DropdownMenuItem("Identify Neighborhoods"), - dbc.DropdownMenuItem("Model Neighborhood Change"), - dbc.DropdownMenuItem(divider=True), - dbc.DropdownMenuItem("Docs", href="http://geosnap.readthedocs.io"), - dbc.DropdownMenuItem( - "Github", href="http://github.com/spatialucr/geosnap" - ), - ], - ), - ], - brand="cgs", - brand_href="#", - sticky="top", - dark=True, - color="dark", - ) - - body = dbc.Container( - [ - html.H2( - children="Variable Explorer", - style={ - "textAlign": "center", - "padding-top": "2%", - "padding-bottom": "4%", - }, - ), - dbc.Row( - [ - dbc.Col( - [ - html.H5( - children="Metropolitan Region", - style={"padding-bottom": "4%"}, - ), - dcc.Dropdown( - id="metro-choice", - options=metro_opts, - value="41740", - style={"padding-bottom": "2%"}, - ), - html.H5( - children="Variable", - style={"padding-top": "2%", "padding-bottom": "2%"}, - ), - dcc.Dropdown( - id="overlay-choice", - options=opts, - value="median_home_value", - style={"padding-bottom": "2%"}, - ), - html.H5( - children="Classification Scheme", - style={"padding-top": "2%", "padding-bottom": "2%"}, - ), - dcc.Dropdown( - id="scheme-choice", - options=scheme_opts, - value="Equal Interval", - style={"padding-bottom": "2%"}, - ), - html.H5( - children="Colormap", - style={"padding-top": "2%", "padding-bottom": "2%"}, - ), - dcc.Dropdown( - id="cmap-choice", - options=cmap_opts, - value="YlOrBr", - 
style={"padding-bottom": "2%"}, - ), - html.H5( - children="Number of Classes", - style={"padding-top": "2%", "padding-bottom": "2%"}, - ), - dcc.Dropdown( - id="k-choice", - options=k_opts, - value=6, - style={"padding-bottom": "2%"}, - ), - html.H5( - children="Year", - style={"padding-top": "2%", "padding-bottom": "2%"}, - ), - html.Div( - dcc.Slider( - id="year-slider", - min=1970, - max=2010, - value=2010, - marks={ - str(year): str(year) - for year in range(1970, 2011, 10) - }, - step=10, - ), - style={ - "padding-left": "5%", - "padding-right": "5%", - "padding-top": "2%", - "padding-bottom": "4%", - }, - ), - ], - md=3, - ), - dbc.Col( - [ - dcc.Loading( - id="loading-output-1", - children=[dcc.Graph(id="map-display")], - type="default", - ) - ], - md=9, - ), - ] - ), - ], - className="mt-4", - ) - - app = dash.Dash(external_stylesheets=external_stylesheets) - - app.layout = html.Div([navbar, body]) - - map_layout = { - "data": [ - { - "name": "Layer name", - "sourcetype": "geojson", - "opacity": 0.8, - "type": "scattermapbox", - "showlegend": True, - "textposition": "top", - "text": "geoid", - "mode": "markers+text", - "hoverinfo": "text", - "marker": dict(size=5, color="white", opacity=0), - } - ], - "layout": { - "autosize": True, - "hovermode": "closest", - "margin": {"l": 0, "r": 0, "b": 0, "t": 0}, - "showlegend": True, - "mapbox": { - "accesstoken": mapbox_access_token, - "center": {"lat": 0, "lon": 0}, - "style": "light", - "zoom": 8, - "bearing": 0.0, - "pitch": 0.0, - }, - }, - } - - @app.callback( - dash.dependencies.Output("map-display", "figure"), - [ - dash.dependencies.Input("overlay-choice", "value"), - dash.dependencies.Input("metro-choice", "value"), - dash.dependencies.Input("year-slider", "value"), - dash.dependencies.Input("k-choice", "value"), - dash.dependencies.Input("scheme-choice", "value"), - dash.dependencies.Input("cmap-choice", "value"), - ], - ) - def update_map( - overlay_choice, metro_choice, year_choice, k_choice, 
scheme_choice, cmap_choice - ): - - readers = { - "census": Community.from_census, - "ltdb": Community.from_ltdb, - "ncdb": Community.from_ncdb, - } - if cmap_choice in qualitative: - cmap_type = "qualitative" - elif cmap_choice in sequential: - cmap_type = "sequential" - else: - cmap_type = "diverging" - - tmp = map_layout.copy() - - if metro_choice != mem["last_metro"]: - community = readers[mem["data"]](msa_fips=metro_choice) - mem["last_metro"] = metro_choice - mem["last_comm"] = community - else: - community = mem["last_comm"] - - gdf = community.gdf[community.gdf.year == year_choice] - - gdf = gdf.dropna(subset=[overlay_choice]).reset_index() - - if scheme_choice in ["Max-P Classifier", "Maximum Breaks", "Head-Tail Breaks"]: - classes = scheme_dispatch[scheme_choice](gdf[overlay_choice]).yb - else: - classes = scheme_dispatch[scheme_choice](gdf[overlay_choice], k=k_choice).yb - gdf = gdf.assign(cl=classes) - - if not k_choice: - k_choice = len(gdf.cl.unique()) - # Create a layer for each region colored by LEP value - - gdf = gdf[["geoid", "cl", "geometry"]] - - layers = [] - - precomputed_color_ranges = palettable.colorbrewer.get_map( - cmap_choice, cmap_type, k_choice - ).hex_colors - for i, lyr in enumerate(precomputed_color_ranges): - example = { - "name": "Layer name", - "source": json.loads(gdf[gdf.cl == i].to_json()), - "sourcetype": "geojson", - "type": "fill", - "opacity": 0.8, - "color": lyr, - } - layers.append(example) - - tmp["layout"]["mapbox"]["layers"] = layers - tmp["layout"]["mapbox"]["center"] = { - "lat": gdf.unary_union.centroid.y, - "lon": gdf.unary_union.centroid.x, - } - - tmp["data"][0]["text"] = gdf["geoid"].tolist() - - return tmp - - @app.callback(dash.dependencies.Output("loading-output-1", "children")) - def input_triggers_spinner(value): - return value - - webbrowser.open("http://127.0.0.1:8050") - app.run_server() - - -if __name__ == "__main__": - explore() From 66e69923519533196510941d29728eaaa41f4574 Mon Sep 17 00:00:00 2001 
From: eli knaap Date: Tue, 7 Mar 2023 11:48:14 -0800 Subject: [PATCH 3/4] remove commviz from init --- geosnap/visualize/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/geosnap/visualize/__init__.py b/geosnap/visualize/__init__.py index efb979c2..30c2fe74 100644 --- a/geosnap/visualize/__init__.py +++ b/geosnap/visualize/__init__.py @@ -1,5 +1,4 @@ from .seq import * -from .commviz import explore from .transitions import * from .mapping import * from .descriptives import * From 409b9149c16e95b5ff4b69f987c6d15f58f4a3f2 Mon Sep 17 00:00:00 2001 From: eli knaap Date: Tue, 7 Mar 2023 11:58:45 -0800 Subject: [PATCH 4/4] download with pooch --- environment.yml | 1 + geosnap/io/util.py | 3 ++- requirements.txt | 3 ++- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/environment.yml b/environment.yml index 0f322cd3..c6ca49bf 100644 --- a/environment.yml +++ b/environment.yml @@ -28,3 +28,4 @@ dependencies: - versioneer - pyproj >=3 - pandana + - pooch diff --git a/geosnap/io/util.py b/geosnap/io/util.py index 48020a39..2b1287f6 100644 --- a/geosnap/io/util.py +++ b/geosnap/io/util.py @@ -1,5 +1,6 @@ import os import pathlib +import pooch from urllib.error import HTTPError from warnings import warn @@ -41,7 +42,7 @@ def get_census_gdb(years=None, geom_level="blockgroup", output_dir="."): out_fn = f"ACS_{year}_5YR_{levels[geom_level].upper()}.gdb.zip" pth = pathlib.PurePath(output_dir, out_fn) url = f"https://www2.census.gov/geo/tiger/TIGER_DP/{year}ACS/{fn}" - download(url, pth) + pooch.retrieve(url, None, progressbar=True, path=pth) def reformat_acs_vars(col): """Convert variable names to the same format used by the Census Detailed Tables API. diff --git a/requirements.txt b/requirements.txt index dd77fe4d..f4a0bf9f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -24,4 +24,5 @@ segregation>=2.1 proplot>=0.9 versioneer pyproj>=3 -pandana \ No newline at end of file +pandana +pooch