Skip to content

Commit

Permalink
Add support for urbanaccess (#5)
Browse files Browse the repository at this point in the history
* add cli + streaming support

* remove extras_urbanaccess

* fix extras_require

* add support for urbanaccess [WIP]

* .gitignore

* WIP

* add urbanaccess

* add urbanaccess notebook

* improve urbanaccess network analysis
  • Loading branch information
OriHoch committed Jun 23, 2022
1 parent 2987747 commit 9490ba0
Show file tree
Hide file tree
Showing 13 changed files with 590 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Expand Up @@ -4,4 +4,7 @@ venv
.idea
dist
__pycache__
.data
VERSION.txt
logs
data
16 changes: 16 additions & 0 deletions README.md
Expand Up @@ -47,3 +47,19 @@ See the CLI help message for details:
stride --help
```

### SIRI Accessibility Analysis using UrbanAccess

[UDST/urbanaccess](https://github.com/UDST/urbanaccess/blob/dev/README.rst) is a tool for running accessibility
analysis. The stride client provides methods that generate UrbanAccess accessibility graphs from the SIRI data.

Install:

```
pip install --upgrade open-bus-stride-client[cli,urbanaccess]
```

See the CLI help messages for available functionality:

```
stride urbanaccess --help
```
2 changes: 2 additions & 0 deletions notebooks/.gitignore
@@ -0,0 +1,2 @@
data
logs
316 changes: 316 additions & 0 deletions notebooks/siri accessibility analysis using UrbanAccess.ipynb

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions setup.py
Expand Up @@ -13,6 +13,7 @@
# Optional dependency groups exposed via extras_require below.
extras_cli = ['click==8.1.3']
extras_jupyter = ['jupyterlab', 'ipywidgets']
# NOTE: a comma is required between version specifiers (PEP 508);
# ">=1.3<1.4" is an invalid specifier and breaks installation of the extra.
extras_notebooks = ['pandas>=1.3,<1.4']
extras_urbanaccess = ['urbanaccess==0.2.2', 'geopandas==0.10.2']


setup(
Expand All @@ -24,10 +25,12 @@
extras_require={
'cli': extras_cli,
'notebooks': extras_notebooks,
'urbanaccess': extras_urbanaccess,
'all': [
*extras_cli,
*extras_jupyter,
*extras_notebooks,
*extras_urbanaccess,
]
},
entry_points={
Expand Down
5 changes: 5 additions & 0 deletions stride/cli.py
Expand Up @@ -9,6 +9,10 @@ def main():
pass


# Register the urbanaccess sub-command group on the main CLI group.
# Imported here, after `main` is defined, so the group can be attached to it.
from .urbanaccess.cli import urbanaccess
main.add_command(urbanaccess)


@main.command()
@click.argument('PATH')
@click.argument('PARAMS_JSON', default='{}')
Expand All @@ -25,6 +29,7 @@ def get(path, params_json):
def iterate(path, params_json, limit):
    """Iterate over an API list path with optional json params, print one item per line"""
    from . import streaming
    params = json.loads(params_json)
    count = 0
    for item in streaming.iterate(path, params, limit):
        print(item)
        count += 1
    # count stays 0 when the stream yields nothing
    print(f"Got {count} results")
Expand Down
36 changes: 36 additions & 0 deletions stride/common.py
@@ -1,3 +1,4 @@
import os
import json
import datetime
import urllib.parse
Expand Down Expand Up @@ -85,3 +86,38 @@ def get(path, params=None, pre_requests_callback=None):
res_status_code, res_text,
msg="Failure response from Stride API ({}): {}".format(res_status_code, parse_error_res(res))
)


def now():
    """Return the current time as a timezone-aware datetime in UTC."""
    utc = datetime.timezone.utc
    return datetime.datetime.now(utc)


def create_unique_path(base_path, path_prefix=''):
    """Create and return a new uniquely-named directory under base_path.

    The directory name is path_prefix followed by a UTC timestamp with
    microsecond resolution; on the unlikely name collision a fresh
    timestamp is tried, up to 5 times, before giving up.
    """
    os.makedirs(base_path, exist_ok=True)
    attempts_left = 5
    while attempts_left > 0:
        attempts_left -= 1
        timestamp = now().strftime('%Y-%m-%dT%H%M%S.%f')
        candidate = os.path.join(base_path, '{}{}'.format(path_prefix, timestamp))
        try:
            os.mkdir(candidate)
        except FileExistsError:
            # another caller grabbed the same timestamp - retry with a new one
            continue
        return candidate
    raise Exception("Failed to create unique path")


def is_None(val):
    """Return True when val should be treated as missing.

    Due to a problem with airflow dag initialization, in some cases we get
    the actual string 'None', which must be handled the same as None.
    """
    if val is None:
        return True
    return val == 'None'


def parse_date_str(date, num_days=None):
"""Parses a date string in format %Y-%m-%d with default of today if empty
if num_days is not None - will use a default of today minus given num_days
"""
if isinstance(date, datetime.date):
return date
elif not date or is_None(date):
return datetime.date.today() if num_days is None else datetime.date.today() - datetime.timedelta(days=int(num_days))
else:
return datetime.datetime.strptime(date, '%Y-%m-%d').date()
1 change: 1 addition & 0 deletions stride/config.py
Expand Up @@ -2,3 +2,4 @@


# Base URL of the Stride API; overridable via environment, trailing slash stripped.
STRIDE_API_BASE_URL = (os.environ.get('STRIDE_API_BASE_URL') or 'https://open-bus-stride-api.hasadna.org.il').rstrip('/')
# Local directory used for UrbanAccess input/output data; overridable via environment.
URBANACCESS_DATA_PATH = os.environ.get('URBANACCESS_DATA_PATH') or '.data/urbanaccess'
Empty file added stride/urbanaccess/__init__.py
Empty file.
39 changes: 39 additions & 0 deletions stride/urbanaccess/cli.py
@@ -0,0 +1,39 @@
import click


# Top-level click group; the sub-commands below attach themselves to it
# via the @urbanaccess.command() decorator. (The docstring doubles as the
# CLI help text, so it is left unchanged.)
@click.group()
def urbanaccess():
    """Run accessibility analysis using UrbanAccess"""
    pass


@urbanaccess.command()
@click.option('--target-path', help="Target path to save the fake gtfs data to. "
                                    "If not provided will create a unique path in local directory.")
@click.option('--date', help="Date string in format %Y-%m-%d, for example: \"2022-06-15\"", required=True)
@click.option('--start-hour', type=int, required=True, help="UTC Hour")
@click.option('--end-hour', type=int, required=True, help="UTC Hour")
@click.option('--bbox', help='comma-separated square bounding box values: min_lon, min_lat, max_lon, max_lat. '
                             'For example: "34.8, 31.96, 34.81, 31.97". '
                             'Can get it from https://boundingbox.klokantech.com/ - csv export',
              required=True)
def create_fake_gtfs(**kwargs):
    """Create fake GTFS data from the siri data to use as input to UrbanAccess"""
    # Import deferred into the command body so the CLI starts quickly and
    # other commands still work when the urbanaccess extras are not installed.
    from .create_fake_gtfs import main
    main(**kwargs)


@urbanaccess.command()
@click.option('--target-path')
@click.option('--fake-gtfs-path', help='path to output of create-fake-gtfs task. '
                                       'If provided, the other fake gtfs arguments are not needed.')
@click.option('--date', help="To create fake gtfs data - date string in format %Y-%m-%d, for example: \"2022-06-15\"")
@click.option('--start-hour', type=int, help="To create fake gtfs data - UTC Hour")
@click.option('--end-hour', type=int, help="To create fake gtfs data - UTC Hour")
@click.option('--bbox', help='To create fake gtfs data - comma-separated square bounding box values: min_lon, min_lat, max_lon, max_lat. '
                             'For example: "34.8, 31.96, 34.81, 31.97". '
                             'Can get it from https://boundingbox.klokantech.com/ - csv export')
def create_network(**kwargs):
    """Create UrbanAccess accessibility network from the fake gtfs data"""
    # Import deferred into the command body: urbanaccess/geopandas are heavy
    # optional dependencies and should only load when this command runs.
    from .create_network import main
    main(**kwargs)
110 changes: 110 additions & 0 deletions stride/urbanaccess/create_fake_gtfs.py
@@ -0,0 +1,110 @@
import os
import json
import datetime
from pprint import pprint
from textwrap import dedent
from collections import defaultdict
from contextlib import contextmanager

from .. import config, iterate
from ..common import create_unique_path, parse_date_str


def gtfs_escape(val):
    """Sanitize a value for embedding in a comma-separated GTFS line:
    commas are dropped and newlines become spaces."""
    without_commas = val.replace(',', '')
    return without_commas.replace('\n', ' ')


def create_calendar(target_path, date: datetime.date):
    """Write a GTFS calendar.txt under target_path with a single service
    that is active on every weekday but valid only for the given date.

    Returns the service_id used, for referencing from trips.txt.
    """
    service_id = '1'
    day = date.strftime('%Y%m%d')
    header = 'service_id,monday,tuesday,wednesday,thursday,friday,saturday,sunday,start_date,end_date\n'
    row = f'{service_id},1,1,1,1,1,1,1,{day},{day}\n'
    with open(os.path.join(target_path, 'calendar.txt'), 'w') as f:
        f.write(header + row)
    return service_id


@contextmanager
def open_files(target_path):
    """Open the four GTFS output files under target_path for writing.

    Yields a tuple (stops, routes, trips, stop_times) of open file objects;
    all four are closed when the context exits.
    """
    join = os.path.join
    with open(join(target_path, 'stops.txt'), 'w') as f_stops, \
            open(join(target_path, 'routes.txt'), 'w') as f_routes, \
            open(join(target_path, 'trips.txt'), 'w') as f_trips, \
            open(join(target_path, 'stop_times.txt'), 'w') as f_stop_times:
        yield f_stops, f_routes, f_trips, f_stop_times


def create_data(stats, target_path, service_id, date, start_hour, end_hour, min_lon, min_lat, max_lon, max_lat):
    """Stream SIRI ride-stops from the Stride API and write them as fake GTFS
    stops/routes/trips/stop_times files under target_path.

    stats: a defaultdict(int), updated in place with per-file record counts.
    service_id: the single GTFS service id (from create_calendar) used for all trips.
    Only stops inside the given bounding box whose vehicle locations were
    recorded between start_hour and end_hour (UTC) on the given date are included.
    """
    # De-duplication sets - each stop/route/trip row is written at most once.
    added_stop_ids = set()
    added_route_ids = set()
    added_trip_ids = set()
    with open_files(target_path) as (f_stops, f_routes, f_trips, f_stop_times):
        # GTFS file headers
        f_stops.write('stop_id,stop_name,stop_lat,stop_lon,location_type\n',)
        f_routes.write('route_id,route_short_name,route_type\n')
        f_trips.write('route_id,service_id,trip_id\n')
        f_stop_times.write('trip_id,arrival_time,departure_time,stop_id,stop_sequence\n')
        # UTC window covering start_hour:00:00 through end_hour:59:59.
        recorded_at_time_from = datetime.datetime.combine(date, datetime.time(start_hour), datetime.timezone.utc)
        recorded_at_time_to = datetime.datetime.combine(date, datetime.time(end_hour, 59, 59), datetime.timezone.utc)
        for item in iterate('/siri_ride_stops/list', {
            'gtfs_stop__lat__greater_or_equal': min_lat,
            'gtfs_stop__lat__lower_or_equal': max_lat,
            'gtfs_stop__lon__greater_or_equal': min_lon,
            'gtfs_stop__lon__lower_or_equal': max_lon,
            'gtfs_date_from': date,
            'gtfs_date_to': date,
            'siri_vehicle_location__recorded_at_time_from': recorded_at_time_from,
            'siri_vehicle_location__recorded_at_time_to': recorded_at_time_to,
            # scheduled start time window is padded by 10 hours on each side,
            # presumably to include long rides starting well outside the
            # recorded-at window - TODO confirm against the API semantics
            'siri_ride__scheduled_start_time_from': recorded_at_time_from - datetime.timedelta(hours=10),
            'siri_ride__scheduled_start_time_to': recorded_at_time_to + datetime.timedelta(hours=10),
            'limit': -1,
        }, limit=None):
            # NOTE(review): assumes this field arrives as a datetime object
            # (strftime is called on it) - confirm the client parses it.
            svl_recorded_at_time = item['nearest_siri_vehicle_location__recorded_at_time'].strftime("%H:%M:%S")
            gs_name = gtfs_escape(f'{item["gtfs_stop__city"]}: {item["gtfs_stop__name"]}')
            gs_id = item['gtfs_stop_id']
            if gs_id not in added_stop_ids:
                added_stop_ids.add(gs_id)
                # trailing 0 is the GTFS location_type column (stop/platform)
                f_stops.write(f'{gs_id},{gs_name},{item["gtfs_stop__lat"]},{item["gtfs_stop__lon"]},0\n')
                stats['stops'] += 1
            grt_id = item['gtfs_ride__gtfs_route_id']
            if grt_id not in added_route_ids:
                added_route_ids.add(grt_id)
                # trailing 3 is the GTFS route_type column (bus)
                f_routes.write(f'{grt_id},{gtfs_escape(item["gtfs_route__route_short_name"])},3\n')
                stats['routes'] += 1
            gr_id = item['siri_ride__gtfs_ride_id']
            if gr_id not in added_trip_ids:
                added_trip_ids.add(gr_id)
                f_trips.write(f'{grt_id},{service_id},{gr_id}\n')
                stats['trips'] += 1
            # arrival and departure are both set to the recorded-at time
            f_stop_times.write(f'{gr_id},{svl_recorded_at_time},{svl_recorded_at_time},{gs_id},{item["order"]}\n')
            stats['stop_times'] += 1
            # progress indication for long-running downloads
            if stats["stop_times"] > 1 and stats["stop_times"] % 1000 == 0:
                print(f'saved {stats["stop_times"]} stop times...')


def main(date, start_hour, end_hour, bbox, target_path=None):
    """Generate a fake GTFS feed from SIRI data for the given date/hours/bbox.

    Writes the GTFS files under <target_path>/siri_feed plus a metadata.json
    recording the query parameters; returns target_path.
    """
    if not target_path:
        base = os.path.join(config.URBANACCESS_DATA_PATH, 'fake_gtfs')
        target_path = create_unique_path(base)
    feed_path = os.path.join(target_path, 'siri_feed')
    os.makedirs(feed_path)
    date = parse_date_str(date)
    start_hour, end_hour = int(start_hour), int(end_hour)
    min_lon, min_lat, max_lon, max_lat = (float(part.strip()) for part in bbox.split(','))
    print(dedent(f'''
        creating fake gtfs data
        target_path={target_path}
        date: {date}
        hours: {start_hour} - {end_hour}
        bbox: {min_lon},{min_lat} - {max_lon},{max_lat}
    '''))
    stats = defaultdict(int)
    service_id = create_calendar(feed_path, date)
    create_data(stats, feed_path, service_id, date, start_hour, end_hour,
                min_lon, min_lat, max_lon, max_lat)
    metadata = {
        'start_hour': start_hour,
        'end_hour': end_hour,
        'bbox': [min_lon, min_lat, max_lon, max_lat]
    }
    with open(os.path.join(target_path, 'metadata.json'), 'w') as f:
        json.dump(metadata, f)
    pprint(dict(stats))
    print(f'Fake gtfs data successfully stored at "{target_path}"')
    return target_path
52 changes: 52 additions & 0 deletions stride/urbanaccess/create_network.py
@@ -0,0 +1,52 @@
import os
import json
from textwrap import dedent


import urbanaccess.gtfs.load
import urbanaccess.gtfs.network
import urbanaccess.osm.load
import urbanaccess.osm.network
import urbanaccess.network
import urbanaccess.plot


from .. import config
from ..common import create_unique_path


def main(fake_gtfs_path=None, target_path=None, date=None, start_hour=None, end_hour=None, bbox=None):
    """Create an UrbanAccess accessibility network and save it as an hdf5 file.

    Either pass fake_gtfs_path (output of create_fake_gtfs) or pass
    date/start_hour/end_hour/bbox to generate the fake GTFS data first.
    Returns the path of the saved network file.
    """
    if fake_gtfs_path:
        # A pre-built feed was given - the feed-generation args must not also
        # be set. Compare hours against None (not truthiness) so that hour 0
        # (midnight UTC) is treated as a provided value, not as "missing".
        assert not date and start_hour is None and end_hour is None and not bbox
    else:
        assert date and start_hour is not None and end_hour is not None and bbox
        from .create_fake_gtfs import main
        fake_gtfs_path = main(date=date, start_hour=start_hour, end_hour=end_hour, bbox=bbox)
    if not target_path:
        target_path = create_unique_path(os.path.join(config.URBANACCESS_DATA_PATH, 'network'))
    # Sanity-check that the fake gtfs directory has the expected layout.
    assert os.path.exists(os.path.join(fake_gtfs_path, 'siri_feed', 'stop_times.txt'))
    assert os.path.exists(os.path.join(fake_gtfs_path, 'metadata.json'))
    print(dedent(f"""
        Creating urbanaccess network
        fake_gtfs_path={fake_gtfs_path}
        target_path={target_path}
    """))
    # The hours and bbox actually used come from the metadata recorded when
    # the fake gtfs data was created, not from this function's arguments.
    with open(os.path.join(fake_gtfs_path, 'metadata.json')) as f:
        fake_gtfs_metadata = json.load(f)
    start_hour = fake_gtfs_metadata['start_hour']
    end_hour = fake_gtfs_metadata['end_hour']
    bbox = tuple(fake_gtfs_metadata['bbox'])
    loaded_feeds = urbanaccess.gtfs.load.gtfsfeed_to_df(gtfsfeed_path=fake_gtfs_path)
    urbanaccess_net = urbanaccess.gtfs.network.create_transit_net(
        gtfsfeeds_dfs=loaded_feeds,
        day='tuesday',  # day doesn't matter because the fake gtfs data has service enabled for all days
        timerange=[f'{start_hour:02}:00:00', f'{end_hour:02}:00:00']
    )
    nodes, edges = urbanaccess.osm.load.ua_network_from_bbox(bbox=bbox, remove_lcn=True)
    # travel_speed_mph=3 approximates pedestrian walking speed
    urbanaccess.osm.network.create_osm_net(osm_edges=edges, osm_nodes=nodes, travel_speed_mph=3)
    urbanaccess.network.integrate_network(urbanaccess_network=urbanaccess_net, headways=False)
    urbanaccess.network.save_network(urbanaccess_network=urbanaccess_net, dir=target_path, filename='final_net.h5',
                                     overwrite_key=True)
    network_path = os.path.join(target_path, "final_net.h5")
    print(f'Successfully stored UrbanAccess network at "{network_path}"')
    return network_path
7 changes: 7 additions & 0 deletions stride/urbanaccess/helpers.py
@@ -0,0 +1,7 @@
import os

import urbanaccess.network


def load_network(network_path):
    """Load a previously saved UrbanAccess network from an hdf5 file path.

    urbanaccess.network.load_network expects the directory and the filename
    as separate arguments, so split the path here. Using os.path.split
    (instead of splitting manually on '/') also handles OS-specific path
    separators and avoids shadowing the `dir` builtin.
    """
    directory, filename = os.path.split(network_path)
    return urbanaccess.network.load_network(dir=directory, filename=filename)

0 comments on commit 9490ba0

Please sign in to comment.