Skip to content

Commit

Permalink
Initial A* implementation (#467)
Browse files Browse the repository at this point in the history
* Initial A* implementation

This implementation uses a haversine distance heuristic, in theory
this a consistent heuristic [1] that would give us "an optimal path
without processing any node more than once" when the graph cost is set
to distance, however, in testing I've found that the distance
calculations based and lon/lat done by myself and QGIS do not align
with those in the project. Unfortunately this makes the heuristic not
admissible [2] meaning A* will not always find the least-cost path,
and may not do the minimum amount of work.

From some basic instrumenting I found A* is consistently an order of
magnitude better than the existing Dijkstra's implementation when it
comes to heap operation counts. Time wise I haven't performed real
benchmarks.

Currently there is only one heuristic implemented for distance, if the
graph cost field is set to `free_flow_time` the heuristic, while not
useless is theory, it is practically as the magnitudes of the distances
dominates any graph cost meaning A* will explore the whole network,
just like Dijkstra's.

[1] https://en.wikipedia.org/wiki/Consistent_heuristic
[2] https://en.wikipedia.org/wiki/Admissible_heuristic

* Indexing fixes found on Arkansas network

* Updates graph types

* A* improvements and corrections, equirectangular heuristic added

* Update "without a model" example

* fixup! A* improvements and corrections, equirectangular heuristic added

* Set lon/lat index manually instead of passing to prepare_graph

* fixup! Set lon/lat index manually instead of passing to prepare_graph

* Split path computation and skimming example, use Coquimbo over Sioux

* Add note about skimming with A*

* General clean up

* Extend existing test to include A* via subtests

* Style

* Add runtime heuristic switching

* Document heuristic switching

* Heuristic swithcing tests

* Python 3.7 compatibility

* Missed changes

* Add note for A* non-distance metrics

* Bumps up version for release

---------

Co-authored-by: pveigadecamargo <pveigadecamargo@anl.gov>
  • Loading branch information
Jake-Moss and pveigadecamargo committed Nov 25, 2023
1 parent 3d7f017 commit df559bf
Show file tree
Hide file tree
Showing 14 changed files with 576 additions and 174 deletions.
2 changes: 1 addition & 1 deletion __version__.py
@@ -1,5 +1,5 @@
version = 0.9
minor_version = "4"
minor_version = "5"
release_name = "Queluz"

release_version = f"{version}.{minor_version}"
62 changes: 47 additions & 15 deletions aequilibrae/paths/AoN.pyx
Expand Up @@ -170,17 +170,16 @@ def one_to_all(origin, matrix, graph, result, aux_result, curr_thread):
save_path_file(origin_index, links, zones, predecessors_view, conn_view, path_file_base, path_index_file_base, write_feather)
return origin

def path_computation(origin, destination, graph, results, early_exit = False):
def path_computation(origin, destination, graph, results):
# type: (int, int, Graph, PathResults) -> (None)
"""
:param graph: AequilibraE graph. Needs to have been set with number of centroids and list of skims (if any)
:param results: AequilibraE Matrix properly set for computation using matrix.computational_view([matrix list])
:param skimming: if we will skim for all nodes or not
:param early_exit: Exit Dijkstra's once the destination has been found. Constructs a partial shortest path tree.
"""
cdef ITYPE_t nodes, orig, dest, p, b, origin_index, dest_index, connector, zones
cdef long i, j, skims, a, block_flows_through_centroids
cdef bint early_exit_c = early_exit
cdef bint early_exit_bint = results.early_exit

results.origin = origin
results.destination = destination
Expand Down Expand Up @@ -219,6 +218,18 @@ def path_computation(origin, destination, graph, results, early_exit = False):
new_b_nodes = graph.graph.b_node.values.copy()
cdef long long [:] b_nodes_view = new_b_nodes

cdef bint a_star_bint = results.a_star
cdef double [:] lat_view
cdef double [:] lon_view
cdef long long [:] nodes_to_indices_view
cdef Heuristic heuristic
if results.a_star:
lat_view = graph.lonlat_index.lat.values
lon_view = graph.lonlat_index.lon.values
nodes_to_indices_view = graph.nodes_to_indices
heuristic = HEURISTIC_MAP[results._heuristic]


#Now we do all procedures with NO GIL
with nogil:
if block_flows_through_centroids: # Unblocks the centroid if that is the case
Expand All @@ -230,15 +241,31 @@ def path_computation(origin, destination, graph, results, early_exit = False):
b_nodes_view,
original_b_nodes_view)

w = path_finding(origin_index,
dest_index if early_exit_c else -1,
g_view,
b_nodes_view,
graph_fs_view,
predecessors_view,
ids_graph_view,
conn_view,
reached_first_view)
if a_star_bint:
w = path_finding_a_star(origin_index,
dest_index,
g_view,
b_nodes_view,
graph_fs_view,
nodes_to_indices_view,
lat_view,
lon_view,
predecessors_view,
ids_graph_view,
conn_view,
reached_first_view,
heuristic)
else:
w = path_finding(origin_index,
dest_index if early_exit_bint else -1,
g_view,
b_nodes_view,
graph_fs_view,
predecessors_view,
ids_graph_view,
conn_view,
reached_first_view)


if skims > 0:
skim_single_path(origin_index,
Expand Down Expand Up @@ -290,9 +317,12 @@ def path_computation(origin, destination, graph, results, early_exit = False):
results.path_link_directions = None
results.milepost = None

def update_path_trace(results, destination, graph, early_exit = False):
def update_path_trace(results, destination, graph):
# type: (PathResults, int, Graph) -> (None)
"""
If `results.early_exit` is `True`, early exit will be enabled if the path is to be recomputed.
If `results.a_star` is `True`, A* will be used if the path is to be recomputed.
:param graph: AequilibraE graph. Needs to have been set with number of centroids and list of skims (if any)
:param results: AequilibraE Matrix properly set for computation using matrix.computational_view([matrix list])
:param skimming: if we will skim for all nodes or not
Expand All @@ -312,8 +342,10 @@ def update_path_trace(results, destination, graph, early_exit = False):

# If the predecessor is -1 and early exit was enabled we cannot differentiate between
# an unreachable node and one we just didn't see yet. We need to recompute the tree with the new destination
if results.predecessors[dest_index] == -1 and results._early_exit:
results.compute_path(results.origin, destination, early_exit=early_exit)
# If `a_star` was enabled then the stored tree has no guarantees and may not be useful due to the heuristic used
# TODO: revisit with heuristic specific reuse logic
if results.predecessors[dest_index] == -1 and results._early_exit or results._a_star:
results.compute_path(results.origin, destination, early_exit=results.early_exit, a_star=results.a_star)

# By the invariant hypothesis presented at https://en.wikipedia.org/wiki/Dijkstra%27s_algorithm#Proof_of_correctness
# Dijkstra's algorithm produces the shortest path tree for all scanned nodes. That is if a node was scanned,
Expand Down
162 changes: 160 additions & 2 deletions aequilibrae/paths/basic_path_finding.pyx
Expand Up @@ -9,8 +9,9 @@ LIST OF ALL THE THINGS WE NEED TO DO TO NOT HAVE TO HAVE nodes 1..n as CENTROIDS
- Re-write function **network_loading** on the part of loading flows to centroids
"""
cimport cython
from libc.math cimport isnan, INFINITY
from libc.math cimport isnan, INFINITY, sin, cos, asin, sqrt, pi
from libc.string cimport memset
from libc.stdlib cimport malloc, free

include 'pq_4ary_heap.pyx'

Expand Down Expand Up @@ -318,7 +319,7 @@ cpdef int path_finding(long origin,

# initialization of the heap elements
# all nodes have INFINITY key and NOT_IN_HEAP state
init_heap(&pqueue, <size_t>N)
init_heap(&pqueue, <size_t>M)

# the key is set to zero for the origin vertex,
# which is inserted into the heap
Expand Down Expand Up @@ -358,3 +359,160 @@ cpdef int path_finding(long origin,

free_heap(&pqueue)
return found - 1

cdef enum Heuristic:
HAVERSINE
EQUIRECTANGULAR

HEURISTIC_MAP = {"haversine": HAVERSINE, "equirectangular": EQUIRECTANGULAR}

@cython.wraparound(False)
@cython.embedsignature(True)
@cython.boundscheck(False)
cdef inline double haversine_heuristic(double lat1, double lon1, double lat2, double lon2, void* data) noexcept nogil:
"""
A haversine heuristic written to minimise expensive (trig) operations.
Arguments:
**lat1** (:obj:`double`): Latitude of destination
**lon1** (:obj:`double`): Longitude of destination
**lat2** (:obj:`double`): Latitude of node to evalutate
**lon2** (:obj:`double`): Longitude of node to evalutate
**data** (:obj:`void*`): This void pointer should hold a precomputed cos(lat1) as a double
Returns the distance between (lat1, lon1) and (lat2, lon2).
"""
cdef:
double cos_lat1 = (<double*>data)[0] # Cython doesn't support c-style derefs, use array notation instead
double dlat = lat2 - lat1
double dlon = lon2 - lon1
double sin_dlat = sin(dlat / 2)
double sin_dlon = sin(dlon / 2)
double a = sin_dlat * sin_dlat + cos_lat1 * cos(lat2) * sin_dlon * sin_dlon
return 2.0 * 6371000.0 * asin(sqrt(a))


@cython.wraparound(False)
@cython.embedsignature(True)
@cython.boundscheck(False)
cdef inline double equirectangular_heuristic(double lat1, double lon1, double lat2, double lon2, void* _data) noexcept nogil:
"""
A Equirectangular approximation heuristic, useful for small distances.
Not admissible for large distances. A* may not return the optimal path with this heuristic.
Arguments:
**lat1** (:obj:`double`): Latitude of destination
**lon1** (:obj:`double`): Longitude of destination
**lat2** (:obj:`double`): Latitude of node to evalutate
**lon2** (:obj:`double`): Longitude of node to evalutate
**data** (:obj:`void*`): Unused void pointer, for compatibilty with other heuristics
Returns the distance between (lat1, lon1) and (lat2, lon2).
Reference: https://www.movable-type.co.uk/scripts/latlong.html
"""
cdef:
double x = (lon2 - lon1) * cos((lat1 + lat2) / 2.0)
double y = (lat2 - lat1)
return 6371000.0 * sqrt(x * x + y * y)


@cython.wraparound(False)
@cython.embedsignature(True)
@cython.boundscheck(False)
cpdef int path_finding_a_star(long origin,
long destination,
double[:] graph_costs,
long long [:] csr_indices,
long long [:] graph_fs,
long long [:] nodes_to_indices,
double [:] lats,
double [:] lons,
long long [:] pred,
long long [:] ids,
long long [:] connectors,
long long [:] reached_first,
Heuristic heuristic) noexcept nogil:
"""
Based on the pseudocode presented at https://en.wikipedia.org/wiki/A*_search_algorithm#Pseudocode
The following variables have been renamed to be consistent with out Dijkstra's implementation
- openSet: pqueue
- cameFrom: pred
- fScore: pqueue.Elements[idx].key, for some idx
"""

cdef unsigned int N = graph_costs.shape[0]
cdef unsigned int M = pred.shape[0]

cdef:
size_t current, neighbour, idx # indices
DTYPE_t tail_vert_val, tentative_gScore # vertex travel times
PriorityQueue pqueue # binary heap
ElementState vert_state # vertex state
size_t origin_vert = <size_t>origin
size_t destination_vert = <size_t>destination if destination != -1 else 0
ITYPE_t found = 0
double *gScore = <double *>malloc(M * sizeof(double))

cdef:
double deg2rad = pi / 180.0
double lat1_rad = lats[destination_vert] * deg2rad
double lon1_rad = lons[destination_vert] * deg2rad
double h, cos_lat1 = cos(lat1_rad)
double (*heur)(double, double, double, double, void*) noexcept nogil
void* data

if heuristic == HAVERSINE:
heur = haversine_heuristic
data = <void*>&cos_lat1
else: # heuristic == EQUIRECTANGULAR:
heur = equirectangular_heuristic
data = <void*>NULL


for i in range(M):
pred[i] = -1
connectors[i] = -1
reached_first[i] = -1
gScore[i] = INFINITY

# initialization of the heap elements
# all nodes have INFINITY key and NOT_IN_HEAP state
init_heap(&pqueue, <size_t>M)

# the key is set to zero for the origin vertex,
# which is inserted into the heap
insert(&pqueue, origin_vert, 0.0)
gScore[origin_vert] = 0.0

# main loop
while pqueue.size > 0:
current = extract_min(&pqueue)
reached_first[found] = current
found += 1

if current == destination_vert:
break

# loop on outgoing edges
for idx in range(<size_t>graph_fs[current], <size_t>graph_fs[current + 1]):
neighbour = <size_t>csr_indices[idx]

tentative_gScore = gScore[current] + graph_costs[idx]
if tentative_gScore < gScore[neighbour]:
pred[neighbour] = current
connectors[neighbour] = ids[idx]
gScore[neighbour] = tentative_gScore

h = heur(lat1_rad, lon1_rad, lats[neighbour] * deg2rad, lons[neighbour] * deg2rad, data)

# Unlike Dijkstra's we can remove a node from the heap and rediscover it with a cheaper path
if pqueue.Elements[neighbour].state != IN_HEAP:
insert(&pqueue, neighbour, tentative_gScore + h)
else:
decrease_key(&pqueue, neighbour, tentative_gScore + h)


free_heap(&pqueue)
free(gScore)
return found - 1
8 changes: 6 additions & 2 deletions aequilibrae/paths/graph.py
Expand Up @@ -47,11 +47,13 @@ def __init__(self, logger=None):

# These are the fields actually used in computing paths
self.all_nodes = np.array(0) # Holds an array with all nodes in the original network
self.nodes_to_indices = np.array(0) # Holds the reverse of the all_nodes
self.nodes_to_indices = np.array(0, np.int64) # Holds the reverse of the all_nodes
self.fs = np.array([]) # This method will hold the forward star for the graph
self.cost = np.array([]) # This array holds the values being used in the shortest path routine
self.skims = None

self.lonlat_index = pd.DataFrame([]) # Holds a node_id to lon/lat coord index for nodes within this graph

self.compact_all_nodes = np.array(0) # Holds an array with all nodes in the original network
self.compact_nodes_to_indices = np.array(0) # Holds the reverse of the all_nodes
self.compact_fs = np.array([]) # This method will hold the forward star for the graph
Expand Down Expand Up @@ -188,7 +190,7 @@ def __build_directed_graph(self, network: pd.DataFrame, centroids: np.ndarray):

num_nodes = all_nodes.shape[0]

nodes_to_indices = np.repeat(-1, int(all_nodes.max()) + 1)
nodes_to_indices = np.full(int(all_nodes.max()) + 1, -1, dtype=np.int64)
nlist = np.arange(num_nodes)
nodes_to_indices[all_nodes] = nlist

Expand Down Expand Up @@ -303,6 +305,8 @@ def set_skimming(self, skim_fields: list) -> None:
"""
Sets the list of skims to be computed
Skimming with A* may produce results that differ from tradditional Dijkstra's due to its use a heuristic.
:Arguments:
**skim_fields** (:obj:`list`): Fields must be numeric
"""
Expand Down

0 comments on commit df559bf

Please sign in to comment.