Initial A* implementation (#467)

* Initial A* implementation This implementation uses a haversine distance heuristic, in theory this a consistent heuristic [1] that would give us "an optimal path without processing any node more than once" when the graph cost is set to distance, however, in testing I've found that the distance calculations based and lon/lat done by myself and QGIS do not align with those in the project. Unfortunately this makes the heuristic not admissible [2] meaning A* will not always find the least-cost path, and may not do the minimum amount of work. From some basic instrumenting I found A* is consistently an order of magnitude better than the existing Dijkstra's implementation when it comes to heap operation counts. Time wise I haven't performed real benchmarks. Currently there is only one heuristic implemented for distance, if the graph cost field is set to `free_flow_time` the heuristic, while not useless is theory, it is practically as the magnitudes of the distances dominates any graph cost meaning A* will explore the whole network, just like Dijkstra's. [1] https://en.wikipedia.org/wiki/Consistent_heuristic [2] https://en.wikipedia.org/wiki/Admissible_heuristic * Indexing fixes found on Arkansas network * Updates graph types * A* improvements and corrections, equirectangular heuristic added * Update "without a model" example * fixup! A* improvements and corrections, equirectangular heuristic added * Set lon/lat index manually instead of passing to prepare_graph * fixup! Set lon/lat index manually instead of passing to prepare_graph * Split path computation and skimming example, use Coquimbo over Sioux * Add note about skimming with A* * General clean up * Extend existing test to include A* via subtests * Style * Add runtime heuristic switching * Document heuristic switching * Heuristic swithcing tests * Python 3.7 compatibility * Missed changes * Add note for A* non-distance metrics * Bumps up version for release --------- Co-authored-by: pveigadecamargo <pveigadecamargo@anl.gov>
AequilibraE · Nov 25, 2023 · df559bf · df559bf
1 parent 3d7f017
commit df559bf
Show file tree

Hide file tree

Showing 14 changed files with 576 additions and 174 deletions.
diff --git a/__version__.py b/__version__.py
@@ -1,5 +1,5 @@
 version = 0.9
-minor_version = "4"
+minor_version = "5"
 release_name = "Queluz"
 
 release_version = f"{version}.{minor_version}"
diff --git a/aequilibrae/paths/AoN.pyx b/aequilibrae/paths/AoN.pyx
@@ -170,17 +170,16 @@ def one_to_all(origin, matrix, graph, result, aux_result, curr_thread):
         save_path_file(origin_index, links, zones, predecessors_view, conn_view, path_file_base, path_index_file_base, write_feather)
     return origin
 
-def path_computation(origin, destination, graph, results, early_exit = False):
+def path_computation(origin, destination, graph, results):
     # type: (int, int, Graph, PathResults) -> (None)
     """
     :param graph: AequilibraE graph. Needs to have been set with number of centroids and list of skims (if any)
     :param results: AequilibraE Matrix properly set for computation using matrix.computational_view([matrix list])
     :param skimming: if we will skim for all nodes or not
-    :param early_exit: Exit Dijkstra's once the destination has been found. Constructs a partial shortest path tree.
     """
     cdef ITYPE_t nodes, orig, dest, p, b, origin_index, dest_index, connector, zones
     cdef long i, j, skims, a, block_flows_through_centroids
-    cdef bint early_exit_c = early_exit
+    cdef bint early_exit_bint = results.early_exit
 
     results.origin = origin
     results.destination = destination
@@ -219,6 +218,18 @@ def path_computation(origin, destination, graph, results, early_exit = False):
     new_b_nodes = graph.graph.b_node.values.copy()
     cdef long long [:] b_nodes_view = new_b_nodes
 
+    cdef bint a_star_bint = results.a_star
+    cdef double [:] lat_view
+    cdef double [:] lon_view
+    cdef long long [:] nodes_to_indices_view
+    cdef Heuristic heuristic
+    if results.a_star:
+        lat_view = graph.lonlat_index.lat.values
+        lon_view = graph.lonlat_index.lon.values
+        nodes_to_indices_view = graph.nodes_to_indices
+        heuristic = HEURISTIC_MAP[results._heuristic]
+
+
     #Now we do all procedures with NO GIL
     with nogil:
         if block_flows_through_centroids: # Unblocks the centroid if that is the case
@@ -230,15 +241,31 @@ def path_computation(origin, destination, graph, results, early_exit = False):
                                     b_nodes_view,
                                     original_b_nodes_view)
 
-        w = path_finding(origin_index,
-                         dest_index if early_exit_c else -1,
-                         g_view,
-                         b_nodes_view,
-                         graph_fs_view,
-                         predecessors_view,
-                         ids_graph_view,
-                         conn_view,
-                         reached_first_view)
+        if a_star_bint:
+            w = path_finding_a_star(origin_index,
+                                    dest_index,
+                                    g_view,
+                                    b_nodes_view,
+                                    graph_fs_view,
+                                    nodes_to_indices_view,
+                                    lat_view,
+                                    lon_view,
+                                    predecessors_view,
+                                    ids_graph_view,
+                                    conn_view,
+                                    reached_first_view,
+                                    heuristic)
+        else:
+            w = path_finding(origin_index,
+                             dest_index if early_exit_bint else -1,
+                             g_view,
+                             b_nodes_view,
+                             graph_fs_view,
+                             predecessors_view,
+                             ids_graph_view,
+                             conn_view,
+                             reached_first_view)
+
 
         if skims > 0:
             skim_single_path(origin_index,
@@ -290,9 +317,12 @@ def path_computation(origin, destination, graph, results, early_exit = False):
         results.path_link_directions = None
         results.milepost = None
 
-def update_path_trace(results, destination, graph, early_exit = False):
+def update_path_trace(results, destination, graph):
     # type: (PathResults, int, Graph) -> (None)
     """
+    If `results.early_exit` is `True`, early exit will be enabled if the path is to be recomputed.
+    If `results.a_star` is `True`, A* will be used if the path is to be recomputed.
+
     :param graph: AequilibraE graph. Needs to have been set with number of centroids and list of skims (if any)
     :param results: AequilibraE Matrix properly set for computation using matrix.computational_view([matrix list])
     :param skimming: if we will skim for all nodes or not
@@ -312,8 +342,10 @@ def update_path_trace(results, destination, graph, early_exit = False):
 
         # If the predecessor is -1 and early exit was enabled we cannot differentiate between
         # an unreachable node and one we just didn't see yet. We need to recompute the tree with the new destination
-        if results.predecessors[dest_index] == -1 and results._early_exit:
-            results.compute_path(results.origin, destination, early_exit=early_exit)
+        # If `a_star` was enabled then the stored tree has no guarantees and may not be useful due to the heuristic used
+        # TODO: revisit with heuristic specific reuse logic
+        if results.predecessors[dest_index] == -1 and results._early_exit or results._a_star:
+            results.compute_path(results.origin, destination, early_exit=results.early_exit, a_star=results.a_star)
 
         # By the invariant hypothesis presented at https://en.wikipedia.org/wiki/Dijkstra%27s_algorithm#Proof_of_correctness
         # Dijkstra's algorithm produces the shortest path tree for all scanned nodes. That is if a node was scanned,

diff --git a/aequilibrae/paths/basic_path_finding.pyx b/aequilibrae/paths/basic_path_finding.pyx
@@ -9,8 +9,9 @@ LIST OF ALL THE THINGS WE NEED TO DO TO NOT HAVE TO HAVE nodes 1..n as CENTROIDS
 - Re-write function **network_loading** on the part of loading flows to centroids
 """
 cimport cython
-from libc.math cimport isnan, INFINITY
+from libc.math cimport isnan, INFINITY, sin, cos, asin, sqrt, pi
 from libc.string cimport memset
+from libc.stdlib cimport malloc, free
 
 include 'pq_4ary_heap.pyx'
 
@@ -318,7 +319,7 @@ cpdef int path_finding(long origin,
 
     # initialization of the heap elements
     # all nodes have INFINITY key and NOT_IN_HEAP state
-    init_heap(&pqueue, <size_t>N)
+    init_heap(&pqueue, <size_t>M)
 
     # the key is set to zero for the origin vertex,
     # which is inserted into the heap
@@ -358,3 +359,160 @@ cpdef int path_finding(long origin,
 
     free_heap(&pqueue)
     return found - 1
+
+cdef enum Heuristic:
+    HAVERSINE
+    EQUIRECTANGULAR
+
+HEURISTIC_MAP = {"haversine": HAVERSINE, "equirectangular": EQUIRECTANGULAR}
+
+@cython.wraparound(False)
+@cython.embedsignature(True)
+@cython.boundscheck(False)
+cdef inline double haversine_heuristic(double lat1, double lon1, double lat2, double lon2, void* data) noexcept nogil:
+    """
+    A haversine heuristic written to minimise expensive (trig) operations.
+
+    Arguments:
+        **lat1** (:obj:`double`): Latitude of destination
+        **lon1** (:obj:`double`): Longitude of destination
+        **lat2** (:obj:`double`): Latitude of node to evalutate
+        **lon2** (:obj:`double`): Longitude of node to evalutate
+        **data** (:obj:`void*`): This void pointer should hold a precomputed cos(lat1) as a double
+
+    Returns the distance between (lat1, lon1) and (lat2, lon2).
+    """
+    cdef:
+        double cos_lat1 = (<double*>data)[0]  # Cython doesn't support c-style derefs, use array notation instead
+        double dlat = lat2 - lat1
+        double dlon = lon2 - lon1
+        double sin_dlat = sin(dlat / 2)
+        double sin_dlon = sin(dlon / 2)
+        double a = sin_dlat * sin_dlat + cos_lat1 * cos(lat2) * sin_dlon * sin_dlon
+    return 2.0 * 6371000.0 * asin(sqrt(a))
+
+
+@cython.wraparound(False)
+@cython.embedsignature(True)
+@cython.boundscheck(False)
+cdef inline double equirectangular_heuristic(double lat1, double lon1, double lat2, double lon2, void* _data) noexcept nogil:
+    """
+    A Equirectangular approximation heuristic, useful for small distances.
+    Not admissible for large distances. A* may not return the optimal path with this heuristic.
+
+    Arguments:
+        **lat1** (:obj:`double`): Latitude of destination
+        **lon1** (:obj:`double`): Longitude of destination
+        **lat2** (:obj:`double`): Latitude of node to evalutate
+        **lon2** (:obj:`double`): Longitude of node to evalutate
+        **data** (:obj:`void*`): Unused void pointer, for compatibilty with other heuristics
+
+    Returns the distance between (lat1, lon1) and (lat2, lon2).
+
+    Reference: https://www.movable-type.co.uk/scripts/latlong.html
+    """
+    cdef:
+        double x = (lon2 - lon1) * cos((lat1 + lat2) / 2.0)
+        double y = (lat2 - lat1)
+    return 6371000.0 * sqrt(x * x + y * y)
+
+
+@cython.wraparound(False)
+@cython.embedsignature(True)
+@cython.boundscheck(False)
+cpdef int path_finding_a_star(long origin,
+                              long destination,
+                              double[:] graph_costs,
+                              long long [:] csr_indices,
+                              long long [:] graph_fs,
+                              long long [:] nodes_to_indices,
+                              double [:] lats,
+                              double [:] lons,
+                              long long [:] pred,
+                              long long [:] ids,
+                              long long [:] connectors,
+                              long long [:] reached_first,
+                              Heuristic heuristic) noexcept nogil:
+    """
+    Based on the pseudocode presented at https://en.wikipedia.org/wiki/A*_search_algorithm#Pseudocode
+    The following variables have been renamed to be consistent with out Dijkstra's implementation
+     - openSet: pqueue
+     - cameFrom: pred
+     - fScore: pqueue.Elements[idx].key, for some idx
+    """
+
+    cdef unsigned int N = graph_costs.shape[0]
+    cdef unsigned int M = pred.shape[0]
+
+    cdef:
+        size_t current, neighbour, idx # indices
+        DTYPE_t tail_vert_val, tentative_gScore  # vertex travel times
+        PriorityQueue pqueue  # binary heap
+        ElementState vert_state  # vertex state
+        size_t origin_vert = <size_t>origin
+        size_t destination_vert = <size_t>destination if destination != -1 else 0
+        ITYPE_t found = 0
+        double *gScore = <double *>malloc(M * sizeof(double))
+
+    cdef:
+        double deg2rad = pi / 180.0
+        double lat1_rad = lats[destination_vert] * deg2rad
+        double lon1_rad = lons[destination_vert] * deg2rad
+        double h, cos_lat1 = cos(lat1_rad)
+        double (*heur)(double, double, double, double, void*) noexcept nogil
+        void* data
+
+    if heuristic == HAVERSINE:
+        heur = haversine_heuristic
+        data = <void*>&cos_lat1
+    else:  # heuristic == EQUIRECTANGULAR:
+        heur = equirectangular_heuristic
+        data = <void*>NULL
+
+
+    for i in range(M):
+        pred[i] = -1
+        connectors[i] = -1
+        reached_first[i] = -1
+        gScore[i] = INFINITY
+
+    # initialization of the heap elements
+    # all nodes have INFINITY key and NOT_IN_HEAP state
+    init_heap(&pqueue, <size_t>M)
+
+    # the key is set to zero for the origin vertex,
+    # which is inserted into the heap
+    insert(&pqueue, origin_vert, 0.0)
+    gScore[origin_vert] = 0.0
+
+    # main loop
+    while pqueue.size > 0:
+        current = extract_min(&pqueue)
+        reached_first[found] = current
+        found += 1
+
+        if current == destination_vert:
+            break
+
+        # loop on outgoing edges
+        for idx in range(<size_t>graph_fs[current], <size_t>graph_fs[current + 1]):
+            neighbour = <size_t>csr_indices[idx]
+
+            tentative_gScore = gScore[current] + graph_costs[idx]
+            if tentative_gScore < gScore[neighbour]:
+                pred[neighbour] = current
+                connectors[neighbour] = ids[idx]
+                gScore[neighbour] = tentative_gScore
+
+                h = heur(lat1_rad, lon1_rad, lats[neighbour] * deg2rad, lons[neighbour] * deg2rad, data)
+
+                # Unlike Dijkstra's we can remove a node from the heap and rediscover it with a cheaper path
+                if pqueue.Elements[neighbour].state != IN_HEAP:
+                    insert(&pqueue, neighbour, tentative_gScore + h)
+                else:
+                    decrease_key(&pqueue, neighbour, tentative_gScore + h)
+
+
+    free_heap(&pqueue)
+    free(gScore)
+    return found - 1
diff --git a/aequilibrae/paths/graph.py b/aequilibrae/paths/graph.py
@@ -47,11 +47,13 @@ def __init__(self, logger=None):
 
         # These are the fields actually used in computing paths
         self.all_nodes = np.array(0)  # Holds an array with all nodes in the original network
-        self.nodes_to_indices = np.array(0)  # Holds the reverse of the all_nodes
+        self.nodes_to_indices = np.array(0, np.int64)  # Holds the reverse of the all_nodes
         self.fs = np.array([])  # This method will hold the forward star for the graph
         self.cost = np.array([])  # This array holds the values being used in the shortest path routine
         self.skims = None
 
+        self.lonlat_index = pd.DataFrame([])  # Holds a node_id to lon/lat coord index for nodes within this graph
+
         self.compact_all_nodes = np.array(0)  # Holds an array with all nodes in the original network
         self.compact_nodes_to_indices = np.array(0)  # Holds the reverse of the all_nodes
         self.compact_fs = np.array([])  # This method will hold the forward star for the graph
@@ -188,7 +190,7 @@ def __build_directed_graph(self, network: pd.DataFrame, centroids: np.ndarray):
 
         num_nodes = all_nodes.shape[0]
 
-        nodes_to_indices = np.repeat(-1, int(all_nodes.max()) + 1)
+        nodes_to_indices = np.full(int(all_nodes.max()) + 1, -1, dtype=np.int64)
         nlist = np.arange(num_nodes)
         nodes_to_indices[all_nodes] = nlist
 
@@ -303,6 +305,8 @@ def set_skimming(self, skim_fields: list) -> None:
         """
         Sets the list of skims to be computed
 
+        Skimming with A* may produce results that differ from tradditional Dijkstra's due to its use a heuristic.
+
         :Arguments:
             **skim_fields** (:obj:`list`): Fields must be numeric
         """