Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add nx.config dict for configuring dispatching and backends #7225

Merged
merged 20 commits into from Mar 15, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion networkx/__init__.py
Expand Up @@ -17,7 +17,7 @@
from networkx.exception import *

from networkx import utils
from networkx.utils.backends import _dispatchable
from networkx.utils.backends import _dispatchable, backend_config

from networkx import classes
from networkx.classes import filters
Expand Down
2 changes: 1 addition & 1 deletion networkx/algorithms/operators/tests/test_binary.py
Expand Up @@ -53,7 +53,7 @@ def test_intersection():
assert set(I2.nodes()) == {1, 2, 3, 4}
assert sorted(I2.edges()) == [(2, 3)]
# Only test if not performing auto convert testing of backend implementations
if not nx.utils.backends._dispatchable._automatic_backends:
if not nx.backend_config["automatic_backends"]:
with pytest.raises(TypeError):
nx.intersection(G2, H)
with pytest.raises(TypeError):
Expand Down
4 changes: 2 additions & 2 deletions networkx/classes/tests/test_backends.py
Expand Up @@ -23,8 +23,8 @@ def test_pickle():


@pytest.mark.skipif(
"not nx._dispatchable._automatic_backends "
"or nx._dispatchable._automatic_backends[0] != 'nx-loopback'"
"not nx.backend_config['automatic_backends'] "
"or nx.backend_config['automatic_backends'][0] != 'nx-loopback'"
)
def test_graph_converter_needs_backend():
# When testing, `nx.from_scipy_sparse_array` will *always* call the backend
Expand Down
6 changes: 3 additions & 3 deletions networkx/conftest.py
Expand Up @@ -46,11 +46,11 @@ def pytest_configure(config):
if backend is None:
backend = os.environ.get("NETWORKX_TEST_BACKEND")
if backend:
networkx.utils.backends._dispatchable._automatic_backends = [backend]
networkx.backend_config["automatic_backends"] = [backend]
fallback_to_nx = config.getoption("--fallback-to-nx")
if not fallback_to_nx:
fallback_to_nx = os.environ.get("NETWORKX_FALLBACK_TO_NX")
networkx.utils.backends._dispatchable._fallback_to_nx = bool(fallback_to_nx)
networkx.backend_config["fallback_to_nx"] = bool(fallback_to_nx)
# nx-loopback backend is only available when testing
backends = entry_points(name="nx-loopback", group="networkx.backends")
if backends:
Expand All @@ -70,7 +70,7 @@ def pytest_collection_modifyitems(config, items):
# Setting this to True here allows tests to be set up before dispatching
# any function call to a backend.
networkx.utils.backends._dispatchable._is_testing = True
if automatic_backends := networkx.utils.backends._dispatchable._automatic_backends:
if automatic_backends := networkx.backend_config["automatic_backends"]:
# Allow pluggable backends to add markers to tests (such as skip or xfail)
# when running in auto-conversion test mode
backend = networkx.utils.backends.backends[automatic_backends[0]].load()
Expand Down
45 changes: 28 additions & 17 deletions networkx/utils/backends.py
Expand Up @@ -95,7 +95,19 @@ class WrappedSparse:

from ..exception import NetworkXNotImplemented

__all__ = ["_dispatchable"]
__all__ = ["_dispatchable", "backend_config"]

# Get default configuration from environment variables at import time.
# Default backend config will be initialized from `backend_info` just below.
backend_config = {
"fallback_to_nx": os.environ.get("NETWORKX_FALLBACK_TO_NX", "true").strip().lower()
== "true",
"automatic_backends": [
x.strip()
for x in os.environ.get("NETWORKX_AUTOMATIC_BACKENDS", "").split(",")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what does NETWORKX_AUTOMATIC_BACKENDS mean here? like what's its functionality? is it the default backend?
Sorry, I don't think I understand all of the backend functionality fully, so sorry if I am being repetitive here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No worries, thanks for asking!

Previously, configuring backends was only possible via environment variables. NETWORKX_AUTOMATIC_BACKENDS environment variable is a comma separated list of backends that indicates which backends we want to automatically convert to and run when e.g. the input graph is a NetworkX graph. This is a prioritized list, so backends listed first are tried first.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

okay, thank you :)

if x.strip()
],
}


def _get_backends(group, *, load_and_call=False):
Expand Down Expand Up @@ -126,6 +138,12 @@ def _get_backends(group, *, load_and_call=False):

backends = _get_backends("networkx.backends")
backend_info = _get_backends("networkx.backend_info", load_and_call=True)
# Initialize default configuration from backends (if provided)
backend_config.update(
(backend, info["default_config"])
for backend, info in backend_info.items()
if "default_config" in info
)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

here, it might be more optimal to have a separate function(like get_info) that returns only the backend's configurations, like get_config(config_dict=None)(if config_dict is not None then the configuration dictionary is updated and returned otherwise the default configurations are returned) because config dictionary would probably be updated quite frequently and we probably shouldn't load all the backend_info every time. Please let me know if I'm missing something here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure I completely understand your comment. get_info is called exactly once at import time for each backend (before this PR and in this PR). Among other things, this helps the dispatch machinery know which backends implement which functions.

By optionally including "default_config" in the dict returned by get_info, we are able to initialize the default configuration for each backend at import time. This happens only once. This may be nice for users so they can do things like:

import networkx as nx
nx.backend_config["parallel"]["n_job"] = 5

nx.backend_config is how we expect users to update configuration.

We could make a separate entry point for the default config, but I don't know if it would be worth doing since it seems to me that the "backend_info" entry point works well enough.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok, I thought this kind of thing was possible :

import networkx as nx
nx.backend_config["parallel"]["n_job"] = 5
# defining G
print(nx.betweenness_centrality(G, backend="parallel"))

nx.backend_config["parallel"]["n_job"] = -1
nx.backend_config["parallel"]["backend"] = 'threading'
print(nx.betweenness_centrality(G, backend="parallel"))

so I thought we would be extracting all the configurations and adding them in the joblib.Parallel() in all the functions, so having a separate get_config function made sense to me.

But, I think, based on the clarification above, that all the configuration code lines will be executed while importing networkx so the final configuration for both the function calls would be {"n_jobs" : -1, "backend" : "threading"}, right?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Coming back to this question:

  • I think the code you write above would work with this configuration system.
  • the default values will be obtained from the backend_info get_info function.
  • the user overrides those values as desired for each function call.
  • the function dispatches to nx-parallel which looks at the config to get the values to send to joblib.

I think your get_config function would live in nx-parallel and look at nx.config.backends.parallel.<name> for each option. Or, we could use the whole config object like: joblib.Parallel(**nx.config.backends.parallel). But the user would be able to change them between each function call.


# Load and cache backends on-demand
_loaded_backends = {} # type: ignore[var-annotated]
Expand Down Expand Up @@ -154,14 +172,6 @@ class _dispatchable:
# For example: `PYTHONPATH=. pytest --backend graphblas --fallback-to-nx`
# Future work: add configuration to control these
_is_testing = False
_fallback_to_nx = (
os.environ.get("NETWORKX_FALLBACK_TO_NX", "true").strip().lower() == "true"
)
_automatic_backends = [
x.strip()
for x in os.environ.get("NETWORKX_AUTOMATIC_BACKENDS", "").split(",")
if x.strip()
]

def __new__(
cls,
Expand Down Expand Up @@ -436,13 +446,14 @@ def __call__(self, /, *args, backend=None, **kwargs):
# if (val := args[pos] if pos < len(args) else kwargs.get(gname)) is not None
# }

if self._is_testing and self._automatic_backends and backend_name is None:
automatic_backends = backend_config["automatic_backends"]
if self._is_testing and automatic_backends and backend_name is None:
# Special path if we are running networkx tests with a backend.
return self._convert_and_call_for_tests(
self._automatic_backends[0],
automatic_backends[0],
args,
kwargs,
fallback_to_nx=self._fallback_to_nx,
fallback_to_nx=backend_config["fallback_to_nx"],
)

# Check if any graph comes from a backend
Expand Down Expand Up @@ -504,7 +515,7 @@ def __call__(self, /, *args, backend=None, **kwargs):
raise ImportError(f"Unable to load backend: {graph_backend_name}")
if (
"networkx" in graph_backend_names
and graph_backend_name not in self._automatic_backends
and graph_backend_name not in automatic_backends
):
# Not configured to convert networkx graphs to this backend
raise TypeError(
Expand All @@ -520,11 +531,11 @@ def __call__(self, /, *args, backend=None, **kwargs):
graph_backend_name,
args,
kwargs,
fallback_to_nx=self._fallback_to_nx,
fallback_to_nx=backend_config["fallback_to_nx"],
)
# All graphs are backend graphs--no need to convert!
return getattr(backend, self.name)(*args, **kwargs)
# Future work: try to convert and run with other backends in self._automatic_backends
# Future work: try to convert and run with other backends in automatic_backends
raise NetworkXNotImplemented(
f"'{self.name}' not implemented by {graph_backend_name}"
)
Expand All @@ -538,13 +549,13 @@ def __call__(self, /, *args, backend=None, **kwargs):
# Only networkx graphs; try to convert and run with a backend with automatic
# conversion, but don't do this by default for graph generators or loaders.
if self.graphs:
for backend_name in self._automatic_backends:
for backend_name in automatic_backends:
if self._can_backend_run(backend_name, *args, **kwargs):
return self._convert_and_call(
backend_name,
args,
kwargs,
fallback_to_nx=self._fallback_to_nx,
fallback_to_nx=backend_config["fallback_to_nx"],
)
# Default: run with networkx on networkx inputs
return self.orig_func(*args, **kwargs)
Expand Down