Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add nx.config dict for configuring dispatching and backends #7225

Merged
merged 20 commits into from Mar 15, 2024
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion networkx/__init__.py
Expand Up @@ -17,7 +17,7 @@
from networkx.exception import *

from networkx import utils
from networkx.utils.backends import _dispatchable
from networkx.utils.backends import _dispatchable, config

from networkx import classes
from networkx.classes import filters
Expand Down
2 changes: 1 addition & 1 deletion networkx/algorithms/operators/tests/test_binary.py
Expand Up @@ -53,7 +53,7 @@ def test_intersection():
assert set(I2.nodes()) == {1, 2, 3, 4}
assert sorted(I2.edges()) == [(2, 3)]
# Only test if not performing auto convert testing of backend implementations
if not nx.utils.backends._dispatchable._automatic_backends:
if not nx.config["backend_priority"]:
with pytest.raises(TypeError):
nx.intersection(G2, H)
with pytest.raises(TypeError):
Expand Down
4 changes: 2 additions & 2 deletions networkx/classes/tests/test_backends.py
Expand Up @@ -31,8 +31,8 @@ def test_pickle():


@pytest.mark.skipif(
"not nx._dispatchable._automatic_backends "
"or nx._dispatchable._automatic_backends[0] != 'nx-loopback'"
"not nx.config['backend_priority'] "
"or nx.config['backend_priority'][0] != 'nx-loopback'"
)
def test_graph_converter_needs_backend():
# When testing, `nx.from_scipy_sparse_array` will *always* call the backend
Expand Down
16 changes: 8 additions & 8 deletions networkx/conftest.py
Expand Up @@ -45,12 +45,6 @@ def pytest_configure(config):
backend = config.getoption("--backend")
if backend is None:
backend = os.environ.get("NETWORKX_TEST_BACKEND")
if backend:
networkx.utils.backends._dispatchable._automatic_backends = [backend]
fallback_to_nx = config.getoption("--fallback-to-nx")
if not fallback_to_nx:
fallback_to_nx = os.environ.get("NETWORKX_FALLBACK_TO_NX")
networkx.utils.backends._dispatchable._fallback_to_nx = bool(fallback_to_nx)
# nx-loopback backend is only available when testing
backends = entry_points(name="nx-loopback", group="networkx.backends")
if backends:
Expand All @@ -64,16 +58,22 @@ def pytest_configure(config):
" Try `pip install -e .`, or change your PYTHONPATH\n"
" Make sure python finds the networkx repo you are testing\n\n"
)
if backend:
networkx.config["backend_priority"] = [backend]
fallback_to_nx = config.getoption("--fallback-to-nx")
if not fallback_to_nx:
fallback_to_nx = os.environ.get("NETWORKX_FALLBACK_TO_NX")
networkx.utils.backends._dispatchable._fallback_to_nx = bool(fallback_to_nx)


def pytest_collection_modifyitems(config, items):
# Setting this to True here allows tests to be set up before dispatching
# any function call to a backend.
networkx.utils.backends._dispatchable._is_testing = True
if automatic_backends := networkx.utils.backends._dispatchable._automatic_backends:
if backend_priority := networkx.config["backend_priority"]:
# Allow pluggable backends to add markers to tests (such as skip or xfail)
# when running in auto-conversion test mode
backend = networkx.utils.backends.backends[automatic_backends[0]].load()
backend = networkx.utils.backends.backends[backend_priority[0]].load()
if hasattr(backend, "on_start_tests"):
getattr(backend, "on_start_tests")(items)

Expand Down
43 changes: 32 additions & 11 deletions networkx/utils/backends.py
Expand Up @@ -106,7 +106,7 @@ class WrappedSparse:
from ..exception import NetworkXNotImplemented
from .decorators import argmap

__all__ = ["_dispatchable"]
__all__ = ["_dispatchable", "config"]


def _do_nothing():
Expand Down Expand Up @@ -142,6 +142,31 @@ def _get_backends(group, *, load_and_call=False):
backends = _get_backends("networkx.backends")
backend_info = _get_backends("networkx.backend_info", load_and_call=True)

# We must import from config after defining `backends` above
from .configs import Config, config

# Get default configuration from environment variables at import time
config.backend_priority = [
x.strip()
for x in os.environ.get(
"NETWORKX_BACKEND_PRIORITY",
os.environ.get("NETWORKX_AUTOMATIC_BACKENDS", ""),
).split(",")
if x.strip()
]
# Initialize default configuration for backends
config.backends = Config(
**{
backend: (
cfg if isinstance(cfg := info["default_config"], Config) else Config(**cfg)
)
if "default_config" in info
else Config()
for backend, info in backend_info.items()
}
)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

here, it might be more optimal to have a separate function(like get_info) that returns only the backend's configurations, like get_config(config_dict=None)(if config_dict is not None then the configuration dictionary is updated and returned otherwise the default configurations are returned) because config dictionary would probably be updated quite frequently and we probably shouldn't load all the backend_info every time. Please let me know if I'm missing something here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure I completely understand your comment. get_info is called exactly once at import time for each backend (before this PR and in this PR). Among other things, this helps the dispatch machinery know which backends implement which functions.

By optionally including "default_config" in the dict returned by get_info, we are able to initialize the default configuration for each backend at import time. This happens only once. This may be nice for users so they can do things like:

import networkx as nx
nx.backend_config["parallel"]["n_job"] = 5

nx.backend_config is how we expect users to update configuration.

We could make a separate entry point for the default config, but I don't know if it would be worth doing since it seems to me that the "backend_info" entry point works well enough.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok, I thought this kind of thing was possible :

import networkx as nx
nx.backend_config["parallel"]["n_job"] = 5
# defining G
print(nx.betweenness_centrality(G, backend="parallel"))

nx.backend_config["parallel"]["n_job"] = -1
nx.backend_config["parallel"]["backend"] = 'threading'
print(nx.betweenness_centrality(G, backend="parallel"))

so I thought we would be extracting all the configurations and adding them in the joblib.Parallel() in all the functions, so having a separate get_config function made sense to me.

But, I think, based on the clarification above, that all the configuration code lines will be executed while importing networkx so the final configuration for both the function calls would be {"n_jobs" : -1, "backend" : "threading"}, right?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Coming back to this question:

  • I think the code you write above would work with this configuration system.
  • the default values will be obtained from the backend_info get_info function.
  • the user overrides those values as desired for each function call.
  • the function dispatches to nx-parallel which looks at the config to get the values to send to joblib.

I think your get_config function would live in nx-parallel and look at nx.config.backends.parallel.<name> for each option. Or, we could use the whole config object like: joblib.Parallel(**nx.config.backends.parallel). But the user would be able to change them between each function call.

type(config.backends).__doc__ = "All installed NetworkX backends and their configs."

# Load and cache backends on-demand
_loaded_backends = {} # type: ignore[var-annotated]

Expand Down Expand Up @@ -180,11 +205,6 @@ class _dispatchable:
_fallback_to_nx = (
os.environ.get("NETWORKX_FALLBACK_TO_NX", "true").strip().lower() == "true"
)
_automatic_backends = [
x.strip()
for x in os.environ.get("NETWORKX_AUTOMATIC_BACKENDS", "").split(",")
if x.strip()
]

def __new__(
cls,
Expand Down Expand Up @@ -532,11 +552,12 @@ def __call__(self, /, *args, backend=None, **kwargs):
for g in graphs_resolved.values()
}

if self._is_testing and self._automatic_backends and backend_name is None:
backend_priority = config.backend_priority
if self._is_testing and backend_priority and backend_name is None:
# Special path if we are running networkx tests with a backend.
# This even runs for (and handles) functions that mutate input graphs.
return self._convert_and_call_for_tests(
self._automatic_backends[0],
backend_priority[0],
args,
kwargs,
fallback_to_nx=self._fallback_to_nx,
Expand All @@ -563,7 +584,7 @@ def __call__(self, /, *args, backend=None, **kwargs):
raise ImportError(f"Unable to load backend: {graph_backend_name}")
if (
"networkx" in graph_backend_names
and graph_backend_name not in self._automatic_backends
and graph_backend_name not in backend_priority
):
# Not configured to convert networkx graphs to this backend
raise TypeError(
Expand All @@ -584,7 +605,7 @@ def __call__(self, /, *args, backend=None, **kwargs):
)
# All graphs are backend graphs--no need to convert!
return getattr(backend, self.name)(*args, **kwargs)
# Future work: try to convert and run with other backends in self._automatic_backends
# Future work: try to convert and run with other backends in backend_priority
raise NetworkXNotImplemented(
f"'{self.name}' not implemented by {graph_backend_name}"
)
Expand Down Expand Up @@ -622,7 +643,7 @@ def __call__(self, /, *args, backend=None, **kwargs):
)
):
# Should we warn or log if we don't convert b/c the input will be mutated?
for backend_name in self._automatic_backends:
for backend_name in backend_priority:
if self._should_backend_run(backend_name, *args, **kwargs):
return self._convert_and_call(
backend_name,
Expand Down
183 changes: 183 additions & 0 deletions networkx/utils/configs.py
@@ -0,0 +1,183 @@
import collections
import types
import typing
from dataclasses import dataclass

__all__ = ["Config", "config"]


@dataclass(init=False, eq=False, slots=True, kw_only=True, match_args=False)
class Config:
"""The base class for NetworkX configuration.

There are two ways to use this to create configurations. The first is to
simply pass the initial configuration as keyword arguments to ``Config``:

>>> cfg = Config(eggs=1, spam=5)
>>> cfg
Config(eggs=1, spam=5)

The second--and preferred--way is to subclass ``Config`` with docs and annotations.

>>> class MyConfig(Config):
... '''Breakfast!'''
...
... eggs: int
... spam: int
...
... def _check_config(self, key, value):
... assert isinstance(value, int) and value >= 0
>>> cfg = MyConfig(eggs=1, spam=5)

Once defined, configuration items may be modified, but can't be added or deleted.
``Config`` is a ``Mapping``, and can get and set configs via attributes or brackets:

>>> cfg.eggs = 2
>>> cfg.eggs
2
>>> cfg["spam"] = 42
>>> cfg["spam"]
42

Subclasses may also define ``_check_config`` (as done in the example above)
to ensure the value being assigned is valid:

>>> cfg.spam = -1
Traceback (most recent call last):
...
AssertionError

"""

def __new__(cls, **kwargs):
orig_class = cls
if cls is Config:
# Enable the "simple" case of accepting config definition as keywords
cls = type(
cls.__name__,
(cls,),
{"__annotations__": {key: typing.Any for key in kwargs}},
)
cls = dataclass(eq=False, slots=True, kw_only=True, match_args=False)(cls)
cls._orig_class = orig_class # Save original class so we can pickle
instance = object.__new__(cls)
instance.__init__(**kwargs)
return instance

def _check_config(self, key, value):
"""Check whether config value is valid. This is useful for subclasses."""

# Control behavior of attributes
def __dir__(self):
return self.__dataclass_fields__.keys()

def __setattr__(self, key, value):
if key not in self.__dataclass_fields__:
raise AttributeError(f"Invalid config name: {key!r}")
self._check_config(key, value)
object.__setattr__(self, key, value)

def __delattr__(self, key):
raise TypeError(f"Configuration items can't be deleted (can't delete {key!r}).")

# Be a `collection.abc.Collection`
def __contains__(self, key):
return key in self.__dataclass_fields__

def __iter__(self):
return iter(self.__dataclass_fields__)

def __len__(self):
return len(self.__dataclass_fields__)

def __reversed__(self):
return reversed(self.__dataclass_fields__)

# Add dunder methods for `collections.abc.Mapping`
def __getitem__(self, key):
try:
return getattr(self, key)
except AttributeError as err:
raise KeyError(*err.args) from None

def __setitem__(self, key, value):
try:
setattr(self, key, value)
except AttributeError as err:
raise KeyError(*err.args) from None

__delitem__ = __delattr__
_ipython_key_completions_ = __dir__ # config["<TAB>

# Go ahead and make it a `collections.abc.Mapping`
def get(self, key, default=None):
return getattr(self, key, default)

def items(self):
return collections.abc.ItemsView(self)

def keys(self):
return collections.abc.KeysView(self)

def values(self):
return collections.abc.ValuesView(self)

# dataclass can define __eq__ for us, but do it here so it works after pickling
def __eq__(self, other):
if not isinstance(other, Config):
return NotImplemented
return self._orig_class == other._orig_class and self.items() == other.items()

# Make pickle work
def __reduce__(self):
return self._deserialize, (self._orig_class, dict(self))

@staticmethod
def _deserialize(cls, kwargs):
return cls(**kwargs)

# Make type annotations work with key and value types; e.g. Config[str, int]
def __class_getitem__(cls, item):
return types.GenericAlias(cls, item)


# Register, b/c `Mapping.__subclasshook__` returns `NotImplemented`
collections.abc.Mapping.register(Config)


class NetworkXConfig(Config):
"""Write me!"""

backend_priority: list[str]
backends: Config[str, Config]
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

mypy reminds me again that I still know very little about type annotations. mypy doesn't like Config[str, Config].

My "solution" will be to delete __class_getitem__ so this syntax isn't even possible. The working solution probably includes Generic and TypeVar and who knows what. Oh well... I thought this was worth trying.


def _check_config(self, key, value):
from .backends import backends

if key == "backend_priority":
if not (isinstance(value, list) and all(isinstance(x, str) for x in value)):
raise TypeError(
f"{key!r} config must be a list of backend names; got {value!r}"
)
if missing := {x for x in value if x not in backends}:
missing = ", ".join(map(repr, sorted(missing)))
raise ValueError(f"Unknown backend when setting {key!r}: {missing}")
elif key == "backends":
if not (
isinstance(value, Config)
and all(isinstance(key, str) for key in value)
and all(isinstance(val, Config) for val in value.values())
):
raise TypeError(
f"{key!r} config must be a Config of backend configs; got {value!r}"
)
if missing := {x for x in value if x not in backends}:
missing = ", ".join(map(repr, sorted(missing)))
raise ValueError(f"Unknown backend when setting {key!r}: {missing}")


# Backend configuration will be updated in backends.py
config = NetworkXConfig(
backend_priority=[],
backends=Config(),
)