
Commit

[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
pre-commit-ci[bot] committed May 6, 2024
Parent: 821d9ef · Commit: 0156bc4
Showing 19 changed files with 124 additions and 189 deletions.
15 changes: 6 additions & 9 deletions openml/_api_calls.py
@@ -366,18 +366,15 @@ def _send_request(  # noqa: C901
             # -- Check if encoding is not UTF-8 perhaps
             if __is_checksum_equal(response.content, md5_checksum):
                 raise OpenMLHashException(
-                    "Checksum of downloaded file is unequal to the expected checksum {}"
-                    "because the text encoding is not UTF-8 when downloading {}. "
+                    f"Checksum of downloaded file is unequal to the expected checksum {md5_checksum}"
+                    f"because the text encoding is not UTF-8 when downloading {url}. "
                     "There might be a sever-sided issue with the file, "
-                    "see: https://github.com/openml/openml-python/issues/1180.".format(
-                        md5_checksum,
-                        url,
-                    ),
+                    "see: https://github.com/openml/openml-python/issues/1180.",
                 )

         raise OpenMLHashException(
-            "Checksum of downloaded file is unequal to the expected checksum {} "
-            "when downloading {}.".format(md5_checksum, url),
+            f"Checksum of downloaded file is unequal to the expected checksum {md5_checksum} "
+            f"when downloading {url}.",
         )

     return response

@@ -443,7 +440,7 @@ def __parse_server_exception(
         server_exception = xmltodict.parse(response.text)
     except xml.parsers.expat.ExpatError as e:
         raise e
-    except Exception as e:  # noqa: BLE001
+    except Exception as e:
         # OpenML has a sophisticated error system
         # where information about failures is provided. try to parse this
         raise OpenMLServerError(
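Editor's note: the hunks above show the pattern this commit applies throughout the repository, namely printf-style "%" interpolation and str.format() calls rewritten as f-strings. The commit message does not name the hook; ruff's pyupgrade rules (UP031/UP032) are a plausible guess, not a confirmed fact. A minimal before/after sketch with made-up values, to show the rewrite is value-preserving:

    md5_checksum = "d41d8cd98f00b204e9800998ecf8427e"  # hypothetical value
    url = "https://www.openml.org/data/download/61"    # hypothetical value

    # Before: positional placeholders, values supplied at the end.
    old = "Checksum of downloaded file is unequal to the expected checksum {} when downloading {}.".format(md5_checksum, url)
    # After: the f-string keeps each value next to its placeholder.
    new = f"Checksum of downloaded file is unequal to the expected checksum {md5_checksum} when downloading {url}."

    assert old == new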
3 changes: 2 additions & 1 deletion openml/cli.py
@@ -1,4 +1,5 @@
-""""Command Line Interface for `openml` to configure its settings."""
+""" "Command Line Interface for `openml` to configure its settings."""
+
 from __future__ import annotations

 import argparse
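Editor's note: the original docstring opened with four quote characters, so the docstring's value began with a stray literal quote. The formatter only inserts a space to make that stray quote explicit (and to avoid four quotes in a row); deleting the fourth quote would be the real fix, but an auto-fixer cannot know that. A small illustration with hypothetical strings, not from the repository:

    before = """"CLI docstring."""   # value: '"CLI docstring.'
    after = """ "CLI docstring."""   # value: ' "CLI docstring.'
    assert before == '"CLI docstring.'
    assert after == ' "CLI docstring.'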
8 changes: 4 additions & 4 deletions openml/config.py
@@ -273,17 +273,17 @@ def _setup(config: _Config | None = None) -> None:
         _root_cache_directory.mkdir(exist_ok=True, parents=True)
     except PermissionError:
         openml_logger.warning(
-            "No permission to create openml cache directory at %s! This can result in "
-            "OpenML-Python not working properly." % _root_cache_directory,
+            f"No permission to create openml cache directory at {_root_cache_directory}! This can result in "
+            "OpenML-Python not working properly.",
         )

     if cache_exists:
         _create_log_handlers()
     else:
         _create_log_handlers(create_file_handler=False)
         openml_logger.warning(
-            "No permission to create OpenML directory at %s! This can result in OpenML-Python "
-            "not working properly." % config_dir,
+            f"No permission to create OpenML directory at {config_dir}! This can result in OpenML-Python "
+            "not working properly.",
         )

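Editor's note: converting these logger calls to f-strings is behaviour-preserving, because the old code already formatted eagerly by applying "%" inline. Logging's own lazy style, which defers formatting until the record is actually emitted, would be a third option; a sketch for comparison, with an assumed logger name and path:

    import logging

    logger = logging.getLogger("openml")
    path = "/home/user/.openml"  # hypothetical path

    logger.warning("No permission to create openml cache directory at %s!" % path)   # eager, old style
    logger.warning(f"No permission to create openml cache directory at {path}!")     # eager, new style
    logger.warning("No permission to create openml cache directory at %s!", path)    # lazy: formatted only if emitted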
8 changes: 4 additions & 4 deletions openml/datasets/dataset.py
@@ -156,14 +156,14 @@ def find_invalid_characters(string: str, pattern: str) -> str:
         )

     if dataset_id is None:
-        pattern = "^[\x00-\x7F]*$"
+        pattern = "^[\x00-\x7f]*$"
         if description and not re.match(pattern, description):
             # not basiclatin (XSD complains)
             invalid_characters = find_invalid_characters(description, pattern)
             raise ValueError(
                 f"Invalid symbols {invalid_characters} in description: {description}",
             )
-        pattern = "^[\x00-\x7F]*$"
+        pattern = "^[\x00-\x7f]*$"
         if citation and not re.match(pattern, citation):
             # not basiclatin (XSD complains)
             invalid_characters = find_invalid_characters(citation, pattern)

@@ -540,7 +540,7 @@ def _cache_compressed_file_from_file(
     elif data_file.suffix == ".pq":
         try:
             data = pd.read_parquet(data_file)
-        except Exception as e:  # noqa: BLE001
+        except Exception as e:
             raise Exception(f"File: {data_file}") from e

     categorical = [data[c].dtype.name == "category" for c in data.columns]

@@ -806,7 +806,7 @@ def get_data(  # noqa: C901, PLR0912, PLR0915
         to_exclude.extend(self.ignore_attribute)

     if len(to_exclude) > 0:
-        logger.info("Going to remove the following attributes: %s" % to_exclude)
+        logger.info(f"Going to remove the following attributes: {to_exclude}")
         keep = np.array([column not in to_exclude for column in attribute_names])
         data = data.loc[:, keep] if isinstance(data, pd.DataFrame) else data[:, keep]

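Editor's note: "\x7F" and "\x7f" denote the same character (DEL, the last ASCII code point), so lowercasing the hex escape changes nothing at runtime; it only normalises style. A quick check of the escape and of what the basic-Latin pattern accepts, with example strings of my own:

    import re

    assert "\x7F" == "\x7f"

    pattern = "^[\x00-\x7f]*$"
    assert re.match(pattern, "plain ASCII description")
    assert not re.match(pattern, "café")  # é is outside the basic-Latin range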
28 changes: 9 additions & 19 deletions openml/datasets/functions.py
@@ -85,8 +85,7 @@ def list_datasets(
     *,
     output_format: Literal["dataframe"],
     **kwargs: Any,
-) -> pd.DataFrame:
-    ...
+) -> pd.DataFrame: ...


 @overload
@@ -98,8 +97,7 @@ def list_datasets(
     tag: str | None,
     output_format: Literal["dataframe"],
     **kwargs: Any,
-) -> pd.DataFrame:
-    ...
+) -> pd.DataFrame: ...


 @overload
@@ -111,8 +109,7 @@ def list_datasets(
     tag: str | None = ...,
     output_format: Literal["dict"] = "dict",
     **kwargs: Any,
-) -> pd.DataFrame:
-    ...
+) -> pd.DataFrame: ...


 def list_datasets(
@@ -207,17 +204,15 @@ def _list_datasets(
     data_id: list | None = ...,
     output_format: Literal["dict"] = "dict",
     **kwargs: Any,
-) -> dict:
-    ...
+) -> dict: ...


 @overload
 def _list_datasets(
     data_id: list | None = ...,
     output_format: Literal["dataframe"] = "dataframe",
     **kwargs: Any,
-) -> pd.DataFrame:
-    ...
+) -> pd.DataFrame: ...


 def _list_datasets(
@@ -256,18 +251,16 @@ def _list_datasets(
     for operator, value in kwargs.items():
         api_call += f"/{operator}/{value}"
     if data_id is not None:
-        api_call += "/data_id/%s" % ",".join([str(int(i)) for i in data_id])
+        api_call += "/data_id/{}".format(",".join([str(int(i)) for i in data_id]))
     return __list_datasets(api_call=api_call, output_format=output_format)


 @overload
-def __list_datasets(api_call: str, output_format: Literal["dict"] = "dict") -> dict:
-    ...
+def __list_datasets(api_call: str, output_format: Literal["dict"] = "dict") -> dict: ...


 @overload
-def __list_datasets(api_call: str, output_format: Literal["dataframe"]) -> pd.DataFrame:
-    ...
+def __list_datasets(api_call: str, output_format: Literal["dataframe"]) -> pd.DataFrame: ...


 def __list_datasets(
@@ -804,10 +797,7 @@ def create_dataset(  # noqa: C901, PLR0912, PLR0915
     if not is_row_id_an_attribute:
         raise ValueError(
             "'row_id_attribute' should be one of the data attribute. "
-            " Got '{}' while candidates are {}.".format(
-                row_id_attribute,
-                [attr[0] for attr in attributes_],
-            ),
+            f" Got '{row_id_attribute}' while candidates are {[attr[0] for attr in attributes_]}.",
         )

     if isinstance(data, pd.DataFrame):
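Editor's note: collapsing the "..." body onto the signature line is how black 24.x formats stub-like functions such as typing overloads (an assumption; the commit does not list its hook versions). A self-contained sketch of the resulting style, with a toy body standing in for the real API call:

    from __future__ import annotations

    from typing import Any, Literal, overload

    import pandas as pd

    @overload
    def list_datasets(output_format: Literal["dict"], **kwargs: Any) -> dict: ...
    @overload
    def list_datasets(output_format: Literal["dataframe"], **kwargs: Any) -> pd.DataFrame: ...

    def list_datasets(output_format: str = "dict", **kwargs: Any) -> dict | pd.DataFrame:
        # Toy body for illustration only; the real function queries the OpenML API.
        data = {"did": [61], "name": ["iris"]}
        return pd.DataFrame(data) if output_format == "dataframe" else data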
22 changes: 10 additions & 12 deletions openml/evaluations/functions.py
@@ -32,8 +32,7 @@ def list_evaluations(
     per_fold: bool | None = ...,
     sort_order: str | None = ...,
     output_format: Literal["dict", "object"] = "dict",
-) -> dict:
-    ...
+) -> dict: ...


 @overload
@@ -51,8 +50,7 @@ def list_evaluations(
     per_fold: bool | None = ...,
     sort_order: str | None = ...,
     output_format: Literal["dataframe"] = ...,
-) -> pd.DataFrame:
-    ...
+) -> pd.DataFrame: ...


 def list_evaluations(
@@ -204,24 +202,24 @@ def _list_evaluations(
     -------
     dict of objects, or dataframe
     """
-    api_call = "evaluation/list/function/%s" % function
+    api_call = f"evaluation/list/function/{function}"
     if kwargs is not None:
         for operator, value in kwargs.items():
             api_call += f"/{operator}/{value}"
     if tasks is not None:
-        api_call += "/task/%s" % ",".join([str(int(i)) for i in tasks])
+        api_call += "/task/{}".format(",".join([str(int(i)) for i in tasks]))
     if setups is not None:
-        api_call += "/setup/%s" % ",".join([str(int(i)) for i in setups])
+        api_call += "/setup/{}".format(",".join([str(int(i)) for i in setups]))
     if flows is not None:
-        api_call += "/flow/%s" % ",".join([str(int(i)) for i in flows])
+        api_call += "/flow/{}".format(",".join([str(int(i)) for i in flows]))
     if runs is not None:
-        api_call += "/run/%s" % ",".join([str(int(i)) for i in runs])
+        api_call += "/run/{}".format(",".join([str(int(i)) for i in runs]))
     if uploaders is not None:
-        api_call += "/uploader/%s" % ",".join([str(int(i)) for i in uploaders])
+        api_call += "/uploader/{}".format(",".join([str(int(i)) for i in uploaders]))
     if study is not None:
         api_call += "/study/%d" % study
     if sort_order is not None:
-        api_call += "/sort_order/%s" % sort_order
+        api_call += f"/sort_order/{sort_order}"

     return __list_evaluations(api_call, output_format=output_format)

@@ -236,7 +234,7 @@ def __list_evaluations(
     # Minimalistic check if the XML is useful
     if "oml:evaluations" not in evals_dict:
         raise ValueError(
-            "Error in return XML, does not contain " '"oml:evaluations": %s' % str(evals_dict),
+            "Error in return XML, does not contain " f'"oml:evaluations": {evals_dict!s}',
         )

     assert isinstance(evals_dict["oml:evaluations"]["oml:evaluation"], list), type(
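Editor's note: notice that the "%s" interpolations wrapping ",".join(...) became str.format() calls rather than f-strings. Before Python 3.12, an f-string could not reuse its own quote character inside the braces, so an expression like ",".join(...) cannot be inlined into a double-quoted f-string; falling back to .format() is the usual auto-fix. The "%d" interpolation for study is the one survivor, presumably because %d has int-specific semantics the fixer declines to rewrite. This is my reading of the tool's behaviour, illustrated with hypothetical IDs:

    task_ids = [1, 2, 3]  # hypothetical IDs

    api_call = "evaluation/list/function/predictive_accuracy"
    # f"/task/{','.join(...)}" would need a different inner quote; .format() avoids the issue.
    api_call += "/task/{}".format(",".join(str(int(i)) for i in task_ids))
    assert api_call.endswith("/task/1,2,3")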
43 changes: 17 additions & 26 deletions openml/extensions/sklearn/extension.py
@@ -419,7 +419,7 @@ def _deserialize_sklearn(  # noqa: PLR0915, C901, PLR0913, PLR0912
                 strict_version=strict_version,
             )
         else:
-            raise ValueError("Cannot flow_to_sklearn %s" % serialized_type)
+            raise ValueError(f"Cannot flow_to_sklearn {serialized_type}")

     else:
         rval = OrderedDict(
@@ -979,17 +979,15 @@ def flatten_all(list_):
                     # length 2 is for {VotingClassifier.estimators,
                     # Pipeline.steps, FeatureUnion.transformer_list}
                     # length 3 is for ColumnTransformer
-                    msg = "Length of tuple of type {} does not match assumptions".format(
-                        sub_component_type,
-                    )
+                    msg = f"Length of tuple of type {sub_component_type} does not match assumptions"
                     raise ValueError(msg)

                 if isinstance(sub_component, str):
                     if sub_component not in SKLEARN_PIPELINE_STRING_COMPONENTS:
                         msg = (
                             "Second item of tuple does not match assumptions. "
                             "If string, can be only 'drop' or 'passthrough' but"
-                            "got %s" % sub_component
+                            f"got {sub_component}"
                         )
                         raise ValueError(msg)
                 elif sub_component is None:
@@ -1002,15 +1000,15 @@ def flatten_all(list_):
                 elif not isinstance(sub_component, OpenMLFlow):
                     msg = (
                         "Second item of tuple does not match assumptions. "
-                        "Expected OpenMLFlow, got %s" % type(sub_component)
+                        f"Expected OpenMLFlow, got {type(sub_component)}"
                     )
                     raise TypeError(msg)

                 if identifier in reserved_keywords:
                     parent_model = f"{model.__module__}.{model.__class__.__name__}"
-                    msg = "Found element shadowing official " "parameter for {}: {}".format(
-                        parent_model,
-                        identifier,
+                    msg = (
+                        "Found element shadowing official "
+                        f"parameter for {parent_model}: {identifier}"
                     )
                     raise PyOpenMLError(msg)

@@ -1035,9 +1033,9 @@ def flatten_all(list_):
                     model=None,
                 )
                 component_reference: OrderedDict[str, str | dict] = OrderedDict()
-                component_reference[
-                    "oml-python:serialized_object"
-                ] = COMPOSITION_STEP_CONSTANT
+                component_reference["oml-python:serialized_object"] = (
+                    COMPOSITION_STEP_CONSTANT
+                )
                 cr_value: dict[str, Any] = OrderedDict()
                 cr_value["key"] = identifier
                 cr_value["step_name"] = identifier
@@ -1218,7 +1216,7 @@ def _check_dependencies(
     for dependency_string in dependencies_list:
         match = DEPENDENCIES_PATTERN.match(dependency_string)
         if not match:
-            raise ValueError("Cannot parse dependency %s" % dependency_string)
+            raise ValueError(f"Cannot parse dependency {dependency_string}")

         dependency_name = match.group("name")
         operation = match.group("operation")
@@ -1237,7 +1235,7 @@ def _check_dependencies(
                 installed_version > required_version or installed_version == required_version
             )
         else:
-            raise NotImplementedError("operation '%s' is not supported" % operation)
+            raise NotImplementedError(f"operation '{operation}' is not supported")
         message = (
             "Trying to deserialize a model with dependency "
             f"{dependency_string} not satisfied."
@@ -1812,10 +1810,7 @@ def _prediction_to_probabilities(
                 # then we need to add a column full of zeros into the probabilities
                 # for class 3 because the rest of the library expects that the
                 # probabilities are ordered the same way as the classes are ordered).
-                message = "Estimator only predicted for {}/{} classes!".format(
-                    proba_y.shape[1],
-                    len(task.class_labels),
-                )
+                message = f"Estimator only predicted for {proba_y.shape[1]}/{len(task.class_labels)} classes!"
                 warnings.warn(message, stacklevel=2)
                 openml.config.logger.warning(message)

@@ -2008,9 +2003,7 @@ def is_subcomponent_specification(values):
                 pass
             else:
                 raise TypeError(
-                    "Subcomponent flow should be of type flow, but is {}".format(
-                        type(subcomponent_flow),
-                    ),
+                    f"Subcomponent flow should be of type flow, but is {type(subcomponent_flow)}",
                 )

             current = {
@@ -2129,8 +2122,7 @@ def instantiate_model_from_hpo_class(
         """
        if not self._is_hpo_class(model):
            raise AssertionError(
-                "Flow model %s is not an instance of sklearn.model_selection._search.BaseSearchCV"
-                % model,
+                f"Flow model {model} is not an instance of sklearn.model_selection._search.BaseSearchCV",
            )
        base_estimator = model.estimator
        base_estimator.set_params(**trace_iteration.get_parameters())
@@ -2192,8 +2184,7 @@ def _obtain_arff_trace(
         """
        if not self._is_hpo_class(model):
            raise AssertionError(
-                "Flow model %s is not an instance of sklearn.model_selection._search.BaseSearchCV"
-                % model,
+                f"Flow model {model} is not an instance of sklearn.model_selection._search.BaseSearchCV",
            )
        if not hasattr(model, "cv_results_"):
            raise ValueError("model should contain `cv_results_`")
@@ -2228,7 +2219,7 @@ def _obtain_arff_trace(
                 # hyperparameter layer_sizes of MLPClassifier
                 type = "STRING"  # noqa: A001
             else:
-                raise TypeError("Unsupported param type in param grid: %s" % key)
+                raise TypeError(f"Unsupported param type in param grid: {key}")

             # renamed the attribute param to parameter, as this is a required
             # OpenML convention - this also guards against name collisions
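Editor's note: several hunks above rely on implicit concatenation of adjacent string literals, which Python performs at compile time and which mixes plain strings and f-strings freely. A quick check, with values of my own choosing:

    parent_model = "sklearn.pipeline.Pipeline"  # hypothetical value
    identifier = "steps"                        # hypothetical value

    msg = (
        "Found element shadowing official "
        f"parameter for {parent_model}: {identifier}"
    )
    assert msg == "Found element shadowing official parameter for sklearn.pipeline.Pipeline: steps"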
10 changes: 4 additions & 6 deletions openml/flows/flow.py
@@ -135,15 +135,13 @@ def __init__(  # noqa: PLR0913
         keys_parameters_meta_info = set(parameters_meta_info.keys())
         if len(keys_parameters.difference(keys_parameters_meta_info)) > 0:
             raise ValueError(
-                "Parameter %s only in parameters, but not in "
-                "parameters_meta_info."
-                % str(keys_parameters.difference(keys_parameters_meta_info)),
+                f"Parameter {keys_parameters.difference(keys_parameters_meta_info)!s} only in parameters, but not in "
+                "parameters_meta_info.",
             )
         if len(keys_parameters_meta_info.difference(keys_parameters)) > 0:
             raise ValueError(
-                "Parameter %s only in parameters_meta_info, "
-                "but not in parameters."
-                % str(keys_parameters_meta_info.difference(keys_parameters)),
+                f"Parameter {keys_parameters_meta_info.difference(keys_parameters)!s} only in parameters_meta_info, "
+                "but not in parameters.",
             )

         self.external_version = external_version
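Editor's note: the "!s" conversion inside an f-string applies str() to the expression, so f"{diff!s}" reproduces the old "%s" % str(diff) exactly; for sets a plain f"{diff}" would format identically, and keeping "!s" simply preserves the original code's explicit str() call. A check with a hypothetical parameter set:

    keys_parameters = {"C", "kernel"}
    keys_parameters_meta_info = {"C"}

    diff = keys_parameters.difference(keys_parameters_meta_info)
    assert f"{diff!s}" == str(diff) == "{'kernel'}"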
