
Commit

[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
pre-commit-ci[bot] committed May 6, 2024
Parent: 821d9ef · Commit: 0156bc4
Showing 19 changed files with 124 additions and 189 deletions.
15 changes: 6 additions & 9 deletions openml/_api_calls.py
@@ -366,18 +366,15 @@ def _send_request(  # noqa: C901
             # -- Check if encoding is not UTF-8 perhaps
             if __is_checksum_equal(response.content, md5_checksum):
                 raise OpenMLHashException(
-                    "Checksum of downloaded file is unequal to the expected checksum {}"
-                    "because the text encoding is not UTF-8 when downloading {}. "
+                    f"Checksum of downloaded file is unequal to the expected checksum {md5_checksum}"
+                    f"because the text encoding is not UTF-8 when downloading {url}. "
                     "There might be a sever-sided issue with the file, "
-                    "see: https://github.com/openml/openml-python/issues/1180.".format(
-                        md5_checksum,
-                        url,
-                    ),
+                    "see: https://github.com/openml/openml-python/issues/1180.",
                 )

         raise OpenMLHashException(
-            "Checksum of downloaded file is unequal to the expected checksum {} "
-            "when downloading {}.".format(md5_checksum, url),
+            f"Checksum of downloaded file is unequal to the expected checksum {md5_checksum} "
+            f"when downloading {url}.",
         )

     return response

@@ -443,7 +440,7 @@ def __parse_server_exception(
         server_exception = xmltodict.parse(response.text)
     except xml.parsers.expat.ExpatError as e:
         raise e
-    except Exception as e:  # noqa: BLE001
+    except Exception as e:
         # OpenML has a sophisticated error system
         # where information about failures is provided. try to parse this
         raise OpenMLServerError(
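Editor's note: the hunks above show the pattern this commit applies throughout the repository, namely printf-style "%" interpolation and str.format() calls rewritten as f-strings. The commit message does not name the hook; ruff's pyupgrade rules (UP031/UP032) are a plausible guess, not a confirmed fact. A minimal before/after sketch with made-up values, to show the rewrite is value-preserving:

    md5_checksum = "d41d8cd98f00b204e9800998ecf8427e"  # hypothetical value
    url = "https://www.openml.org/data/download/61"    # hypothetical value

    # Before: positional placeholders, values supplied at the end.
    old = "Checksum of downloaded file is unequal to the expected checksum {} when downloading {}.".format(md5_checksum, url)
    # After: the f-string keeps each value next to its placeholder.
    new = f"Checksum of downloaded file is unequal to the expected checksum {md5_checksum} when downloading {url}."

    assert old == new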
3 changes: 2 additions & 1 deletion openml/cli.py
@@ -1,4 +1,5 @@
-""""Command Line Interface for `openml` to configure its settings."""
+""" "Command Line Interface for `openml` to configure its settings."""
+
 from __future__ import annotations

 import argparse
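Editor's note: the original docstring opened with four quote characters, so the docstring's value began with a stray literal quote. The formatter only inserts a space to make that stray quote explicit (and to avoid four quotes in a row); deleting the fourth quote would be the real fix, but an auto-fixer cannot know that. A small illustration with hypothetical strings, not from the repository:

    before = """"CLI docstring."""   # value: '"CLI docstring.'
    after = """ "CLI docstring."""   # value: ' "CLI docstring.'
    assert before == '"CLI docstring.'
    assert after == ' "CLI docstring.'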
8 changes: 4 additions & 4 deletions openml/config.py
@@ -273,17 +273,17 @@ def _setup(config: _Config | None = None) -> None:
         _root_cache_directory.mkdir(exist_ok=True, parents=True)
     except PermissionError:
         openml_logger.warning(
-            "No permission to create openml cache directory at %s! This can result in "
-            "OpenML-Python not working properly." % _root_cache_directory,
+            f"No permission to create openml cache directory at {_root_cache_directory}! This can result in "
+            "OpenML-Python not working properly.",
         )

     if cache_exists:
         _create_log_handlers()
     else:
         _create_log_handlers(create_file_handler=False)
         openml_logger.warning(
-            "No permission to create OpenML directory at %s! This can result in OpenML-Python "
-            "not working properly." % config_dir,
+            f"No permission to create OpenML directory at {config_dir}! This can result in OpenML-Python "
+            "not working properly.",
         )

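Editor's note: converting these logger calls to f-strings is behaviour-preserving, because the old code already formatted eagerly by applying "%" inline. Logging's own lazy style, which defers formatting until the record is actually emitted, would be a third option; a sketch for comparison, with an assumed logger name and path:

    import logging

    logger = logging.getLogger("openml")
    path = "/home/user/.openml"  # hypothetical path

    logger.warning("No permission to create openml cache directory at %s!" % path)   # eager, old style
    logger.warning(f"No permission to create openml cache directory at {path}!")     # eager, new style
    logger.warning("No permission to create openml cache directory at %s!", path)    # lazy: formatted only if emitted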
8 changes: 4 additions & 4 deletions openml/datasets/dataset.py
@@ -156,14 +156,14 @@ def find_invalid_characters(string: str, pattern: str) -> str:
         )

     if dataset_id is None:
-        pattern = "^[\x00-\x7F]*$"
+        pattern = "^[\x00-\x7f]*$"
         if description and not re.match(pattern, description):
             # not basiclatin (XSD complains)
             invalid_characters = find_invalid_characters(description, pattern)
             raise ValueError(
                 f"Invalid symbols {invalid_characters} in description: {description}",
             )
-        pattern = "^[\x00-\x7F]*$"
+        pattern = "^[\x00-\x7f]*$"
         if citation and not re.match(pattern, citation):
             # not basiclatin (XSD complains)
             invalid_characters = find_invalid_characters(citation, pattern)

@@ -540,7 +540,7 @@ def _cache_compressed_file_from_file(
     elif data_file.suffix == ".pq":
         try:
             data = pd.read_parquet(data_file)
-        except Exception as e:  # noqa: BLE001
+        except Exception as e:
             raise Exception(f"File: {data_file}") from e

     categorical = [data[c].dtype.name == "category" for c in data.columns]

@@ -806,7 +806,7 @@ def get_data(  # noqa: C901, PLR0912, PLR0915
         to_exclude.extend(self.ignore_attribute)

     if len(to_exclude) > 0:
-        logger.info("Going to remove the following attributes: %s" % to_exclude)
+        logger.info(f"Going to remove the following attributes: {to_exclude}")
         keep = np.array([column not in to_exclude for column in attribute_names])
         data = data.loc[:, keep] if isinstance(data, pd.DataFrame) else data[:, keep]

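Editor's note: "\x7F" and "\x7f" denote the same character (DEL, the last ASCII code point), so lowercasing the hex escape changes nothing at runtime; it only normalises style. A quick check of the escape and of what the basic-Latin pattern accepts, with example strings of my own:

    import re

    assert "\x7F" == "\x7f"

    pattern = "^[\x00-\x7f]*$"
    assert re.match(pattern, "plain ASCII description")
    assert not re.match(pattern, "café")  # é is outside the basic-Latin range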
28 changes: 9 additions & 19 deletions openml/datasets/functions.py
@@ -85,8 +85,7 @@ def list_datasets(
     *,
     output_format: Literal["dataframe"],
     **kwargs: Any,
-) -> pd.DataFrame:
-    ...
+) -> pd.DataFrame: ...


 @overload
@@ -98,8 +97,7 @@ def list_datasets(
     tag: str | None,
     output_format: Literal["dataframe"],
     **kwargs: Any,
-) -> pd.DataFrame:
-    ...
+) -> pd.DataFrame: ...


 @overload
@@ -111,8 +109,7 @@ def list_datasets(
     tag: str | None = ...,
     output_format: Literal["dict"] = "dict",
     **kwargs: Any,
-) -> pd.DataFrame:
-    ...
+) -> pd.DataFrame: ...


 def list_datasets(
@@ -207,17 +204,15 @@ def _list_datasets(
     data_id: list | None = ...,
     output_format: Literal["dict"] = "dict",
     **kwargs: Any,
-) -> dict:
-    ...
+) -> dict: ...


 @overload
 def _list_datasets(
     data_id: list | None = ...,
     output_format: Literal["dataframe"] = "dataframe",
     **kwargs: Any,
-) -> pd.DataFrame:
-    ...
+) -> pd.DataFrame: ...


 def _list_datasets(
@@ -256,18 +251,16 @@ def _list_datasets(
     for operator, value in kwargs.items():
         api_call += f"/{operator}/{value}"
     if data_id is not None:
-        api_call += "/data_id/%s" % ",".join([str(int(i)) for i in data_id])
+        api_call += "/data_id/{}".format(",".join([str(int(i)) for i in data_id]))
     return __list_datasets(api_call=api_call, output_format=output_format)


 @overload
-def __list_datasets(api_call: str, output_format: Literal["dict"] = "dict") -> dict:
-    ...
+def __list_datasets(api_call: str, output_format: Literal["dict"] = "dict") -> dict: ...


 @overload
-def __list_datasets(api_call: str, output_format: Literal["dataframe"]) -> pd.DataFrame:
-    ...
+def __list_datasets(api_call: str, output_format: Literal["dataframe"]) -> pd.DataFrame: ...


 def __list_datasets(
@@ -804,10 +797,7 @@ def create_dataset(  # noqa: C901, PLR0912, PLR0915
     if not is_row_id_an_attribute:
         raise ValueError(
             "'row_id_attribute' should be one of the data attribute. "
-            " Got '{}' while candidates are {}.".format(
-                row_id_attribute,
-                [attr[0] for attr in attributes_],
-            ),
+            f" Got '{row_id_attribute}' while candidates are {[attr[0] for attr in attributes_]}.",
         )

     if isinstance(data, pd.DataFrame):
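Editor's note: collapsing the "..." body onto the signature line is how black 24.x formats stub-like functions such as typing overloads (an assumption; the commit does not list its hook versions). A self-contained sketch of the resulting style, with a toy body standing in for the real API call:

    from __future__ import annotations

    from typing import Any, Literal, overload

    import pandas as pd

    @overload
    def list_datasets(output_format: Literal["dict"], **kwargs: Any) -> dict: ...
    @overload
    def list_datasets(output_format: Literal["dataframe"], **kwargs: Any) -> pd.DataFrame: ...

    def list_datasets(output_format: str = "dict", **kwargs: Any) -> dict | pd.DataFrame:
        # Toy body for illustration only; the real function queries the OpenML API.
        data = {"did": [61], "name": ["iris"]}
        return pd.DataFrame(data) if output_format == "dataframe" else data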
22 changes: 10 additions & 12 deletions openml/evaluations/functions.py
@@ -32,8 +32,7 @@ def list_evaluations(
     per_fold: bool | None = ...,
     sort_order: str | None = ...,
     output_format: Literal["dict", "object"] = "dict",
-) -> dict:
-    ...
+) -> dict: ...


 @overload
@@ -51,8 +50,7 @@ def list_evaluations(
     per_fold: bool | None = ...,
     sort_order: str | None = ...,
     output_format: Literal["dataframe"] = ...,
-) -> pd.DataFrame:
-    ...
+) -> pd.DataFrame: ...


 def list_evaluations(
@@ -204,24 +202,24 @@ def _list_evaluations(
     -------
     dict of objects, or dataframe
     """
-    api_call = "evaluation/list/function/%s" % function
+    api_call = f"evaluation/list/function/{function}"
     if kwargs is not None:
         for operator, value in kwargs.items():
             api_call += f"/{operator}/{value}"
     if tasks is not None:
-        api_call += "/task/%s" % ",".join([str(int(i)) for i in tasks])
+        api_call += "/task/{}".format(",".join([str(int(i)) for i in tasks]))
     if setups is not None:
-        api_call += "/setup/%s" % ",".join([str(int(i)) for i in setups])
+        api_call += "/setup/{}".format(",".join([str(int(i)) for i in setups]))
     if flows is not None:
-        api_call += "/flow/%s" % ",".join([str(int(i)) for i in flows])
+        api_call += "/flow/{}".format(",".join([str(int(i)) for i in flows]))
     if runs is not None:
-        api_call += "/run/%s" % ",".join([str(int(i)) for i in runs])
+        api_call += "/run/{}".format(",".join([str(int(i)) for i in runs]))
     if uploaders is not None:
-        api_call += "/uploader/%s" % ",".join([str(int(i)) for i in uploaders])
+        api_call += "/uploader/{}".format(",".join([str(int(i)) for i in uploaders]))
     if study is not None:
         api_call += "/study/%d" % study
     if sort_order is not None:
-        api_call += "/sort_order/%s" % sort_order
+        api_call += f"/sort_order/{sort_order}"

     return __list_evaluations(api_call, output_format=output_format)

@@ -236,7 +234,7 @@ def __list_evaluations(
     # Minimalistic check if the XML is useful
     if "oml:evaluations" not in evals_dict:
         raise ValueError(
-            "Error in return XML, does not contain " '"oml:evaluations": %s' % str(evals_dict),
+            "Error in return XML, does not contain " f'"oml:evaluations": {evals_dict!s}',
         )

     assert isinstance(evals_dict["oml:evaluations"]["oml:evaluation"], list), type(
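Editor's note: notice that the "%s" interpolations wrapping ",".join(...) became str.format() calls rather than f-strings. Before Python 3.12, an f-string could not reuse its own quote character inside the braces, so an expression like ",".join(...) cannot be inlined into a double-quoted f-string; falling back to .format() is the usual auto-fix. The "%d" interpolation for study is the one survivor, presumably because %d has int-specific semantics the fixer declines to rewrite. This is my reading of the tool's behaviour, illustrated with hypothetical IDs:

    task_ids = [1, 2, 3]  # hypothetical IDs

    api_call = "evaluation/list/function/predictive_accuracy"
    # f"/task/{','.join(...)}" would need a different inner quote; .format() avoids the issue.
    api_call += "/task/{}".format(",".join(str(int(i)) for i in task_ids))
    assert api_call.endswith("/task/1,2,3")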
43 changes: 17 additions & 26 deletions openml/extensions/sklearn/extension.py
@@ -419,7 +419,7 @@ def _deserialize_sklearn(  # noqa: PLR0915, C901, PLR0913, PLR0912
                 strict_version=strict_version,
             )
         else:
-            raise ValueError("Cannot flow_to_sklearn %s" % serialized_type)
+            raise ValueError(f"Cannot flow_to_sklearn {serialized_type}")

     else:
         rval = OrderedDict(
@@ -979,17 +979,15 @@ def flatten_all(list_):
                     # length 2 is for {VotingClassifier.estimators,
                     # Pipeline.steps, FeatureUnion.transformer_list}
                     # length 3 is for ColumnTransformer
-                    msg = "Length of tuple of type {} does not match assumptions".format(
-                        sub_component_type,
-                    )
+                    msg = f"Length of tuple of type {sub_component_type} does not match assumptions"
                     raise ValueError(msg)

                 if isinstance(sub_component, str):
                     if sub_component not in SKLEARN_PIPELINE_STRING_COMPONENTS:
                         msg = (
                             "Second item of tuple does not match assumptions. "
                             "If string, can be only 'drop' or 'passthrough' but"
-                            "got %s" % sub_component
+                            f"got {sub_component}"
                         )
                         raise ValueError(msg)
                 elif sub_component is None:
@@ -1002,15 +1000,15 @@ def flatten_all(list_):
                 elif not isinstance(sub_component, OpenMLFlow):
                     msg = (
                         "Second item of tuple does not match assumptions. "
-                        "Expected OpenMLFlow, got %s" % type(sub_component)
+                        f"Expected OpenMLFlow, got {type(sub_component)}"
                     )
                     raise TypeError(msg)

                 if identifier in reserved_keywords:
                     parent_model = f"{model.__module__}.{model.__class__.__name__}"
-                    msg = "Found element shadowing official " "parameter for {}: {}".format(
-                        parent_model,
-                        identifier,
+                    msg = (
+                        "Found element shadowing official "
+                        f"parameter for {parent_model}: {identifier}"
                     )
                     raise PyOpenMLError(msg)

@@ -1035,9 +1033,9 @@ def flatten_all(list_):
                     model=None,
                 )
                 component_reference: OrderedDict[str, str | dict] = OrderedDict()
-                component_reference[
-                    "oml-python:serialized_object"
-                ] = COMPOSITION_STEP_CONSTANT
+                component_reference["oml-python:serialized_object"] = (
+                    COMPOSITION_STEP_CONSTANT
+                )
                 cr_value: dict[str, Any] = OrderedDict()
                 cr_value["key"] = identifier
                 cr_value["step_name"] = identifier
@@ -1218,7 +1216,7 @@ def _check_dependencies(
     for dependency_string in dependencies_list:
         match = DEPENDENCIES_PATTERN.match(dependency_string)
         if not match:
-            raise ValueError("Cannot parse dependency %s" % dependency_string)
+            raise ValueError(f"Cannot parse dependency {dependency_string}")

         dependency_name = match.group("name")
         operation = match.group("operation")
@@ -1237,7 +1235,7 @@ def _check_dependencies(
                 installed_version > required_version or installed_version == required_version
             )
         else:
-            raise NotImplementedError("operation '%s' is not supported" % operation)
+            raise NotImplementedError(f"operation '{operation}' is not supported")
         message = (
             "Trying to deserialize a model with dependency "
             f"{dependency_string} not satisfied."
@@ -1812,10 +1810,7 @@ def _prediction_to_probabilities(
                 # then we need to add a column full of zeros into the probabilities
                 # for class 3 because the rest of the library expects that the
                 # probabilities are ordered the same way as the classes are ordered).
-                message = "Estimator only predicted for {}/{} classes!".format(
-                    proba_y.shape[1],
-                    len(task.class_labels),
-                )
+                message = f"Estimator only predicted for {proba_y.shape[1]}/{len(task.class_labels)} classes!"
                 warnings.warn(message, stacklevel=2)
                 openml.config.logger.warning(message)

@@ -2008,9 +2003,7 @@ def is_subcomponent_specification(values):
                 pass
             else:
                 raise TypeError(
-                    "Subcomponent flow should be of type flow, but is {}".format(
-                        type(subcomponent_flow),
-                    ),
+                    f"Subcomponent flow should be of type flow, but is {type(subcomponent_flow)}",
                 )

             current = {
@@ -2129,8 +2122,7 @@ def instantiate_model_from_hpo_class(
         """
        if not self._is_hpo_class(model):
            raise AssertionError(
-                "Flow model %s is not an instance of sklearn.model_selection._search.BaseSearchCV"
-                % model,
+                f"Flow model {model} is not an instance of sklearn.model_selection._search.BaseSearchCV",
            )
        base_estimator = model.estimator
        base_estimator.set_params(**trace_iteration.get_parameters())
@@ -2192,8 +2184,7 @@ def _obtain_arff_trace(
         """
        if not self._is_hpo_class(model):
            raise AssertionError(
-                "Flow model %s is not an instance of sklearn.model_selection._search.BaseSearchCV"
-                % model,
+                f"Flow model {model} is not an instance of sklearn.model_selection._search.BaseSearchCV",
            )
        if not hasattr(model, "cv_results_"):
            raise ValueError("model should contain `cv_results_`")
@@ -2228,7 +2219,7 @@ def _obtain_arff_trace(
                 # hyperparameter layer_sizes of MLPClassifier
                 type = "STRING"  # noqa: A001
             else:
-                raise TypeError("Unsupported param type in param grid: %s" % key)
+                raise TypeError(f"Unsupported param type in param grid: {key}")

             # renamed the attribute param to parameter, as this is a required
             # OpenML convention - this also guards against name collisions
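Editor's note: several hunks above rely on implicit concatenation of adjacent string literals, which Python performs at compile time and which mixes plain strings and f-strings freely. A quick check, with values of my own choosing:

    parent_model = "sklearn.pipeline.Pipeline"  # hypothetical value
    identifier = "steps"                        # hypothetical value

    msg = (
        "Found element shadowing official "
        f"parameter for {parent_model}: {identifier}"
    )
    assert msg == "Found element shadowing official parameter for sklearn.pipeline.Pipeline: steps"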
10 changes: 4 additions & 6 deletions openml/flows/flow.py
@@ -135,15 +135,13 @@ def __init__(  # noqa: PLR0913
         keys_parameters_meta_info = set(parameters_meta_info.keys())
         if len(keys_parameters.difference(keys_parameters_meta_info)) > 0:
             raise ValueError(
-                "Parameter %s only in parameters, but not in "
-                "parameters_meta_info."
-                % str(keys_parameters.difference(keys_parameters_meta_info)),
+                f"Parameter {keys_parameters.difference(keys_parameters_meta_info)!s} only in parameters, but not in "
+                "parameters_meta_info.",
             )
         if len(keys_parameters_meta_info.difference(keys_parameters)) > 0:
             raise ValueError(
-                "Parameter %s only in parameters_meta_info, "
-                "but not in parameters."
-                % str(keys_parameters_meta_info.difference(keys_parameters)),
+                f"Parameter {keys_parameters_meta_info.difference(keys_parameters)!s} only in parameters_meta_info, "
+                "but not in parameters.",
             )

         self.external_version = external_version
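Editor's note: the "!s" conversion inside an f-string applies str() to the expression, so f"{diff!s}" reproduces the old "%s" % str(diff) exactly; for sets a plain f"{diff}" would format identically, and keeping "!s" simply preserves the original code's explicit str() call. A check with a hypothetical parameter set:

    keys_parameters = {"C", "kernel"}
    keys_parameters_meta_info = {"C"}

    diff = keys_parameters.difference(keys_parameters_meta_info)
    assert f"{diff!s}" == str(diff) == "{'kernel'}"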
