From 086ab215e35d3f30181c977d5e937c6d9dc522a4 Mon Sep 17 00:00:00 2001 From: barrust Date: Sun, 7 Jan 2024 21:53:28 -0500 Subject: [PATCH] move over to configuration usage; mostly complete --- mediawiki/__init__.py | 3 +- mediawiki/configuraton.py | 10 ++- mediawiki/mediawiki.py | 167 +++++++++++++++++--------------------- mediawiki/utilities.py | 4 +- tests/mediawiki_test.py | 16 ++-- 5 files changed, 92 insertions(+), 108 deletions(-) diff --git a/mediawiki/__init__.py b/mediawiki/__init__.py index e34bc36..0515c4e 100644 --- a/mediawiki/__init__.py +++ b/mediawiki/__init__.py @@ -1,6 +1,7 @@ """ mediawiki module initialization """ +from mediawiki.configuraton import URL, VERSION from mediawiki.exceptions import ( DisambiguationError, HTTPTimeoutError, @@ -12,7 +13,7 @@ PageError, RedirectError, ) -from mediawiki.mediawiki import URL, VERSION, MediaWiki +from mediawiki.mediawiki import MediaWiki from mediawiki.mediawikipage import MediaWikiPage __author__ = "Tyler Barrus" diff --git a/mediawiki/configuraton.py b/mediawiki/configuraton.py index 656c574..f0a337e 100644 --- a/mediawiki/configuraton.py +++ b/mediawiki/configuraton.py @@ -2,13 +2,14 @@ from datetime import datetime, timedelta from typing import Dict, Optional, Union -from mediawiki.mediawiki import URL, VERSION +URL: str = "https://github.com/barrust/mediawiki" +VERSION: str = "0.7.4" @dataclass class Configuration: _lang: str = field(default="en", init=False, repr=False) - _api_url: str = field(default="https://{lang}.wikipedia.org/w/api.php", init=False, repr=False) + _api_url: str = field(default="https://en.wikipedia.org/w/api.php", init=False, repr=False) _category_prefix: str = field(default="Category", init=False, repr=False) _timeout: Optional[float] = field(default=15.0, init=False, repr=False) _user_agent: str = field(default=f"python-mediawiki/VERSION-{VERSION}/({URL})/BOT", init=False, repr=False) @@ -79,6 +80,9 @@ def __init__( if use_cache: self.use_cache = use_cache + if timeout: + self.timeout = timeout + def __repr__(self): keys = [ x.replace("_", "", 1) @@ -195,7 +199,7 @@ def password(self, password: Optional[str]): @property def refresh_interval(self) -> Optional[int]: - return self._rate_limit + return self._refresh_interval @refresh_interval.setter def refresh_interval(self, refresh_interval: Optional[int]): diff --git a/mediawiki/mediawiki.py b/mediawiki/mediawiki.py index abcc392..87f9580 100644 --- a/mediawiki/mediawiki.py +++ b/mediawiki/mediawiki.py @@ -13,6 +13,7 @@ import requests import requests.exceptions as rex +from mediawiki.configuraton import VERSION, Configuration from mediawiki.exceptions import ( HTTPTimeoutError, MediaWikiAPIURLError, @@ -25,9 +26,6 @@ from mediawiki.mediawikipage import MediaWikiPage from mediawiki.utilities import memoize -URL: str = "https://github.com/barrust/mediawiki" -VERSION: str = "0.7.4" - class MediaWiki: """MediaWiki API Wrapper Instance @@ -49,27 +47,16 @@ class MediaWiki: __slots__ = [ "_version", - "_lang", - "_api_url", - "_cat_prefix", - "_timeout", - "_user_agent", + "_config", "_session", - "_rate_limit", - "_rate_limit_last_call", - "_min_wait", "_extensions", "_api_version", "_api_version_str", "_base_url", "__supported_languages", "__available_languages", - "_cache", - "_refresh_interval", - "_use_cache", "_is_logged_in", - "_proxies", - "_verify_ssl", + "_cache", ] def __init__( @@ -88,27 +75,24 @@ def __init__( ): """Init Function""" self._version = VERSION - self._lang = lang.lower() - self._api_url = url.format(lang=self._lang) - self._cat_prefix = "" - self.category_prefix = cat_prefix - self._timeout = 15.0 - self.timeout = timeout + url.format(lang=lang.lower()) + self._config = Configuration( + lang=lang, + api_url=url.format(lang=lang), + category_prefix=cat_prefix, + timeout=timeout, + proxies=proxies, + user_agent=user_agent, + verify_ssl=verify_ssl, + rate_limit=rate_limit, + rate_limit_wait=rate_limit_wait, + ) + # requests library parameters self._session: Optional[requests.Session] = None - self._user_agent = f"python-mediawiki/VERSION-{VERSION}/({URL})/BOT" - self._proxies: Optional[Dict] = None - self._verify_ssl: Union[bool, str] = True - self.verify_ssl = verify_ssl + # set libary parameters - if user_agent is not None: - self.user_agent = user_agent - self.proxies = proxies # this will call self._reset_session() - - self._rate_limit = False - self.rate_limit = bool(rate_limit) - self._rate_limit_last_call: Optional[datetime] = None - self._min_wait = rate_limit_wait + self._extensions = None self._api_version = None self._api_version_str = None @@ -118,8 +102,6 @@ def __init__( # for memoized results self._cache: Dict = {} - self._refresh_interval: Optional[int] = None - self._use_cache = True # for login information self._is_logged_in = False @@ -168,35 +150,37 @@ def extensions(self) -> List[str]: @property def rate_limit(self) -> bool: """bool: Turn on or off Rate Limiting""" - return self._rate_limit + return self._config.rate_limit @rate_limit.setter def rate_limit(self, rate_limit: bool): """Turn on or off rate limiting""" - self._rate_limit = bool(rate_limit) - self._rate_limit_last_call = None - self.clear_memoized() + self._config.rate_limit = rate_limit + if self._config._clear_memoized: + self.clear_memoized() @property def proxies(self) -> Optional[Dict]: """dict: Turn on, off, or set proxy use with the Requests library""" - return self._proxies + return self._config.proxies @proxies.setter def proxies(self, proxies: Optional[Dict]): """Turn on, off, or set proxy use through the Requests library""" - self._proxies = proxies if isinstance(proxies, dict) else None - self._reset_session() + self._config.proxies = proxies + if self._config._reset_session: + self._reset_session() + self._config._reset_session = False @property def use_cache(self) -> bool: """bool: Whether caching should be used; on (**True**) or off (**False**)""" - return self._use_cache + return self._config.use_cache @use_cache.setter def use_cache(self, use_cache: bool): """toggle using the cache or not""" - self._use_cache = bool(use_cache) + self._config.use_cache = use_cache @property def rate_limit_min_wait(self) -> timedelta: @@ -204,37 +188,39 @@ def rate_limit_min_wait(self) -> timedelta: Note: Only used if rate_limit is **True**""" - return self._min_wait + return self._config.rate_limit_min_wait @rate_limit_min_wait.setter def rate_limit_min_wait(self, min_wait: timedelta): """Set minimum wait to use for rate limiting""" - self._min_wait = min_wait - self._rate_limit_last_call = None + self._config.rate_limit_min_wait = min_wait + self._config._rate_limit_last_call = None @property - def timeout(self) -> float: + def timeout(self) -> Optional[float]: """float: Response timeout for API requests Note: Use **None** for no response timeout""" - return self._timeout + return self._config.timeout @timeout.setter - def timeout(self, timeout: float): + def timeout(self, timeout: Optional[float]): """Set request timeout in seconds (or fractions of a second)""" - self._timeout = None if timeout is None else float(timeout) + self._config.timeout = timeout @property def verify_ssl(self) -> Union[bool, str]: """bool | str: Verify SSL when using requests or path to cert file""" - return self._verify_ssl + return self._config.verify_ssl @verify_ssl.setter def verify_ssl(self, verify_ssl: Union[bool, str]): """Set request verify SSL parameter; defaults to True if issue""" - self._verify_ssl = verify_ssl if isinstance(verify_ssl, (bool, str)) else True - self._reset_session() + self._config.verify_ssl = verify_ssl + if self._config._reset_session: + self._reset_session() + self._config._reset_session = False @property def language(self) -> str: @@ -244,21 +230,15 @@ def language(self) -> str: Use correct language titles with the updated API URL Note: Some API URLs do not encode language; unable to update if this is the case""" - return self._lang + return self._config.lang @language.setter def language(self, lang: str): """Set the language to use; attempts to change the API URL""" - lang = lang.lower() - if self._lang == lang: - return - - url = self._api_url - tmp = url.replace(f"/{self._lang}.", f"/{lang}.") - - self._api_url = tmp - self._lang = lang - self.clear_memoized() + self._config.lang == lang + if self._config._clear_memoized: + self.clear_memoized() + self._config._clear_memoized = False @property def category_prefix(self) -> str: @@ -266,27 +246,28 @@ def category_prefix(self) -> str: Note: Use the correct category name for the language selected""" - return self._cat_prefix + return self._config.category_prefix @category_prefix.setter def category_prefix(self, prefix: str): """Set the category prefix correctly""" - self._cat_prefix = prefix[:-1] if prefix[-1:] == ":" else prefix + self._config.category_prefix = prefix @property def user_agent(self) -> str: """str: User agent string Note: If using in as part of another project, this should be changed""" - return self._user_agent + return self._config.user_agent @user_agent.setter def user_agent(self, user_agent: str): """Set the new user agent string Note: Will need to re-log into the MediaWiki if user agent string is changed""" - self._user_agent = user_agent - self._reset_session() + self._config.user_agent = user_agent + if self._config._reset_session: + self._reset_session() @property def api_url(self) -> str: @@ -294,7 +275,7 @@ def api_url(self) -> str: Note: Not settable; See :py:func:`mediawiki.MediaWiki.set_api_url`""" - return self._api_url + return self._config.api_url @property def memoized(self) -> Dict[Any, Any]: @@ -308,14 +289,12 @@ def memoized(self) -> Dict[Any, Any]: @property def refresh_interval(self) -> Optional[int]: """int: The interval at which the memoize cache is to be refresh""" - return self._refresh_interval + return self._config.refresh_interval @refresh_interval.setter def refresh_interval(self, refresh_interval: int): """Set the new cache refresh interval""" - self._refresh_interval = ( - refresh_interval if isinstance(refresh_interval, int) and refresh_interval > 0 else None - ) + self._config.refresh_interval = refresh_interval def login(self, username: str, password: str, strict: bool = True) -> bool: """Login as specified user @@ -381,10 +360,10 @@ def set_api_url( :py:func:`mediawiki.exceptions.MediaWikiAPIURLError`: if the \ url is not a valid MediaWiki site or login fails """ - old_api_url = self._api_url - old_lang = self._lang - self._lang = lang.lower() - self._api_url = api_url.format(lang=self._lang) + old_api_url = self._config.api_url + old_lang = self._config.lang + self._config.lang = lang.lower() + self._config.api_url = api_url.format(lang=self._config.lang) self._is_logged_in = False try: @@ -395,8 +374,8 @@ def set_api_url( self.__available_languages = None # reset this except (rex.ConnectTimeout, MediaWikiException) as exc: # reset api url and lang in the event that the exception was caught - self._api_url = old_api_url - self._lang = old_lang + self._config.api_url = old_api_url + self._config.lang = old_lang raise MediaWikiAPIURLError(api_url) from exc self.clear_memoized() @@ -405,12 +384,12 @@ def _reset_session(self): if self._session: self._session.close() - headers = {"User-Agent": self._user_agent} + headers = {"User-Agent": self._config.user_agent} self._session = requests.Session() self._session.headers.update(headers) - if self._proxies is not None: - self._session.proxies.update(self._proxies) - self._session.verify = self._verify_ssl + if self._config.proxies is not None: + self._session.proxies.update(self._config.proxies) + self._session.verify = self._config.verify_ssl self._is_logged_in = False def clear_memoized(self): @@ -852,17 +831,17 @@ def wiki_request(self, params: Dict[str, Any]) -> Dict[Any, Any]: if "action" not in params: params["action"] = "query" - limit = self._rate_limit - last_call = self._rate_limit_last_call - if limit and last_call and last_call + self._min_wait > datetime.now(): + limit = self._config.rate_limit + last_call = self._config._rate_limit_last_call + if limit and last_call and last_call + self._config.rate_limit_min_wait > datetime.now(): # call time to quick for rate limited api requests, wait - wait_time = (last_call + self._min_wait) - datetime.now() + wait_time = (last_call + self._config.rate_limit_min_wait) - datetime.now() time.sleep(wait_time.total_seconds()) req = self._get_response(params) - if self._rate_limit: - self._rate_limit_last_call = datetime.now() + if self._config.rate_limit: + self._config._rate_limit_last_call = datetime.now() return req @@ -993,7 +972,7 @@ def _get_response(self, params: Dict[str, Any]) -> Dict[str, Any]: """wrap the call to the requests package""" try: if self._session is not None: - return self._session.get(self._api_url, params=params, timeout=self._timeout).json() + return self._session.get(self._config.api_url, params=params, timeout=self._config.timeout).json() return {} except JSONDecodeError: return {} @@ -1002,7 +981,7 @@ def _post_response(self, params: Dict[str, Any]) -> Dict[str, Any]: """wrap a post call to the requests package""" try: if self._session is not None: - return self._session.post(self._api_url, data=params, timeout=self._timeout).json() + return self._session.post(self._config.api_url, data=params, timeout=self._config.timeout).json() return {} except JSONDecodeError: return {} diff --git a/mediawiki/utilities.py b/mediawiki/utilities.py index b35e791..4f232b1 100644 --- a/mediawiki/utilities.py +++ b/mediawiki/utilities.py @@ -30,8 +30,8 @@ def memoize(func: Callable) -> Callable: def wrapper(*args, **kwargs): """wrap it up and store info in a cache""" cache = args[0].memoized - refresh = args[0].refresh_interval - use_cache = args[0].use_cache + refresh = args[0]._config.refresh_interval + use_cache = args[0]._config.use_cache # short circuit if not using cache if use_cache is False: diff --git a/tests/mediawiki_test.py b/tests/mediawiki_test.py index a0b9bad..0db6e62 100644 --- a/tests/mediawiki_test.py +++ b/tests/mediawiki_test.py @@ -203,7 +203,7 @@ def test_rate_limit(self): site = MediaWikiOverloaded() site.rate_limit = True self.assertEqual(site.rate_limit, True) - self.assertEqual(site._rate_limit_last_call, None) + self.assertEqual(site._config._rate_limit_last_call, None) self.assertEqual(site.rate_limit_min_wait, timedelta(milliseconds=50)) def test_rate_limit_min_wait(self): @@ -211,15 +211,15 @@ def test_rate_limit_min_wait(self): site = MediaWikiOverloaded() site.rate_limit_min_wait = timedelta(milliseconds=150) self.assertEqual(site.rate_limit, False) - self.assertEqual(site._rate_limit_last_call, None) + self.assertEqual(site._config._rate_limit_last_call, None) self.assertEqual(site.rate_limit_min_wait, timedelta(milliseconds=150)) def test_rate_limit_min_wait_reset(self): """test setting rate limiting""" site = MediaWikiOverloaded(rate_limit=True) - self.assertNotEqual(site._rate_limit_last_call, None) # should be set + self.assertNotEqual(site._config._rate_limit_last_call, None) # should be set site.rate_limit_min_wait = timedelta(milliseconds=150) - self.assertEqual(site._rate_limit_last_call, None) + self.assertEqual(site._config._rate_limit_last_call, None) self.assertEqual(site.rate_limit, True) self.assertEqual(site.rate_limit_min_wait, timedelta(milliseconds=150)) @@ -1026,16 +1026,16 @@ class TestMediaWikiRequests(unittest.TestCase): def test_wiki_request(self): """test wiki request by testing the timing....""" site = MediaWikiOverloaded() - # self.assertEqual(site._rate_limit_last_call, None) + # self.assertEqual(site._config._rate_limit_last_call, None) site.rate_limit = True site.rate_limit_min_wait = timedelta(seconds=2) site.search("chest set") - start_time = site._rate_limit_last_call + start_time = site._config._rate_limit_last_call site.opensearch("new york") site.prefixsearch("ar") - end_time = site._rate_limit_last_call + end_time = site._config._rate_limit_last_call self.assertGreater(end_time - start_time, timedelta(seconds=2)) - self.assertNotEqual(site._rate_limit_last_call, None) + self.assertNotEqual(site._config._rate_limit_last_call, None) class TestMediaWikiPage(unittest.TestCase):