Skip to content

Commit

Permalink
0.4.0 (#43)
Browse files Browse the repository at this point in the history
* Add fix to use the `query-continue` parameter to continue to pull category members [issue #39](#39)
* Better handle large categorymember selections
* Add better handling of exception attributes including adding them to the documentation
* Correct the pulling of the section titles without additional markup [#42](#42)
* Handle memoization of unicode parameters in python 2.7
* ***Change default timeout*** for HTTP requests to 15 seconds
  • Loading branch information
barrust committed Mar 9, 2018
1 parent 6b248ff commit 19af7ce
Show file tree
Hide file tree
Showing 8 changed files with 177 additions and 47 deletions.
7 changes: 6 additions & 1 deletion CHANGELOG.md
Expand Up @@ -2,11 +2,16 @@

## Current

### Version 0.3.17
### Version 0.4.0

* Add fix to use the `query-continue` parameter to continue to pull category
members [issue #39](https://github.com/barrust/mediawiki/issues/39)
* Better handle large categorymember selections
* Add better handling of exception attributes including adding them to the
documentation
* Correct the pulling of the section titles without additional markup [#42](https://github.com/barrust/mediawiki/issues/42)
* Handle memoization of unicode parameters in python 2.7
* ***Change default timeout*** for HTTP requests to 15 seconds


### Version 0.3.16
Expand Down
4 changes: 3 additions & 1 deletion CONTRIBUTING.md
Expand Up @@ -159,4 +159,6 @@ A special thanks to all the code contributors to pymediawiki!

[@barrust](https://github.com/barrust) (Maintainer)

[@dan-blanchard](https://github.com/dan-blanchard) - Default URL conforms to passed in language
[@dan-blanchard](https://github.com/dan-blanchard) - Default URL conforms to passed in language [#26](https://github.com/barrust/mediawiki/pull/26)

[@nagash91](https://github.com/nagash91) - Pull section titles without additional markup [#42](https://github.com/barrust/mediawiki/issues/42)
127 changes: 113 additions & 14 deletions mediawiki/exceptions.py
Expand Up @@ -27,30 +27,54 @@ class MediaWikiException(MediaWikiBaseException):
''' MediaWiki Exception Class '''

def __init__(self, error):
self.error = error
self._error = error
msg = ('An unknown error occured: "{0}". Please report '
'it on GitHub!').format(self.error)
super(MediaWikiException, self).__init__(msg)

@property
def error(self):
""" The error message that the MediaWiki site returned
:getter: Returns the raised error message
:type: str """
return self._error


class PageError(MediaWikiBaseException):
''' Exception raised when no MediaWiki page matched a query '''

def __init__(self, title=None, pageid=None):
if title:
self.title = title
self._title = title
msg = ('"{0}" does not match any pages. Try another '
'query!').format(self.title)
elif pageid:
self.pageid = pageid
self._pageid = pageid
msg = ('Page id "{0}" does not match any pages. Try '
'another id!').format(self.pageid)
else:
self.title = ''
self._title = ''
msg = ('"{0}" does not match any pages. Try another '
'query!').format(self.title)
super(PageError, self).__init__(msg)

@property
def title(self):
""" The title that caused the page error
:getter: Returns the title that caused the page error
:type: str """
return self._title

@property
def pageid(self):
""" The pageid that caused the page error
:getter: Returns the pageid that caused the page error
:type: str """
return self._pageid


class RedirectError(MediaWikiBaseException):
''' Exception raised when a page title unexpectedly resolves to
Expand All @@ -60,13 +84,21 @@ class RedirectError(MediaWikiBaseException):
are set to **False** '''

def __init__(self, title):
self.title = title
self._title = title
msg = ('"{0}" resulted in a redirect. Set the redirect '
'property to True to allow automatic '
'redirects.').format(self.title)

super(RedirectError, self).__init__(msg)

@property
def title(self):
""" The title that was redirected
:getter: Returns the title that was a redirect
:type: str """
return self._title


class DisambiguationError(MediaWikiBaseException):
''' Exception raised when a page resolves to a Disambiguation page
Expand All @@ -77,53 +109,120 @@ class DisambiguationError(MediaWikiBaseException):
.. note:: `options` only includes titles that link to valid \
MediaWiki pages '''

def __init__(self, title, may_refer_to, details=None):
self.title = title
self.options = sorted(may_refer_to)
self.details = details
def __init__(self, title, may_refer_to, url, details=None):
self._title = title
self._options = sorted(may_refer_to)
self._details = details
self._url = url
msg = ('\n"{0}" may refer to: \n '
'{1}').format(self.title, '\n '.join(self.options))
super(DisambiguationError, self).__init__(msg)

@property
def url(self):
""" The url, if possible, of the disambiguation page
:getter: Returns the url for the page
:type: str """
return self._url

@property
def title(self):
""" The title of the page
:getter: Returns the title of the disambiguation page
:type: str """
return self._title

@property
def options(self):
""" The list of possible page titles
:getter: Returns a list of `may refer to` pages
:type: list(str) """
return self._options

@property
def details(self):
""" The details of the proposed non-disambiguation pages
:getter: Returns the disambiguation page information
:type: list """
return self._details


class HTTPTimeoutError(MediaWikiBaseException):
''' Exception raised when a request to the Mediawiki site times out. '''

def __init__(self, query):
self.query = query
self._query = query
msg = ('Searching for "{0}" resulted in a timeout. Try '
'again in a few seconds, and ensure you have rate '
'limiting set to True.').format(self.query)
super(HTTPTimeoutError, self).__init__(msg)

@property
def query(self):
""" The query that timed out
:getter: Returns the query that timed out
:type: str """
return self._query


class MediaWikiAPIURLError(MediaWikiBaseException):
''' Exception raised when the MediaWiki server does not support the API '''

def __init__(self, api_url):
self.api_url = api_url
self._api_url = api_url
msg = '{0} is not a valid MediaWiki API URL'.format(self.api_url)
super(MediaWikiAPIURLError, self).__init__(msg)

@property
def api_url(self):
""" The api url that raised the exception
:getter: Returns the attempted api url
:type: str """
return self._api_url


class MediaWikiGeoCoordError(MediaWikiBaseException):
''' Exceptions to handle GeoData exceptions '''

def __init__(self, error):
self.error = error
self._error = error
msg = ('GeoData search resulted in the following '
'error: {0} - Please use valid coordinates or a proper '
'page title.').format(self.error)
super(MediaWikiGeoCoordError, self).__init__(msg)

@property
def error(self):
""" The error that was thrown when pulling GeoCoordinates
:getter: The error message
:type: str """
return self._error


class MediaWikiCategoryTreeError(MediaWikiBaseException):
''' Exception when the category tree is unable to complete for an unknown
reason '''

def __init__(self, category):
self.category = category
self._category = category
msg = ("Categorytree threw an exception for trying to get the "
"same category '{}' too many times. Please try again later "
"and perhaps use the rate limiting option.").format(category)
"and perhaps use the rate limiting "
"option.").format(self._category)
super(MediaWikiCategoryTreeError, self).__init__(msg)

@property
def category(self):
""" The category that threw an exception during category tree \
generation
:getter: Returns the category that caused the exception
:type: str """
return self._category
16 changes: 10 additions & 6 deletions mediawiki/mediawiki.py
Expand Up @@ -16,7 +16,7 @@
from .utilities import (memoize)

URL = 'https://github.com/barrust/mediawiki'
VERSION = '0.3.17'
VERSION = '0.4.0'


class MediaWiki(object):
Expand All @@ -36,17 +36,17 @@ class MediaWiki(object):
'''

def __init__(self, url='http://{lang}.wikipedia.org/w/api.php', lang='en',
timeout=None, rate_limit=False,
timeout=15.0, rate_limit=False,
rate_limit_wait=timedelta(milliseconds=50)):
''' Init Function '''
self._version = VERSION
self._lang = lang.lower()
self._api_url = url.format(lang=self._lang)
self._timeout = timeout
self.timeout = timeout
self._user_agent = ('python-mediawiki/VERSION-{0}'
'/({1})/BOT').format(VERSION, URL)
self._session = None
self._rate_limit = rate_limit
self.rate_limit = bool(rate_limit)
self._rate_limit_last_call = None
self._min_wait = rate_limit_wait
self._extensions = None
Expand Down Expand Up @@ -166,7 +166,7 @@ def timeout(self):
:getter: Returns the number of seconds to wait for a response
:setter: Sets the number of seconds to wait for a response
:type: integer or None
:type: float or None
.. note:: Use **None** for no response timeout
'''
Expand All @@ -175,7 +175,11 @@ def timeout(self):
@timeout.setter
def timeout(self, timeout):
''' Set request timeout in seconds (or fractions of a second) '''
self._timeout = timeout

if timeout is None:
self._timeout = None # no timeout
return
self._timeout = float(timeout) # allow the exception to be raised

@property
def language(self):
Expand Down
23 changes: 13 additions & 10 deletions mediawiki/mediawikipage.py
Expand Up @@ -6,6 +6,7 @@

from __future__ import (unicode_literals, absolute_import)
from decimal import (Decimal)
import re
from bs4 import (BeautifulSoup, Tag)
from .utilities import (str_or_unicode, is_relative_url)
from .exceptions import (MediaWikiException, PageError, RedirectError,
Expand Down Expand Up @@ -438,16 +439,18 @@ def sections(self):
:setter: Not settable
:type: list
'''
# NOTE: Due to MediaWiki sites adding superscripts or italics or bold
# information in the sections, moving to regex to get the
# `non-decorated` name instead of using the query api!
if self._sections is False:
query_params = {'action': 'parse', 'prop': 'sections'}
if not getattr(self, 'title', None):
query_params['pageid'] = self.pageid
else:
query_params['page'] = self.title
request = self.mediawiki.wiki_request(query_params)
sections = request['parse']['sections']
self._sections = [section['line'] for section in sections]

self._sections = list()
section_regexp = r'\n==* .* ==*\n' # '== {STUFF_NOT_\n} =='
found_obj = re.findall(section_regexp, self.content)

if found_obj is not None:
for obj in found_obj:
obj = obj.lstrip('\n= ').rstrip(' =\n')
self._sections.append(obj)
return self._sections

def section(self, section_title):
Expand Down Expand Up @@ -572,7 +575,7 @@ def _raise_disambiguation_error(self, page, pageid):
one_disambiguation['title'] = lis_item.text
disambiguation.append(one_disambiguation)
raise DisambiguationError(getattr(self, 'title', page['title']),
may_refer_to,
may_refer_to, page['fullurl'],
disambiguation)

def _handle_redirect(self, redirect, preload, query, page):
Expand Down
4 changes: 4 additions & 0 deletions mediawiki/utilities.py
Expand Up @@ -55,6 +55,10 @@ def wrapper(*args, **kwargs):
tmp.extend(args[1:])
for k in sorted(defaults.keys()):
tmp.append('({0}: {1})' .format(k, defaults[k]))

# handle possible unicode characters
if sys.version_info < (3, 0):
tmp = [unicode(x) for x in tmp]
key = ' - '.join(tmp)

# pull from the cache if it is available
Expand Down
17 changes: 15 additions & 2 deletions tests/mediawiki_test.py
Expand Up @@ -21,7 +21,7 @@
class MediaWikiOverloaded(MediaWiki):
''' Overload the MediaWiki class to change how wiki_request works '''
def __init__(self, url='http://{lang}.wikipedia.org/w/api.php', lang='en',
timeout=None, rate_limit=False,
timeout=15, rate_limit=False,
rate_limit_wait=timedelta(milliseconds=50)):
''' new init '''

Expand Down Expand Up @@ -194,14 +194,25 @@ def test_rate_limit_min_wait_reset(self):
def test_default_timeout(self):
''' test default timeout '''
site = MediaWikiOverloaded()
self.assertEqual(site.timeout, None)
self.assertEqual(site.timeout, 15)

def test_set_timeout(self):
''' test setting timeout '''
site = MediaWikiOverloaded()
site.timeout = 30
self.assertEqual(site.timeout, 30)

def test_set_timeout_none(self):
''' test setting timeout to None '''
site = MediaWikiOverloaded()
site.timeout = None
self.assertEqual(site.timeout, None)

def test_set_timeout_bad(self):
''' test that we raise the ValueError '''
self.assertRaises(ValueError,
lambda: MediaWikiOverloaded(timeout='foo'))

def test_memoized(self):
''' test returning the memoized cache '''
site = MediaWikiOverloaded()
Expand Down Expand Up @@ -694,6 +705,8 @@ def test_disambiguation_error_msg(self):
site.page('bush')
except DisambiguationError as ex:
self.assertEqual(ex.message, response['disambiguation_error_msg'])
self.assertEqual(ex.title, 'Bush')
self.assertEqual(ex.url, 'https://en.wikipedia.org/wiki/Bush')

def test_disamb_error_msg_w_empty(self):
''' test that disambiguation error is thrown correctly and no
Expand Down

0 comments on commit 19af7ce

Please sign in to comment.