Skip to content

Commit

Permalink
0.3.16 (#36)
Browse files Browse the repository at this point in the history
* fix #35 disambiguation parse error
* add `use_cache` property to turn off using the caching functionality
* minor code cleanup
  • Loading branch information
barrust committed Nov 21, 2017
1 parent bf38c20 commit 243e7a0
Show file tree
Hide file tree
Showing 10 changed files with 1,004 additions and 828 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Expand Up @@ -2,6 +2,11 @@

## Current

### Version 0.3.16

* Add ability to turn off caching completely
* Fix bug when disambiguation link does not have a title [issue #35](https://github.com/barrust/mediawiki/issues/35)

### Version 0.3.15

* Add parse all links within a section [issue #33](https://github.com/barrust/mediawiki/issues/33)
Expand Down
18 changes: 9 additions & 9 deletions CONTRIBUTING.md
Expand Up @@ -45,15 +45,16 @@ within the library, please do not hesitate to report the issue!
will help narrow down the search for the cause of the issue and may lead to a
quicker fix!

A **good bug report** will consist of the following:
A **great bug report** will consist of the following:

* A descriptive title

* A brief description of the issue

* Description of the expected results

* A code example to reproduce the error. Please use [Markdown code blocks](https://help.github.com/articles/creating-and-highlighting-code-blocks/)
* A code example to reproduce the error. Please use
[Markdown code blocks](https://help.github.com/articles/creating-and-highlighting-code-blocks/)
with syntax highlighting

* The link to the API URL if not the default:
Expand Down Expand Up @@ -102,7 +103,7 @@ a pull request. There are a few guidelines for creating pull requests:
master (unless there is not a development branch!)

* If the PR only changes documentation, please add `[ci skip]` to the commit
message. To learn more, you can [read about skipping integration testing ](https://docs.travis-ci.com/user/customizing-the-build#Skipping-a-build)
message. To learn more, you can [read about skipping integration testing](https://docs.travis-ci.com/user/customizing-the-build#Skipping-a-build)

* Reference ***any and all*** [issues](https://github.com/barrust/mediawiki/issues)
related to the pull request
Expand All @@ -129,13 +130,11 @@ access.

#### Coding Style

All code must adhere to the [PEP8](https://www.python.org/dev/peps/pep-0008/)
coding style standard. Code that does not adhere will be flagged as a failed
build during continuous integration testing.

The MediaWiki API wrapper project follows the PEP8 coding style for consistency
The MediaWiki API wrapper project follows the
[PEP8](https://www.python.org/dev/peps/pep-0008/) coding style for consistency
and readability. Code that does not comply with PEP8 will not be accepted into
the project as-is.
the project as-is. All code should adhere to the PEP8 coding style standard
where possible.

The MediaWiki API wrapper project also uses [pylint](https://www.pylint.org/)
to help identify potential errors, code duplication, and non-pythonic syntax.
Expand All @@ -159,4 +158,5 @@ pep8 mediawiki
A special thanks to all the code contributors to pymediawiki!

[@barrust](https://github.com/barrust) (Maintainer)

[@dan-blanchard](https://github.com/dan-blanchard) - Default URL conforms to passed in language
6 changes: 3 additions & 3 deletions mediawiki/exceptions.py
Expand Up @@ -5,9 +5,9 @@
from .utilities import (str_or_unicode)


ODD_ERROR_MESSAGE = ('This should not happen. Please report on '
'GitHub if the MediaWiki site is available: '
'github.com/barrust/mediawiki')
ODD_ERROR_MESSAGE = ('This should not happen. If the MediaWiki site you are '
'querying is available, then please report this issue on '
'GitHub: github.com/barrust/mediawiki')


class MediaWikiBaseException(Exception):
Expand Down
68 changes: 41 additions & 27 deletions mediawiki/mediawiki.py
Expand Up @@ -16,7 +16,7 @@
from .utilities import (memoize)

URL = 'https://github.com/barrust/mediawiki'
VERSION = '0.3.15'
VERSION = '0.3.16'


class MediaWiki(object):
Expand Down Expand Up @@ -51,18 +51,20 @@ def __init__(self, url='http://{lang}.wikipedia.org/w/api.php', lang='en',
self._min_wait = rate_limit_wait
self._extensions = None
self._api_version = None
self._api_version_str = None
self._base_url = None
self.__supported_languages = None

# for memoized results
self._cache = dict()
self._refresh_interval = None
self._use_cache = True

# call helper functions to get everything set up
self._reset_session()
try:
self._get_site_info()
except Exception:
except MediaWikiException:
raise MediaWikiAPIURLError(url)

# non-settable properties
Expand All @@ -84,7 +86,7 @@ def api_version(self):
:setter: Not settable
:type: string
'''
return '.'.join([str(x) for x in self._api_version])
return self._api_version_str

@property
def base_url(self):
Expand All @@ -105,7 +107,7 @@ def extensions(self):
:setter: Not settable
:type: list
'''
return sorted(list(self._extensions))
return self._extensions

# settable properties
@property
Expand All @@ -125,6 +127,21 @@ def rate_limit(self, rate_limit):
self._rate_limit_last_call = None
self.clear_memoized()

@property
def use_cache(self):
''' Boolean value if the cache is to be used
:getter: Returns whether the cache should be used
:setter: Turns on (**True**) or off (**False**) the caching algorithm
:type: Boolean
'''
return self._use_cache

@use_cache.setter
def use_cache(self, use_cache):
''' toggle using the cache or not '''
self._use_cache = bool(use_cache)

@property
def rate_limit_min_wait(self):
''' Time to wait between calls
Expand Down Expand Up @@ -267,7 +284,7 @@ def set_api_url(self, api_url='http://{lang}.wikipedia.org/w/api.php',
try:
self._get_site_info()
self.__supported_languages = None # reset this
except Exception:
except MediaWikiException:
# reset api url and lang in the event that the exception was caught
self._api_url = old_api_url
self._lang = old_lang
Expand Down Expand Up @@ -320,7 +337,6 @@ def random(self, pages=1):

if len(titles) == 1:
return titles[0]

return titles
# end random

Expand Down Expand Up @@ -352,15 +368,14 @@ def search(self, query, results=10, suggestion=False):

self._check_error_response(raw_results, query)

search_results = (d['title'] for d in raw_results['query']['search'])
search_results = [d['title'] for d in raw_results['query']['search']]

if suggestion:
sug = None
if raw_results['query'].get('searchinfo'):
sug = raw_results['query']['searchinfo']['suggestion']
return list(search_results), sug

return list(search_results)
return search_results, sug
return search_results
# end search

@memoize
Expand Down Expand Up @@ -432,9 +447,7 @@ def test_lat_long(val):

self._check_error_response(raw_results, title)

res = (d['title'] for d in raw_results['query']['geosearch'])

return list(res)
return [d['title'] for d in raw_results['query']['geosearch']]

@memoize
def opensearch(self, query, results=10, redirect=True):
Expand Down Expand Up @@ -502,11 +515,7 @@ def prefixsearch(self, prefix, results=10):

self._check_error_response(raw_results, prefix)

res = list()
for rec in raw_results['query']['prefixsearch']:
res.append(rec['title'])

return res
return [rec['title'] for rec in raw_results['query']['prefixsearch']]

@memoize
def summary(self, title, sentences=0, chars=0, auto_suggest=True,
Expand Down Expand Up @@ -641,7 +650,7 @@ def __cat_tree_rec(cat, depth, tree, level, categories, links):
break
except PageError:
raise PageError('Category:{0}'.format(cat))
except Exception:
except:
tries = tries + 1
time.sleep(1)
else:
Expand Down Expand Up @@ -773,27 +782,34 @@ def _get_site_info(self):
'siprop': 'extensions|general'
})

# shouldn't a check for success be done here?
gen = response['query']['general']
# parse what we need out here!
query = response.get('query', None)
if query is None or query.get('general', None) is None:
raise MediaWikiException('Missing query in response')

gen = query.get('general', None)

api_version = gen['generator'].split(' ')[1].split('-')[0]

major_minor = api_version.split('.')
for i, item in enumerate(major_minor):
major_minor[i] = int(item)
self._api_version = tuple(major_minor)
self._api_version_str = '.'.join([str(x) for x in self._api_version])

# parse the base url out
tmp = gen['server']
tmp = gen.get('server', '')
if tmp == '':
raise MediaWikiException('Unable to parse base url')
if tmp.startswith('http://') or tmp.startswith('https://'):
self._base_url = tmp
elif gen['base'].startswith('https:'):
self._base_url = 'https:{}'.format(tmp)
else:
self._base_url = 'http:{}'.format(tmp)

self._extensions = set()
for ext in response['query']['extensions']:
self._extensions.add(ext['name'])
self._extensions = [ext['name'] for ext in query['extensions']]
self._extensions = sorted(list(set(self._extensions)))
# end _get_site_info

@staticmethod
Expand All @@ -811,8 +827,6 @@ def _check_error_response(response, query):
raise MediaWikiGeoCoordError(err)
else:
raise MediaWikiException(err)
else:
return

@staticmethod
def _check_query(value, message):
Expand Down
50 changes: 24 additions & 26 deletions mediawiki/mediawikipage.py
Expand Up @@ -258,7 +258,8 @@ def hatnotes(self):

@property
def references(self):
''' External links, or references, listed anywhere on the MediaWiki page
''' External links, or references, listed anywhere on the MediaWiki \
page
:getter: Returns the list of all external links
:setter: Not settable
Expand All @@ -269,10 +270,8 @@ def references(self):
'''
if self._references is False:
params = {'prop': 'extlinks', 'ellimit': 'max'}
self._references = list()
for link in self._continued_query(params):
self._references.append(link['*'])
self._references = sorted(self._references)
tmp = [link['*'] for link in self._continued_query(params)]
self._references = sorted(tmp)
return self._references

@property
Expand All @@ -284,18 +283,21 @@ def categories(self):
:type: list
'''
if self._categories is False:
self._categories = list()

def _get_cat(val):
''' parse the category correctly '''
tmp = val['title']
if tmp.startswith('Category:'):
return tmp[9:]
return tmp

params = {
'prop': 'categories',
'cllimit': 'max',
'clshow': '!hidden'
}
for link in self._continued_query(params):
cat = link['title']
if cat.startswith('Category:'):
cat = cat[9:]
self._categories.append(cat)
self._categories = sorted(self._categories)
tmp = [_get_cat(link) for link in self._continued_query(params)]
self._categories = sorted(tmp)
return self._categories

@property
Expand Down Expand Up @@ -338,9 +340,8 @@ def links(self):
'plnamespace': 0,
'pllimit': 'max'
}
for link in self._continued_query(params):
self._links.append(link['title'])
self._links = sorted(self._links)
tmp = [link['title'] for link in self._continued_query(params)]
self._links = sorted(tmp)
return self._links

@property
Expand All @@ -359,9 +360,8 @@ def redirects(self):
'rdprop': 'title',
'rdlimit': 'max'
}
for link in self._continued_query(params):
self._redirects.append(link['title'])
self._redirects = sorted(self._redirects)
tmp = [link['title'] for link in self._continued_query(params)]
self._redirects = sorted(tmp)
return self._redirects

@property
Expand All @@ -382,9 +382,9 @@ def backlinks(self):
'blfilterredir': 'nonredirects',
'blnamespace': 0
}
for link in self._continued_query(params, 'backlinks'):
self._backlinks.append(link['title'])
self._backlinks = sorted(self._backlinks)
tmp = [link['title']
for link in self._continued_query(params, 'backlinks')]
self._backlinks = sorted(tmp)
return self._backlinks

@property
Expand Down Expand Up @@ -558,14 +558,14 @@ def _raise_disambiguation_error(self, page, pageid):
lis = BeautifulSoup(html, 'html.parser').find_all('li')
filtered_lis = [li for li in lis if 'tocsection' not in
''.join(li.get('class', list()))]
may_refer_to = [li.a.get_text()
for li in filtered_lis if li.a]
may_refer_to = [li.a.get_text() for li in filtered_lis if li.a]

disambiguation = list()
for lis_item in filtered_lis:
item = lis_item.find_all('a')
one_disambiguation = dict()
one_disambiguation['description'] = lis_item.text
if item:
if item and hasattr(item, 'title'):
one_disambiguation['title'] = item[0]['title']
else:
# these are non-linked records so double up the text
Expand All @@ -584,7 +584,6 @@ def _handle_redirect(self, redirect, preload, query, page):
normalized = query['normalized'][0]
if normalized['from'] != self.title:
raise MediaWikiException(ODD_ERROR_MESSAGE)
# assert normalized['from'] == self.title, ODD_ERROR_MESSAGE
from_title = normalized['to']
else:
if not getattr(self, 'title', None):
Expand All @@ -593,7 +592,6 @@ def _handle_redirect(self, redirect, preload, query, page):
from_title = self.title
if redirects['from'] != from_title:
raise MediaWikiException(ODD_ERROR_MESSAGE)
# assert redirects['from'] == from_title, ODD_ERROR_MESSAGE

# change the title and reload the whole object
self.__init__(self.mediawiki, title=redirects['to'],
Expand Down

0 comments on commit 243e7a0

Please sign in to comment.