Skip to content

Commit

Permalink
Add wikibase, wikibase_item, entities and claims.
Browse files Browse the repository at this point in the history
Wikibase and wikibase_item():
>>> import mwclient
>>> site = mwclient.Site(('https', 'fr.wikipedia.org'))
>>> p = site.Pages['Nicolas Sarkozy']
>>> p.wikibase_item()
<Item object Q329 (('https', u'www.wikidata.org'))>
>>> site.wikibase()
<WikiBaseSite object '('https', u'www.wikidata.org')/w/'>

Added entities() to WikiBaseSite.
It works this way:
>>> import mwclient
>>> site = mwclient.WikiBaseSite(('https', 'www.wikidata.org'))
>>> ent = site.entities('Q422')
>>> q422 = ent[0]
>>> q422
<Item object Q422 (('https', 'www.wikidata.org'))>
>>> q422.claims()

Fetching labels, descriptions and sitelinks for entities
>>> import mwclient
>>> site = mwclient.WikiBaseSite(('https', 'www.wikidata.org'))
>>> q = mwclient.entity.Item(site, 'Q42')
>>> q.sitelinks
>>> q.labels
>>> q.descriptions

ids filter for entities():
>>> import mwclient
>>> wikidata = mwclient.WikiBaseSite(('https', 'www.wikidata.org'))
>>> entities = wikidata.entities(ids=['Q42', 'P238'])
>>> for entity in entities:
>>>    print entity
<Property object P238 (('https', 'www.wikidata.org'))>
<Item object Q42 (('https', 'www.wikidata.org'))>

Property filter in claims():
>>> import mwclient
>>> site = mwclient.Site(('https', 'en.wikipedia.org'))
>>> article = site.Pages['Nicolas Sarkozy']
>>> item = article.wikibase_item()
>>> # Only distinction
>>> for claim in item.claims(prop=['P166']):
>>>     print claim
>>> # All claims
>>> for claim in item.claims():
>>>     print claim

Claim objects now have both properties:
* `snaktype`
* `value` set to `None` if `snaktype` is either `somevalue` or `novalue`
  • Loading branch information
PierreSelim committed May 19, 2019
1 parent 72338f6 commit 898fa0b
Show file tree
Hide file tree
Showing 4 changed files with 356 additions and 2 deletions.
5 changes: 3 additions & 2 deletions mwclient/__init__.py
Expand Up @@ -23,8 +23,9 @@
OTHER DEALINGS IN THE SOFTWARE.
"""

from mwclient.errors import * # pylint: disable=unused-import
from mwclient.client import Site, __ver__ # pylint: disable=unused-import
from mwclient.errors import * # pylint: disable=unused-import
from mwclient.client import Site, WikiBaseSite, __ver__ # pylint: disable=unused-import
from mwclient import entity
import logging
import warnings

Expand Down
49 changes: 49 additions & 0 deletions mwclient/client.py
Expand Up @@ -16,6 +16,7 @@

import mwclient.errors as errors
import mwclient.listing as listing
import mwclient.entity as entity
from mwclient.sleep import Sleepers
from mwclient.util import parse_timestamp, read_in_chunks

Expand Down Expand Up @@ -118,6 +119,9 @@ def __init__(self, host, path='/w/', ext='.php', pool=None, retry_timeout=30,
self.Categories = self.categories
self.Images = self.images

# wikibase caching
self._wikibase_repository = None

# Initialization status
self.initialized = False

Expand Down Expand Up @@ -1107,3 +1111,48 @@ def ask(self, query, title=None):
answers = results['query'].get('results') or {}
for key, value in answers.items():
yield {key: value}

@property
def wikibase_repository(self):
    """Return the Wikibase repository associated with this wiki.

    The repository location is read from the ``meta=wikibase`` API query
    on first access; the resulting WikiBaseSite is cached in
    ``self._wikibase_repository`` and reused afterwards.

    Returns:
        WikiBaseSite: site object for the repository, sharing this site's
            connection pool.
    """
    if self._wikibase_repository is None:
        result = self.api('query', meta='wikibase')
        url = result['query']['wikibase']['repo']['url']
        base = url['base']
        if '://' in base:
            # Explicit scheme: honour it instead of forcing https.
            # maxsplit=1 keeps any later '://' inside the host part.
            method, host = base.split('://', 1)
        else:
            # No scheme in the base (presumably a protocol-relative
            # '//host' form): fall back to https.
            method, host = 'https', base.replace('//', '')
        path = url['scriptpath'] + "/"
        self._wikibase_repository = WikiBaseSite((method, host),
                                                 path=path,
                                                 pool=self.connection)
    return self._wikibase_repository


class WikiBaseSite(Site):

    """Site subclass used to talk to the Wikibase API."""

    def __repr__(self):
        """Representation of the WikiBaseSite object."""
        return "<WikiBaseSite object '%s%s'>" % (self.host, self.path)

    def entities(self, ids):
        """Fetch entities through ``wbgetentities``.

        API doc: https://www.mediawiki.org/wiki/Wikibase/API/en#wbgetentities

        Args:
            ids (list or str): ID or IDs of the entities to fetch. A single
                ID may be given as a plain string, e.g. ``'Q422'``.

        Returns:
            list: one entity.Item or entity.Property per returned entry
                whose ``type`` is ``item`` or ``property``; other entries
                are skipped.
        """
        # A bare string would otherwise be joined character by character
        # ('Q422' -> 'Q|4|2|2'). type(u'') covers unicode on Python 2.
        if isinstance(ids, (str, type(u''))):
            ids = [ids]
        result = self.api('wbgetentities', ids="|".join(ids))

        factories = {'item': entity.Item, 'property': entity.Property}
        found = []
        for entityid, info in result['entities'].items():
            # Entries without a known type are ignored rather than
            # raising KeyError.
            factory = factories.get(info.get('type'))
            if factory is None:
                continue
            obj = factory(self, entityid)
            obj.setinfofromwbgetentities(info)
            found.append(obj)
        return found
277 changes: 277 additions & 0 deletions mwclient/entity.py
@@ -0,0 +1,277 @@
"""WikiBase Entities and related objects."""


class Entity(object):

    """Wikibase Entity, either Item or Property.

    This class should not be instantiated directly; it is the common base
    of Item and Property.

    Attributes:
        site (WikiBaseSite): reference to a WikiBaseSite
        entity (str): normalized ID of the entity.
        descriptions (dict): dictionary containing description per language
        labels (dict): dictionary containing labels per language
    """

    def __init__(self, site, normalized_entity):
        """Common part of constructor for Item and Property."""
        self.site = site
        self.entity = normalized_entity

        # Lazily filled caches; None means "not fetched yet".
        self._descriptions = None
        self._labels = None
        self._itemclaims = None

    @staticmethod
    def _termsbylanguage(terms):
        """Flatten a labels/descriptions section into {language: value}."""
        # `or {}` tolerates a missing or empty section.
        terms = terms or {}
        return {terms[key]['language']: terms[key]['value'] for key in terms}

    def _fetchinfo(self):
        """Load labels, descriptions (and claims) from ``wbgetentities``."""
        entities = self.site.api('wbgetentities', ids=self.entity)
        self.setinfofromwbgetentities(entities['entities'][self.entity])

    def setinfofromwbgetentities(self, result):
        """Set descriptions, labels and claims from wbgetentities result.

        Sections absent from ``result`` are treated as empty for labels and
        descriptions. Claims are only cached when ``result`` actually has a
        ``claims`` section and none were cached before, so that a later
        call to claims() can still fetch them.

        Args:
            result (dict): entry for this entity in a wbgetentities reply.
        """
        self._descriptions = self._termsbylanguage(result.get('descriptions'))
        self._labels = self._termsbylanguage(result.get('labels'))
        if self._itemclaims is None and 'claims' in result:
            claims = result['claims'] or {}
            self._itemclaims = [
                Claim.fromsnak(self.site, claim['mainsnak'])
                for prop in claims
                for claim in claims[prop]
            ]

    @property
    def labels(self):
        """Labels dictionary per language"""
        if self._labels is None:
            self._fetchinfo()
        return self._labels

    @property
    def descriptions(self):
        """Descriptions dictionary per language"""
        if self._descriptions is None:
            self._fetchinfo()
        return self._descriptions

    def claims(self, prop=None):
        """Claims about an Entity.

        API Doc: https://www.mediawiki.org/wiki/Wikibase/API/en#wbgetclaims
        We will probably need to implement rank and props later on.

        Args:
            prop (list or str, optional): property or list of properties
                to keep, e.g. ['P238', 'P239']. All claims when omitted.

        Returns:
            list: Claim objects built from each statement's main snak.
        """
        if self._itemclaims is None:
            info = self.site.api('wbgetclaims', entity=self.entity)['claims']
            info = info or {}
            # Cache only after the API call succeeded, so a failed request
            # is not remembered as "no claims".
            self._itemclaims = [
                Claim.fromsnak(self.site, claim['mainsnak'])
                for propid in info
                for claim in info[propid]
            ]
        if prop is None:
            return self._itemclaims
        # A bare string would be matched by substring ('P16' in 'P166');
        # wrap it so membership is tested against whole property IDs.
        if isinstance(prop, (str, type(u''))):
            prop = [prop]
        return [claim for claim in self._itemclaims if claim.prop in prop]


class Item(Entity):

    """Wikibase Item.

    Attributes:
        site (WikiBaseSite): reference to a WikiBaseSite
        entity (str): Q number of the item.
        sitelinks (dict): title and badges of the linked page, per site
        descriptions (dict): dictionary containing description per language
        labels (dict): dictionary containing labels per language
    """

    def __init__(self, site, entity):
        """Build an Item.

        Args:
            site (WikiBaseSite): reference to a WikiBaseSite
            entity (str): Q number of the item; case and a missing leading
                'Q' are normalized.
        """
        qid = 'Q' + entity.upper().lstrip('Q')
        super(Item, self).__init__(site, qid)

        # Sitelinks cache; None until loaded.
        self._sitelinks = None

    def setinfofromwbgetentities(self, result):
        """Set sitelinks, descriptions, labels, claims from wbgetentities."""
        super(Item, self).setinfofromwbgetentities(result)
        links = {}
        self._sitelinks = links
        sitelinks = result['sitelinks']
        for wiki in sitelinks:
            link = sitelinks[wiki]
            site_id = link['site']
            page_title = link['title']
            page_badges = link['badges']
            links[site_id] = {'title': page_title, 'badges': page_badges}

    @property
    def sitelinks(self):
        """Sitelinks dictionary with title, and badges per site.

        Loaded through ``wbgetentities`` on first access.

        Example:
            >>> import mwclient
            >>> site = mwclient.WikiBaseSite(('https', 'www.wikidata.org'))
            >>> q = mwclient.entity.Item(site, 'Q3340172')
            >>> q.sitelinks
        """
        if self._sitelinks is not None:
            return self._sitelinks
        response = self.site.api('wbgetentities', ids=self.entity)
        self.setinfofromwbgetentities(response['entities'][self.entity])
        return self._sitelinks

    def __repr__(self):
        """Item representation."""
        shown = (self.entity, self.site.host)
        return "<Item object %s (%s)>" % shown


class Property(Entity):

    """Wikibase Property."""

    def __init__(self, site, entity):
        """Build a Property.

        Args:
            site (WikiBaseSite): reference to a WikiBaseSite
            entity (str): P number of the property; case and a missing
                leading 'P' are normalized.
        """
        pid = 'P' + entity.upper().lstrip('P')
        super(Property, self).__init__(site, pid)

    def __repr__(self):
        """Property representation."""
        shown = (self.entity, self.site.host)
        return "<Property object %s (%s)>" % shown


class Claim(object):

    """Claim

    Attributes:
        site (WikiBaseSite): site used to build Item/Property values.
        prop (str): property id.
        snak (dict): snak with all values returned in mainsnak from API call.
        snaktype (str): 'value', 'somevalue' or 'novalue'
        datatype (str): datatype ('wikibase-item', 'string', etc.)
        raw_value (dict): content of snak['datavalue']['value'] if snaktype is
            'value', None otherwise.
        value (object): typed content of snak['datavalue']['value'], or None
            when there is no raw value.
    """

    def __init__(self, site, prop, datatype, snaktype, raw_value=None, snak=None):
        """Constructor"""
        self.site = site
        self.prop = prop
        self.datatype = datatype
        self.raw_value = raw_value
        self.snaktype = snaktype
        self.snak = snak

    @classmethod
    def fromsnak(cls, site, snak):
        """Claim from snak dictionary.

        Args:
            site (mwclient.WikiBaseSite): site
            snak (dict): snak dictionary

        Returns:
            Claim: claim whose raw_value is only set for 'value' snaks.
        """
        snakvalue = None
        if snak['snaktype'] == 'value':
            snakvalue = snak['datavalue']['value']
        return cls(site, snak['property'],
                   snak['datatype'],
                   snak['snaktype'],
                   raw_value=snakvalue,
                   snak=snak)

    def __repr__(self):
        """Representation."""
        return "<Claim object %s [%s]>" % (self.prop, self.datatype)

    @property
    def value(self):
        """Typed value of the claim, built from raw_value and datatype.

        Returns None when raw_value is None (snaktype 'somevalue' or
        'novalue'). Datatypes without a dedicated wrapper return raw_value
        unchanged.
        """
        raw = self.raw_value
        if raw is None:
            # Nothing to convert; the typed constructors below would fail
            # on None (``**None`` / ``None['numeric-id']``).
            return None

        datatype = self.datatype
        if datatype == 'monolingualtext':
            return MonolingualText(**raw)
        if datatype == 'wikibase-item':
            return Item(self.site, str(raw['numeric-id']))
        if datatype == 'wikibase-property':
            return Property(self.site, str(raw['numeric-id']))
        if datatype == 'globe-coordinate':
            return GlobeCoordinate(**raw)
        if datatype == 'time':
            return TimeData(**raw)
        if datatype == 'quantity':
            return Quantity(**raw)
        # 'string', 'commonsMedia', 'external-id' and unknown datatypes.
        return raw


class GlobeCoordinate(object):

    """Plain holder for the fields of a globe-coordinate value.

    Attributes:
        latitude, longitude, altitude, precision, globe: stored exactly as
            passed to the constructor (all default to None).
    """

    def __init__(self, latitude=None, longitude=None, altitude=None, precision=None,
                 globe=None):
        """Store the given coordinate fields on the instance."""
        self.latitude, self.longitude, self.altitude = latitude, longitude, altitude
        self.precision, self.globe = precision, globe

    def __repr__(self):
        """Short representation showing latitude, longitude and altitude."""
        template = "<GlobeCoordinate {}... {}... {}... ...>"
        shown = (self.latitude, self.longitude, self.altitude)
        return template.format(*shown)


class TimeData(object):

    """Plain holder for the fields of a time value.

    Attributes:
        time, timezone, before, after, precision, calendarmodel: stored
            exactly as passed to the constructor (all default to None).
    """

    def __init__(self, time=None, timezone=None, before=None, after=None, precision=None,
                 calendarmodel=None):
        """Store the given time fields on the instance."""
        self.time, self.timezone = time, timezone
        self.before, self.after = before, after
        self.precision, self.calendarmodel = precision, calendarmodel


class Quantity(object):

    """Plain holder for the fields of a quantity value.

    Attributes:
        amount, unit, upperBound, lowerBound: stored exactly as passed to
            the constructor (all default to None).
    """

    def __init__(self, amount=None, unit=None, upperBound=None, lowerBound=None):
        """Store the given quantity fields on the instance."""
        self.amount, self.unit = amount, unit
        self.upperBound, self.lowerBound = upperBound, lowerBound


class MonolingualText(object):

    """Plain holder for a text together with its language.

    Attributes:
        text, language: stored exactly as passed to the constructor (both
            default to None).
    """

    def __init__(self, text=None, language=None):
        """Store the text and its language on the instance."""
        self.text, self.language = text, language

    def __repr__(self):
        """Representation showing the language followed by the text."""
        shown = (self.language, self.text)
        return "<MonolingualText [%s] '%s'>" % shown

0 comments on commit 898fa0b

Please sign in to comment.