Skip to content

Commit

Permalink
Merge pull request #526 from spencermountain/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
spencermountain committed Mar 24, 2023
2 parents 72df976 + a2bb8e7 commit a4c36b1
Show file tree
Hide file tree
Showing 22 changed files with 500 additions and 128 deletions.
15 changes: 10 additions & 5 deletions README.md
Expand Up @@ -359,22 +359,27 @@ the fetch method follows redirects.

### API plugin

**wtf.category(title, [lang], [options | callback])**
**wtf.getCategoryPages(title, [options])**

retrieves all pages and sub-categories belonging to a given category:

```js
wtf.extend(require('wtf-plugin-api'))
let result = await wtf.category('Category:Politicians_from_Paris')
let result = await wtf.getCategoryPages('Category:Politicians_from_Paris')
/*
{
pages: [{title: 'Paul Bacon', pageid: 1266127 }, ...],
categories: [ {title: 'Category:Mayors of Paris' } ]
[
{"pageid":52502362,"ns":0,"title":"William Abitbol"},
{"pageid":50101413,"ns":0,"title":"Marie-Joseph Charles des Acres de L'Aigle"}
...
{"pageid":62721979,"ns":14,"title":"Category:Councillors of Paris"},
{"pageid":856891,"ns":14,"title":"Category:Mayors of Paris"}
]
}
*/
```

**wtf.random([lang], [options], [callback])**
**wtf.random([options])**

fetches a random wikipedia article, from a given language or domain

Expand Down
38 changes: 29 additions & 9 deletions plugins/api/README.md
Expand Up @@ -125,17 +125,37 @@ wtf.getRandomCategory({lang:'fr'}).then(cat=>{
```

## Category Pages
fetch+parse all documents in a given category, to a specific depth.
fetch all documents and sub-categories in a given category. Only returns identifying information for the page, not the actual page content.
```js
// get the first sentence of all MLB stadiums:
wtf.getCategoryPages('Major League Baseball venues').then(docs => {
docs.map(doc => doc.sentence(0).text())
wtf.getCategoryPages('Major League Baseball venues').then(pages => {
pages.map(page => page.title)
// [
// 'Fenway park is a sports complex and major league baseball stadium...',
// 'Rogers Center is a entertainment venue ...'
// 'List of current Major League Baseball stadiums',
// 'List of former Major League Baseball stadiums'
// ...
// 'Category:Spring training ballparks',
// 'Category:Wrigley Field'
//]
})
```
Pages can be retrieved recursively from all sub-categories by passing `recursive: true` as part of options:
```js
wtf.getCategoryPages('Major League Baseball venues', {recursive: true})
```
To exclude certain categories from being expanded, specify these as part of `categoryExclusions`. The categories to exclude must be specified with the `Category:` prefix, but without the underscores commonly seen in wikipedia page titles. Note that the category pages themselves will still be returned, but the pages within those sub-categories will not.
```js
wtf.getCategoryPages('Major League Baseball venues', {
recursive: true,
categoryExclusions: [
'Category:Defunct Major League Baseball venues',
'Category:Major League ballpark logos'
]
})
```
As a safety limit, a maximum depth can be specified which limits how many sub-categories recursive mode will traverse down. **This is off by default.**
```js
wtf.getCategoryPages('Major League Baseball venues', {recursive: true, maxDepth: 2})
```


## Template pages
Expand Down Expand Up @@ -184,8 +204,8 @@ docs.forEach((doc) => {
* **doc.getIncoming()** - fetch all pages that link to this document
* **doc.getPageViews()** - daily traffic report for this document

* **wtf.getRandomCategory()** -
* **wtf.getTemplatePages()** -
* **wtf.getCategoryPages()** -
* **wtf.getRandomCategory()** - get the name of a random wikipedia category
* **wtf.getTemplatePages()** - fetches all pages that use a specific template or infobox
* **wtf.getCategoryPages()** - fetch all pages in a specified category

MIT
157 changes: 124 additions & 33 deletions plugins/api/builds/wtf-plugin-api.cjs
@@ -1,10 +1,41 @@
/* wtf-plugin-api 1.0.1 MIT */
/* wtf-plugin-api 2.0.0 MIT */
(function (global, factory) {
typeof exports === 'object' && typeof module !== 'undefined' ? module.exports = factory() :
typeof define === 'function' && define.amd ? define(factory) :
(global = typeof globalThis !== 'undefined' ? globalThis : global || self, global.wtfApi = factory());
})(this, (function () { 'use strict';

/**
* factory for header options
*
* @private
* @param {object} options
* @returns {object} the generated options
*/
const makeHeaders = function (options) {
  // Tolerate a missing options object so a call without settings yields the
  // defaults instead of throwing on property access.
  const opts = options || {};

  // First non-empty of the accepted spellings wins; otherwise a generic agent.
  const agent =
    opts.userAgent || opts['User-Agent'] || opts['Api-User-Agent'] || 'User of the wtf_wikipedia library';

  // `noOrigin` forces an empty Origin header; otherwise fall back to '*'.
  const origin = opts.noOrigin ? '' : opts.origin || opts.Origin || '*';

  return {
    method: 'GET',
    headers: {
      'Content-Type': 'application/json',
      // the same agent string is sent under both header names
      'Api-User-Agent': agent,
      'User-Agent': agent,
      Origin: origin,
      'Accept-Encoding': 'gzip',
    },
    redirect: 'follow',
  }
};

function normalize(title = '') {
title = title.replace(/ /g, '_');
title = title.trim();
Expand All @@ -24,8 +55,9 @@
return arr.join('&')
}

function fetchOne(url, http, prop) {
return http(url).then((res) => {
function fetchOne(url, options, http, prop) {
const headers = makeHeaders(options);
return http(url, headers).then((res) => {
let pages = Object.keys(res.query.pages || {});
if (pages.length === 0) {
return { pages: [], cursor: null }
Expand Down Expand Up @@ -60,13 +92,14 @@
return url
};

const getRedirects = async function (title, http) {
const getRedirects = async function (title, options, http) {
options = { ...defaults, ...options };
let list = [];
let getMore = true;
let append = '';
while (getMore) {
let url = makeUrl$5(title, defaults, append);
let { pages, cursor } = await fetchOne(url, http, 'redirects');
let url = makeUrl$5(title, options, append);
let { pages, cursor } = await fetchOne(url, options, http, 'redirects');
list = list.concat(pages);
if (cursor && cursor.rdcontinue) {
append = '&rdcontinue=' + cursor.lhcontinue;
Expand Down Expand Up @@ -101,13 +134,14 @@
return url
};

const getIncoming = async function (title, http) {
const getIncoming = async function (title, options, http) {
options = { ...defaults, ...options };
let list = [];
let getMore = true;
let append = '';
while (getMore) {
let url = makeUrl$4(title, defaults, append);
let { pages, cursor } = await fetchOne(url, http, 'linkshere');
let url = makeUrl$4(title, options, append);
let { pages, cursor } = await fetchOne(url, options, http, 'linkshere');
list = list.concat(pages);
if (cursor && cursor.lhcontinue) {
append = '&lhcontinue=' + cursor.lhcontinue;
Expand Down Expand Up @@ -139,9 +173,11 @@
return url
};

const getPageViews = function (doc, http) {
let url = makeUrl$3(doc.title(), defaults);
return http(url).then((res) => {
const getPageViews = function (doc, options, http) {
options = { ...defaults, ...options };
let url = makeUrl$3(doc.title(), options);
const headers = makeHeaders(options);
return http(url, headers).then((res) => {
let pages = Object.keys(res.query.pages || {});
if (pages.length === 0) {
return []
Expand Down Expand Up @@ -176,12 +212,13 @@

// fetch all the pages that use a specific template
const getTransclusions = async function (template, _options, http) {
let options = { ...defaults, ..._options };
let list = [];
let getMore = true;
let append = '';
while (getMore) {
let url = makeUrl$2(template, defaults, append);
let { pages, cursor } = await fetchOne(url, http, 'transcludedin');
let url = makeUrl$2(template, options, append);
let { pages, cursor } = await fetchOne(url, options, http, 'transcludedin');
list = list.concat(pages);
if (cursor && cursor.ticontinue) {
append = '&ticontinue=' + cursor.ticontinue;
Expand All @@ -197,14 +234,15 @@
list: 'categorymembers',
cmlimit: 500,
cmtype: 'page|subcat',
cmnamespace: 0,
cmnamespace: '0|14',
format: 'json',
origin: '*',
redirects: true
};

const fetchIt$1 = function (url, http, prop) {
return http(url).then((res) => {
const fetchIt$1 = function (url, options, http, prop) {
const headers = makeHeaders(options);
return http(url, headers).then((res) => {
let pages = Object.keys(res.query[prop] || {});
if (pages.length === 0) {
return { pages: [], cursor: null }
Expand All @@ -227,30 +265,80 @@
title = 'Category:' + title;
}
url += `&cmtitle=${normalize(title)}`;
url += `&cmprop=ids|title|type`;
if (append) {
url += append;
}
return url
};

/**
 * fetch every member of a single category, following continuation cursors
 * until the API reports no further results.
 *
 * @private
 * @param {string} title the category to list
 * @param {object} options request options, forwarded to the url builder and the fetcher
 * @param {Function} http the http helper handed on to the fetcher
 * @returns {Promise<Array>} all member entries, in the order they were returned
 */
const getOneCategory = async function (title, options, http) {
  let list = [];
  let append = '';
  let getMore = true;
  while (getMore) {
    const url = makeUrl$1(title, options, append);
    const { pages, cursor } = await fetchIt$1(url, options, http, 'categorymembers');
    list = list.concat(pages);
    if (cursor && cursor.cmcontinue) {
      // the continuation token must be read from the same key that was tested
      append = '&cmcontinue=' + cursor.cmcontinue;
    } else {
      getMore = false;
    }
  }
  return list
};

/**
 * list a category and, depth permitting, the members of its sub-categories.
 *
 * Sub-categories are walked one after another. The exclusion list and the
 * already-seen list only decide which sub-categories get expanded; the
 * returned entries themselves are not filtered or de-duplicated.
 *
 * @private
 * @param {string} title category to list
 * @param {object} options request options, passed through unchanged
 * @param {Array} [exclusions] titles of sub-categories that must not be expanded
 * @param {number} [maxDepth] expansion stops once currentDepth reaches this; undefined means no limit
 * @param {number} currentDepth depth of this call (the caller starts at 0)
 * @param {Array} pagesSeen titles already queued for expansion; appended to in place
 * @param {Function} http the http helper handed on to getOneCategory
 * @returns {Promise<Array>} this category's members followed by those of each expanded sub-category
 */
async function getCategoriesRecursively(
  title,
  options,
  exclusions,
  maxDepth,
  currentDepth,
  pagesSeen,
  http
) {
  const members = await getOneCategory(title, options, http);

  // a defined maxDepth that has been reached ends the descent here
  const depthReached = maxDepth !== undefined && !(currentDepth < maxDepth);
  if (depthReached) {
    return members
  }

  // expand only sub-categories that are neither excluded nor already queued (guards against cycles)
  const toExpand = members.filter((member) => {
    if (member.type !== 'subcat') {
      return false
    }
    if (exclusions && exclusions.includes(member.title)) {
      return false
    }
    return !pagesSeen.includes(member.title)
  });
  for (const sub of toExpand) {
    pagesSeen.push(sub.title);
  }

  const nested = [];
  for (const sub of toExpand) {
    const below = await getCategoriesRecursively(
      sub.title,
      options,
      exclusions,
      maxDepth,
      currentDepth + 1,
      pagesSeen,
      http
    );
    nested.push(below);
  }
  return members.concat(...nested)
}

/**
 * list the members of a category, optionally descending into sub-categories.
 *
 * The supplied options are layered over the shared defaults. Recursion only
 * happens when `recursive` is exactly `true`; `categoryExclusions` and
 * `maxDepth` are read from the merged options and handed to the recursive walk.
 *
 * @param {string} title category to list
 * @param {object} [options] request options plus recursive / categoryExclusions / maxDepth
 * @param {Function} http the http helper handed on to the category fetchers
 * @returns {Promise<Array>} the member entries found
 */
async function getCategory(title, options, http) {
  const settings = { ...defaults, ...options };
  const { categoryExclusions, maxDepth } = settings;

  if (settings.recursive !== true) {
    const members = await getOneCategory(title, settings, http);
    return members
  }

  // start at depth 0 with an empty list of already-seen sub-categories
  const members = await getCategoriesRecursively(title, settings, categoryExclusions, maxDepth, 0, [], http);
  return members
}

const params$1 = {
action: 'query',
generator: 'random',
Expand All @@ -264,8 +352,9 @@
redirects: 'true'
};

const fetchIt = function (url, http) {
return http(url).then((res) => {
const fetchIt = function (url, options, http) {
const headers = makeHeaders(options);
return http(url, headers).then((res) => {
let pages = Object.keys(res.query.pages || {});
if (pages.length === 0) {
return { pages: [], cursor: null }
Expand All @@ -284,10 +373,11 @@
};

const getRandom = async function (_options, http, wtf) {
let url = makeUrl(defaults);
let options = { ...defaults, ..._options };
let url = makeUrl(options);
let page = {};
try {
page = await fetchIt(url, http) || {};
page = await fetchIt(url, options, http) || {};
} catch (e) {
console.error(e);
}
Expand Down Expand Up @@ -317,7 +407,8 @@
url = `https://${options.domain}/${options.path}?`;
}
url += toUrlParams(params);
return http(url)
const headers = makeHeaders(options);
return http(url, headers)
.then((res) => {
try {
let o = res.query.pages;
Expand Down Expand Up @@ -460,13 +551,13 @@
const addMethod = function (models) {
// doc methods
models.Doc.prototype.getRedirects = function () {
return getRedirects(this.title(), models.http)
return getRedirects(this.title(), this.options(), models.http)
};
models.Doc.prototype.getIncoming = function () {
return getIncoming(this.title(), models.http)
return getIncoming(this.title(), this.options(), models.http)
};
models.Doc.prototype.getPageViews = function () {
return getPageViews(this, models.http)
return getPageViews(this, this.options(), models.http)
};

// constructor methods
Expand All @@ -485,11 +576,11 @@
models.wtf.fetchList = function (list, options) {
return fetchList(list, options, models.wtf)
};
models.wtf.getIncoming = function (title) {
return getIncoming(title, models.http)
models.wtf.getIncoming = function (title, options) {
return getIncoming(title, options, models.http)
};
models.wtf.getRedirects = function (title) {
return getRedirects(title, models.http)
models.wtf.getRedirects = function (title, options) {
return getRedirects(title, options, models.http)
};
// aliases
models.wtf.random = models.wtf.getRandomPage;
Expand Down

0 comments on commit a4c36b1

Please sign in to comment.