From 703af7ccd093271309870f5f94120fed7dd1a41d Mon Sep 17 00:00:00 2001 From: Dag-Inge Aas <284996+dagingaa@users.noreply.github.com> Date: Thu, 23 Nov 2023 10:42:11 +0100 Subject: [PATCH 01/16] Add support for Norwegian redirects Useful for parsing the Norwegian wikipedia dumps, to avoid redirects when processing the data dump --- src/_data/redirects.js | 1 + 1 file changed, 1 insertion(+) diff --git a/src/_data/redirects.js b/src/_data/redirects.js index a52a0b04..2d538484 100644 --- a/src/_data/redirects.js +++ b/src/_data/redirects.js @@ -45,4 +45,5 @@ export default [ 'リダイレクト',//ja '転送', //ja '重定向',//zh + 'omdirigering',//no ] From 4b2eec1a9e940bf24ebfe3574d7aa4d540f0df30 Mon Sep 17 00:00:00 2001 From: spencer kelly Date: Thu, 23 Nov 2023 08:45:03 -0500 Subject: [PATCH 02/16] tmp --- src/_data/redirects.js | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/src/_data/redirects.js b/src/_data/redirects.js index 2d538484..afcad9d8 100644 --- a/src/_data/redirects.js +++ b/src/_data/redirects.js @@ -1,15 +1,15 @@ export default [ - 'aanstuur',//af + 'aanstuur', //af 'adkas', //br - 'alih',//id + 'alih', //id 'aýdaw', - 'beralîkirin',//ku + 'beralîkirin', //ku 'doorverwijzing', //nl - 'lencong',//ms + 'lencong', //ms 'ohjaus', 'patrz', //pl 'přesměruj', - 'preusmjeri',//hr + 'preusmjeri', //hr 'redireccion', 'redirección', //es 'redirecionamento', //pt @@ -24,26 +24,26 @@ export default [ 'yönlendirme', 'yönlendi̇rme', //tr 'ανακατευθυνση', //el - 'айдау',//kk + 'айдау', //kk 'перанакіраваньне', - 'перенаправление',//ru + 'перенаправление', //ru 'перенаправлення', //uk 'пренасочување', //mk - 'преусмери',//sr + 'преусмери', //sr 'преусмјери', - 'ווייטערפירן',//yi - 'تحويل',//ar + 'ווייטערפירן', //yi + 'تحويل', //ar 'تغییر_مسیر', 'تغییرمسیر', //fa - 'رجوع مکرر',//ur - 'رجوع_مکرر',//ur - 'अनुप्रेषित',//hi - 'पुनर्निर्देशन',//hi - 'পুননির্দেশ',//bn + 'رجوع مکرر', //ur + 'رجوع_مکرر', //ur + 'अनुप्रेषित', //hi + 'पुनर्निर्देशन', //hi + 'পুননির্দেশ', //bn 'เปลี่ยนทาง', //th 'ប្តូរទីតាំងទៅ', //km - 'リダイレクト',//ja + 'リダイレクト', //ja '転送', //ja - '重定向',//zh - 'omdirigering',//no + '重定向', //zh + 'omdirigering', //no ] From bdd3abc358ed007e5b79c4c61ba7111dbe11212f Mon Sep 17 00:00:00 2001 From: spencer kelly Date: Thu, 23 Nov 2023 08:47:56 -0500 Subject: [PATCH 03/16] more redirect templates --- src/_data/redirects.js | 46 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 42 insertions(+), 4 deletions(-) diff --git a/src/_data/redirects.js b/src/_data/redirects.js index afcad9d8..b0dff78c 100644 --- a/src/_data/redirects.js +++ b/src/_data/redirects.js @@ -1,36 +1,71 @@ export default [ 'aanstuur', //af + 'aastiurey', 'adkas', //br + 'ailgyfeirio', + 'alidirekto', 'alih', //id 'aýdaw', + 'baw-ing', 'beralîkirin', //ku + 'birzuzendu', + 'đổi hướng đến đây', 'doorverwijzing', //nl + 'header', + 'i̇stiqamətləndirmə', 'lencong', //ms + 'ohjaa tänne', 'ohjaus', + 'omdirigering', //no + 'pāradresācija', 'patrz', //pl + 'přesměrování', 'přesměruj', + 'preusmeritev', + 'preusmjerava', + 'preusmjerenje', 'preusmjeri', //hr + 'przekierowanie', + 'redir', + 'redirecció', 'redireccion', 'redirección', //es 'redirecionamento', //pt 'redirect', //en + 'redirect3', 'redirection', //fr + 'redirige aquí', + 'redirige', + 'redirixe equí', + 'rindirizz', 'rinvia', //it + 'stivre deike', + 'suunamine', 'tilvísun', + 'trimite', 'uudelleenohjaus', - 'weiterleitung', 'weiterleitung', //de + 'weiterleitungshinweis', + 'yoʻnaltirish', 'yönlendi̇r', - 'yönlendirme', 'yönlendi̇rme', //tr 'ανακατευθυνση', //el 'айдау', //kk + 'багыттама', + 'буссинаби', + 'дӏасахьажорг', + 'от пренасочване', + 'перанакіраванне', 'перанакіраваньне', + 'перанакіроўваецца сюды', 'перенаправление', //ru 'перенаправлення', //uk + 'перенаправлено', 'пренасочување', //mk + 'преусмерава ', 'преусмери', //sr 'преусмјери', + 'равонакунӣ', 'ווייטערפירן', //yi 'تحويل', //ar 'تغییر_مسیر', @@ -40,10 +75,13 @@ export default [ 'अनुप्रेषित', //hi 'पुनर्निर्देशन', //hi 'পুননির্দেশ', //bn - 'เปลี่ยนทาง', //th + 'পুনর্নির্দেশ', + 'යළියොමුව', + 'เปลี่ยนทาง', 'ប្តូរទីតាំងទៅ', //km + '다른 뜻 넘어옴', 'リダイレクト', //ja + '跳轉', '転送', //ja '重定向', //zh - 'omdirigering', //no ] From a6f2a6fa59ff4654ba1b209dc292ab228dcb83c5 Mon Sep 17 00:00:00 2001 From: spencer kelly Date: Thu, 23 Nov 2023 08:59:36 -0500 Subject: [PATCH 04/16] more disambig templates --- src/_data/disambig_templates.js | 106 +++++++++++++++++--------------- 1 file changed, 57 insertions(+), 49 deletions(-) diff --git a/src/_data/disambig_templates.js b/src/_data/disambig_templates.js index 0e39625d..6bba2ace 100644 --- a/src/_data/disambig_templates.js +++ b/src/_data/disambig_templates.js @@ -4,51 +4,10 @@ export default [ 'disambig', //en 'disambiguation', //en - // Disambiguation_message_boxes - 'letter-numbercombdisambig', - 'letter-number combination disambiguation', - 'dmbox', - 'airport disambiguation', - 'biology disambiguation', - 'call sign disambiguation', - 'caselaw disambiguation', - 'chinese title disambiguation', - 'disambiguation cleanup', - 'genus disambiguation', - 'hospital disambiguation', - 'human name disambiguation', - 'human name disambiguation cleanup', - 'letter-number combination disambiguation', - 'mathematical disambiguation', - 'military unit disambiguation', - 'music disambiguation', - 'number disambiguation', - 'opus number disambiguation', - 'phonetics disambiguation', - 'place name disambiguation', - 'portal disambiguation', - 'road disambiguation', - 'school disambiguation', - 'species latin name abbreviation disambiguation', - 'species latin name disambiguation', - 'station disambiguation', - 'synagogue disambiguation', - 'taxonomic authority disambiguation', - 'taxonomy disambiguation', - 'template disambiguation', - 'disamb2', - 'disamb3', - 'disamb4', - 'disambiguation lead', - 'disambiguation lead name', - 'disambiguation name', - 'disamb-term', - 'disamb-terms', - - - // i18n + 'aðgreining', 'aðgreining', //is 'aimai', //ja + 'airport disambiguation', 'ałtsʼáʼáztiin', //nv 'anlam ayrımı', //gag 'anlam ayrımı', //tr @@ -59,10 +18,14 @@ export default [ 'begriffsklärung', //de 'begriffsklärung', //pdc 'begriffsklearung', //bar + 'biology disambiguation', 'bisongidila', //kg 'bkl', //pfl 'bokokani', //ln 'caddayn', //so + 'call sign disambiguation', + 'caselaw disambiguation', + 'chinese title disambiguation', 'clerheans', //kw 'cudakirin', //ku 'čvor', //bs @@ -77,88 +40,129 @@ export default [ 'desambiguassiù', //lmo 'desambigui', //lfn 'dezambiguizare', //ro + 'dezanbìgua', + 'dəqiqləşdirmə', 'dəqiqləşdirmə', //az + 'disamb-term', + 'disamb-terms', + 'disamb2', + 'disamb3', + 'disamb4', 'disambigua', //it - 'disambigua', //lij - 'disambigua', //nap 'disambìgua', //sc - 'disambigua', //scn - 'disambiguasi', //id - 'disambiguasi', //su + 'disambiguasi', + 'disambiguation cleanup', + 'disambiguation lead name', + 'disambiguation lead', + 'disambiguation name', + 'disambiguazion', + 'disambigue', + 'discretiva', 'discretiva', //la 'disheñvelout', //br 'disingkek', //min 'dixanbigua', //vec 'dixebra', //ast 'diżambigwazzjoni', //mt + 'dmbox', 'doorverwijspagina', //nl 'dp', //nl - 'dp', //zea + 'dubbelsinnig', 'dubbelsinnig', //af 'dudalipen', //rmy 'dv', //nds_nl 'egyért', //hu + 'faaleaogaina', 'fleiri týdningar', //fo 'fleirtyding', //nn 'flertydig', //da 'förgrening', //sv + 'genus disambiguation', 'gì-ngiê', //cdo 'giklaro', //ceb 'gwahaniaethu', //cy 'homonimo', //io 'homónimos', //gl 'homonymie', //fr + 'hospital disambiguation', + 'huaʻōlelo puana like', 'huaʻōlelo puana like', //haw + 'human name disambiguation cleanup', + 'human name disambiguation', 'idirdhealú', //ga 'khu-pia̍t', //zh_min_nan 'kthjellim', //sq 'kujekesa', //sn + 'letter-number combination disambiguation', + 'letter-numbercombdisambig', 'maana', //sw 'maneo bin', //diq + 'mathematical disambiguation', 'mehrdüdig begreep', //nds 'menm non', //ht + 'military unit disambiguation', 'muardüüdag artiikel', //frr + 'music disambiguation', + 'myesakãrã', 'neibetsjuttings', //fy 'nozīmju atdalīšana', //lv + 'number disambiguation', 'nuorodinis', //lt 'nyahkekaburan', //ms 'omonimeye', //wa + 'omonimi', 'omonimia', //oc + 'opus number disambiguation', 'page dé frouque', //nrm 'paglilinaw', //tl 'panangilawlawag', //ilo 'pansayod', //war 'pejy mitovy anarana', //mg 'peker', //no + 'phonetics disambiguation', + 'place name disambiguation', + 'portal disambiguation', 'razdvojba', //hr 'razločitev', //sl 'razvrstavanje', //sh 'reddaghey', //gv + 'road disambiguation', 'rozcestník', //cs 'rozlišovacia stránka', //sk + 'school disambiguation', 'sclerir noziun', //rm 'selvendyssivu', //olo 'soilleireachadh', //gd + 'species latin name abbreviation disambiguation', + 'species latin name disambiguation', + 'station disambiguation', 'suzmunski', //jbo + 'synagogue disambiguation', 'täpsustuslehekülg', //et 'täsmennyssivu', //fi + 'taxonomic authority disambiguation', + 'taxonomy disambiguation', 'telplänov', //vo + 'template disambiguation', 'tlahtolmelahuacatlaliztli', //nah 'trang định hướng', //vi 'ujednoznacznienie', //pl 'verdudeliking', //li 'wěcejwóznamowosć', //dsb 'wjacezmyslnosć', //hsb + 'z', 'zambiguaçon', //mwl 'zeimeibu škiršona', //ltg 'αποσαφήνιση', //el 'айрық', //kk 'аҵакырацәа', //ab + 'бир аайы јок', 'вишезначна одредница', //sr 'ибҳомзудоӣ', //tg 'кёб магъаналы', //krc 'күп мәгънәләр', //tt 'күп мәғәнәлелек', //ba + 'массехк маӏан хилар', 'мъногосъмꙑслиѥ', //cu 'неадназначнасць', //be 'неадназначнасьць', //be_x_old @@ -202,7 +206,9 @@ export default [ 'වක්‍රෝත්ති', //si 'แก้ความกำกวม', //th 'သံတူကြောင်းကွဲ', //my + 'သဵင်မိူၼ် တူၼ်ႈထႅဝ်ပႅၵ်ႇ', 'ណែនាំ', //km + 'អសង្ស័យកម្ម', '동음이의', //ko '扤清楚', //gan '搞清楚', //zh_yue @@ -213,4 +219,6 @@ export default [ "sut'ichana qillqa", //qu // 'z', //vep // 'သဵင်မိူၼ် တူၼ်ႈထႅဝ်ပႅၵ်ႇ', //shn + `gestion dj'omònim`, + `sut'ichana qillqa`, ] From f7a1efe7e089026537c63132a1c4d2c4a4a4ec65 Mon Sep 17 00:00:00 2001 From: spencer kelly Date: Thu, 23 Nov 2023 09:13:26 -0500 Subject: [PATCH 05/16] more infoboxes --- src/_data/infoboxes.js | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/src/_data/infoboxes.js b/src/_data/infoboxes.js index 000cceba..3a8c5bb2 100644 --- a/src/_data/infoboxes.js +++ b/src/_data/infoboxes.js @@ -1,11 +1,13 @@ export default [ 'infobox', //en + 'amatl', 'anfo', //mwl 'anuāmapa', //haw 'bilgi kutusu', //tr 'bilgi', //tr 'bilgiquti', //uz + 'boaty fampahalalana', 'boaty', //mg 'boestkelaouiñ', //br 'bosca', //ga @@ -15,27 +17,42 @@ export default [ 'ficha', //es 'generalni', //hr 'gwybodlen3', //cy + 'hộp thông tin', 'info', //pt + 'infoboesse 2', 'infobokis', //tpi 'infoboks', //da + 'infobox deleted', + 'infobox generic', + 'infobox generiek', 'infochascha', //rm 'infokašćik', //dsb 'infokast', //et 'infokutija', //bs 'infolentelė', //lt + 'infookvir', 'infopolje', //sl 'informkesto', //eo + 'infoschede', 'infoskreine', //ltg 'infotaula', //eu 'inligtingskas', 'inligtingskas3', //af 'inligtingskas4', //af - 'kishtey', //gv + 'kishtey fys', + 'kotak info', 'kotak', //su + 'məlumat qutusu', + 'simple box', + 'tertcita tanxe', 'tertcita', //jbo + 'tiätuloová', 'tietolaatikko', //fi + 'wd bosca sonraí', 'yerleşim bilgi kutusu', + 'ynfoboks generyk', 'ynfoboks', //fy + 'πλαίσιο πληροφοριών', 'πλαίσιο', //el 'акарточка', //ab 'аҥа', //mhr @@ -52,18 +69,19 @@ export default [ 'қуттӣ', //tg 'ინფოდაფა', //ka 'տեղեկաքարտ', //hy - 'אינפאקעסטל', //yi 'תבנית', //he 'بطاقة', //ar 'ڄاڻخانو', //sd 'خانہ', //ur 'لغة', + 'معلوٗمات ڈَبہٕ', 'ज्ञानसन्दूक', //hi 'তথ্যছক', //bn 'ਜਾਣਕਾਰੀਡੱਬਾ', //pa 'సమాచారపెట్టె', //te 'තොරතුරුකොටුව', //si 'กล่องข้อมูล', //th + 'ກ່ອງຂໍ້ມູນ', 'ប្រអប់ព័ត៌មាន', //km '정보상자', //ko '明細', //zh_yue From f4c01747521b24611f2b2d636d09fdd66e2b14af Mon Sep 17 00:00:00 2001 From: spencer kelly Date: Sat, 23 Dec 2023 14:40:46 -0500 Subject: [PATCH 06/16] new metadata methods --- changelog.md | 7 ++++- scratch.js | 7 ++--- src/01-document/Document.js | 22 +++++++++++++++- src/_fetch/getResult.js | 14 ++++++---- src/_fetch/index.js | 9 +++---- src/_fetch/makeUrl.js | 20 ++++++++------- src/_fetch/parseDoc.js | 1 - tests/integration/metadata.test.js | 41 ++++++++++++++++++++++++++++++ 8 files changed, 96 insertions(+), 25 deletions(-) create mode 100644 tests/integration/metadata.test.js diff --git a/changelog.md b/changelog.md index 9fa9d313..b610dc8a 100644 --- a/changelog.md +++ b/changelog.md @@ -1,9 +1,14 @@ +#### 10.3.0 [Dec 2023] + +- **[new]** - fallbackTemplateFn handler #509 +- **[new]** - more i18n redirects and templates +- **[new]** - metadata methods `.revisionID()`, `.description()`, `.timestamp()`, `.pageImage()` + #### 10.2.1 [Nov 2023] - **[change]** - support more templates diff --git a/scratch.js b/scratch.js index 8d0d9231..601d367c 100644 --- a/scratch.js +++ b/scratch.js @@ -9,10 +9,11 @@ let str = ` str = ` Chapman {{Foo}} ` // str = `{{Refplease|date=November 2023|reason=Your explanation here}} in [[Jolgeh-ye Musaabad Rural District]],` -let doc = wtf(str) -// const doc = await wtf.fetch('Grand Bend') +// let doc = wtf(str) +const doc = await wtf.fetch('Grand Bend') +// console.log(doc.json()) // console.log(doc.template().json()) // console.log(doc.text()) -console.log(doc.references().map((r) => r.json())) +// console.log(doc.references().map((r) => r.json())) // console.log(doc.templates().map((r) => r.json())) diff --git a/src/01-document/Document.js b/src/01-document/Document.js index 358dc36a..4db443ce 100644 --- a/src/01-document/Document.js +++ b/src/01-document/Document.js @@ -46,9 +46,11 @@ class Document { sections: [], coordinates: [], // userAgent is used for successive calls to the API - userAgent: options.userAgent || options['User-Agent'] || options['Api-User-Agent'] || 'User of the wtf_wikipedia library', + userAgent: + options.userAgent || options['User-Agent'] || options['Api-User-Agent'] || 'User of the wtf_wikipedia library', templateFallbackFn: options.templateFallbackFn || null, revisionID: options.revisionID || null, + timestamp: options.timestamp || null, } // this._missing_templates = {} //for stats+debugging purposes @@ -515,6 +517,24 @@ class Document { } return this._revisionID || null } + timestamp(str) { + if (str !== undefined) { + this._timestamp = str + } + return this._timestamp || null + } + description(str) { + if (str !== undefined) { + this._description = str + } + return this._description || null + } + pageImage(str) { + if (str !== undefined) { + this._pageImage = str + } + return this._pageImage || null + } options() { return this._options diff --git a/src/_fetch/getResult.js b/src/_fetch/getResult.js index 49b314d8..02f88f11 100644 --- a/src/_fetch/getResult.js +++ b/src/_fetch/getResult.js @@ -10,17 +10,17 @@ */ const getResult = function (data, options = {}) { // handle nothing found or no data passed - if(!data?.query?.pages || !data?.query || !data){ + if (!data?.query?.pages || !data?.query || !data) { return null } - + //get all the pagesIds from the result let pages = Object.keys(data.query.pages) // map over the pageIds to parse out all the information return pages.map((id) => { // get the page by pageID - + let page = data.query.pages[id] || {} // if the page is missing or not found than return null @@ -34,6 +34,8 @@ const getResult = function (data, options = {}) { if (!text && page.revisions[0].slots) { text = page.revisions[0].slots.main['*'] } + let revisionID = page.revisions[0].revid + let timestamp = page.revisions[0].timestamp page.pageprops = page.pageprops || {} @@ -46,12 +48,14 @@ const getResult = function (data, options = {}) { title: page.title, pageID: page.pageid, namespace: page.ns, - domain: domain, + domain, + revisionID, + timestamp, + pageImage: page.pageprops['page_image_free'], wikidata: page.pageprops.wikibase_item, description: page.pageprops['wikibase-shortdesc'], }) - return { wiki: text, meta: meta } }) } diff --git a/src/_fetch/index.js b/src/_fetch/index.js index 2a8256e5..585c98d5 100644 --- a/src/_fetch/index.js +++ b/src/_fetch/index.js @@ -10,11 +10,11 @@ const isUrl = /^https?:\/\// /** * @typedef fetchDefaults * @property {string | undefined} [path] the path to the wiki api. default: api.php - * @property {string | undefined} [wiki] + * @property {string | undefined} [wiki] * @property {string | undefined} [domain] the domain of the wiki you want to query * @property {boolean | undefined} [follow_redirects] should the library follow redirects * @property {string | undefined} [lang] the language of the wiki - * @property {string | number | Array | Array | undefined} [title] + * @property {string | number | Array | Array | undefined} [title] * @property {string | undefined} [Api-User-Agent] the user agent of the application * @property {string | undefined} [origin] the domain or the origin of the request */ @@ -38,7 +38,7 @@ const defaults = { /** * fetches the page from the wiki and returns a Promise with the parsed wiki text - * + * * if you supply it with a single pageID or title it will return a Document object. * if you supply a wiki URL then we will parse it and use the tile and provide a single Document object * if you supply it with an array with pageIDs or an array of titles it will return an array of document objects. @@ -61,14 +61,13 @@ const fetch = function (title, options, callback) { if (typeof title === 'string' && isUrl.test(title)) { options = { ...options, ...parseUrl(title) } } - const url = makeUrl(options) const headers = makeHeaders(options) return unfetch(url, headers) .then((res) => res.json()) .then((res) => { - if (!res){ + if (!res) { throw new Error(`No JSON Data Found For ${url}`) } let data = getResult(res, options) diff --git a/src/_fetch/makeUrl.js b/src/_fetch/makeUrl.js index 1bf998b0..cad0f4df 100644 --- a/src/_fetch/makeUrl.js +++ b/src/_fetch/makeUrl.js @@ -1,11 +1,12 @@ import { isArray } from '../_lib/helpers.js' -const isInterWiki = /(wikibooks|wikidata|wikimedia|wikinews|wikipedia|wikiquote|wikisource|wikispecies|wikiversity|wikivoyage|wiktionary|foundation|meta)\.org/ +const isInterWiki = + /(wikibooks|wikidata|wikimedia|wikinews|wikipedia|wikiquote|wikisource|wikispecies|wikiversity|wikivoyage|wiktionary|foundation|meta)\.org/ const defaults = { action: 'query', prop: 'revisions|pageprops', // we use the 'revisions' api here, instead of the Raw api, for its CORS-rules.. - rvprop: 'content', + rvprop: 'content|ids|timestamp', maxlag: 5, rvslots: 'main', origin: '*', @@ -15,7 +16,7 @@ const defaults = { /** * turns a object into a query string - * + * * @private * @param {Object} obj * @returns {string} QueryString @@ -34,13 +35,12 @@ const toQueryString = function (obj) { * @returns {string} the cleaned title */ const cleanTitle = (page) => { - return page.replace(/ /g, '_') - .trim() + return page.replace(/ /g, '_').trim() } /** * generates the url for fetching the pages - * + * * @private * @param {import('.').fetchDefaults} options * @param {Object} [parameters] @@ -63,7 +63,6 @@ const makeUrl = function (options, parameters = defaults) { return '' } - if (!options.follow_redirects) { delete params.redirects } @@ -84,10 +83,13 @@ const makeUrl = function (options, parameters = defaults) { params.titles = cleanTitle(title) } else if (title !== undefined && isArray(title) && typeof title[0] === 'number') { //pageid array - params.pageids = title.filter(t => t).join('|') + params.pageids = title.filter((t) => t).join('|') } else if (title !== undefined && isArray(title) === true && typeof title[0] === 'string') { //title array - params.titles = title.filter(t => t).map(cleanTitle).join('|') + params.titles = title + .filter((t) => t) + .map(cleanTitle) + .join('|') } else { return '' } diff --git a/src/_fetch/parseDoc.js b/src/_fetch/parseDoc.js index eaac7567..b619ea4c 100644 --- a/src/_fetch/parseDoc.js +++ b/src/_fetch/parseDoc.js @@ -12,7 +12,6 @@ const parseDoc = function (res, title) { // filter out undefined res = res.filter((o) => o) - // put all the responses into Document formats let docs = res.map((o) => { return new Document(o.wiki, o.meta) diff --git a/tests/integration/metadata.test.js b/tests/integration/metadata.test.js new file mode 100644 index 00000000..6065af8a --- /dev/null +++ b/tests/integration/metadata.test.js @@ -0,0 +1,41 @@ +import test from 'tape' +import wtf from '../lib/index.js' + +test('null page metadata', (t) => { + let doc = wtf('oh yeah') + t.equal(doc.revisionID(), null) + t.equal(doc.pageID(), null) + t.equal(doc.description(), null) + t.equal(doc.timestamp(), null) + t.equal(doc.pageImage(), null) + + t.end() +}) + +test('found page metadata', (t) => { + let meta = { + lang: 'en', + wiki: 'wikipedia', + domain: 'wikipedia.org', + follow_redirects: true, + path: 'api.php', + title: 'Grand Bend', + pageID: 865444, + namespace: 0, + revisionID: 1174940601, + timestamp: '2023-09-11T18:28:45Z', + pageImage: 'Grand_Bend_2.JPG', + wikidata: 'Q1542518', + description: 'Place in Ontario, Canada', + } + let doc = wtf('oh yeah', meta) + t.equal(doc.revisionID(), meta.revisionID) + t.equal(doc.pageID(), meta.pageID) + t.equal(doc.description(), meta.description) + t.equal(doc.timestamp(), meta.timestamp) + t.equal(doc.pageImage(), meta.pageImage) + t.equal(doc.domain(), meta.domain) + t.equal(doc.wikidata(), meta.wikidata) + + t.end() +}) From f3151c912ec3f753dd6f485661dc95f4a56d4992 Mon Sep 17 00:00:00 2001 From: spencer kelly Date: Sat, 23 Dec 2023 15:03:18 -0500 Subject: [PATCH 07/16] tests and types for new methods --- scratch.js | 3 +- src/01-document/Document.js | 19 ++-- src/01-document/toJson.js | 7 ++ tests/fetch/fetch.test.js | 19 ++-- tests/integration/metadata.test.js | 18 ++++ tests/unit/getResult.test.js | 143 ++--------------------------- tests/unit/makeUrl.test.js | 90 +++++++++--------- types/index.d.ts | 6 +- 8 files changed, 108 insertions(+), 197 deletions(-) diff --git a/scratch.js b/scratch.js index 601d367c..5229e3be 100644 --- a/scratch.js +++ b/scratch.js @@ -10,8 +10,9 @@ str = ` Chapman {{Foo}} ` // str = `{{Refplease|date=November 2023|reason=Your explanation here}} in [[Jolgeh-ye Musaabad Rural District]],` // let doc = wtf(str) -const doc = await wtf.fetch('Grand Bend') +const doc = await wtf.fetch('Tony Hawk') // console.log(doc.json()) +console.log(doc.wikidata() + '|') // console.log(doc.template().json()) // console.log(doc.text()) diff --git a/src/01-document/Document.js b/src/01-document/Document.js index 4db443ce..856e6951 100644 --- a/src/01-document/Document.js +++ b/src/01-document/Document.js @@ -32,27 +32,28 @@ class Document { constructor(wiki, options) { options = options || {} this._options = options + let userAgent = options.userAgent || options['User-Agent'] || options['Api-User-Agent'] + userAgent = userAgent || 'User of the wtf_wikipedia library' let props = { - pageID: options.pageID || options.id || null, - namespace: options.namespace || options.ns || null, - lang: options.lang || options.language || null, - domain: options.domain || null, title: options.title || null, type: 'page', + userAgent, redirectTo: null, - wikidata: options.wikidata || null, wiki: wiki || '', categories: [], sections: [], coordinates: [], - // userAgent is used for successive calls to the API - userAgent: - options.userAgent || options['User-Agent'] || options['Api-User-Agent'] || 'User of the wtf_wikipedia library', templateFallbackFn: options.templateFallbackFn || null, revisionID: options.revisionID || null, timestamp: options.timestamp || null, + description: options.description || null, + wikidata: options.wikidata || null, + pageImage: options.pageImage || null, + pageID: options.pageID || options.id || null, + namespace: options.namespace || options.ns || null, + lang: options.lang || options.language || null, + domain: options.domain || null, } - // this._missing_templates = {} //for stats+debugging purposes Object.keys(props).forEach((k) => { Object.defineProperty(this, '_' + k, { diff --git a/src/01-document/toJson.js b/src/01-document/toJson.js index 523f2dce..b20e7e36 100644 --- a/src/01-document/toJson.js +++ b/src/01-document/toJson.js @@ -18,6 +18,13 @@ const defaults = { sections: true, pageID: true, categories: true, + wikidata: true, + revisionID: true, + description: true, + timestamp: false, + pageImage: false, + domain: false, + language: false, } /** diff --git a/tests/fetch/fetch.test.js b/tests/fetch/fetch.test.js index 5231bb5e..23f9a802 100644 --- a/tests/fetch/fetch.test.js +++ b/tests/fetch/fetch.test.js @@ -2,13 +2,20 @@ import test from 'tape' import wtf from '../lib/index.js' test('fetch-as-promise', (t) => { - t.plan(1) + t.plan(8) const p = wtf.fetch('Tony Hawk', { lang: 'en', 'Api-User-Agent': 'wtf_wikipedia test script - ', }) p.then(function (doc) { t.ok(doc.sections().length > 0, 'promise returned document') + t.equal(doc.language(), 'en') + t.equal(doc.title(), 'Tony Hawk') + t.equal(doc.pageID(), 87474) + t.equal(doc.wikidata(), 'Q295020') + t.notEqual(doc.revisionID(), null) + t.notEqual(doc.timestamp(), null) + t.notEqual(doc.description(), null) }) p.catch(function (e) { t.throw(e) @@ -112,11 +119,11 @@ test('intensive', (t) => { 'Porcupine', 'Chipmunk', 'Vole', - 'Chinchilla', - 'Gopher', - 'Capybara', - 'Beaver', - 'Hamster', + // 'Chinchilla', + // 'Gopher', + // 'Capybara', + // 'Beaver', + // 'Hamster', ] t.plan(pages.length) const promises = pages.map((page) => diff --git a/tests/integration/metadata.test.js b/tests/integration/metadata.test.js index 6065af8a..1d95e939 100644 --- a/tests/integration/metadata.test.js +++ b/tests/integration/metadata.test.js @@ -36,6 +36,24 @@ test('found page metadata', (t) => { t.equal(doc.pageImage(), meta.pageImage) t.equal(doc.domain(), meta.domain) t.equal(doc.wikidata(), meta.wikidata) + t.equal(doc.language(), meta.lang) + + doc.revisionID('foo') + doc.pageID('foo') + doc.description('foo') + doc.timestamp('foo') + doc.pageImage('foo') + doc.domain('foo') + doc.wikidata('foo') + doc.language('foo') + t.notEqual(doc.revisionID(), meta.revisionID) + t.notEqual(doc.pageID(), meta.pageID) + t.notEqual(doc.description(), meta.description) + t.notEqual(doc.timestamp(), meta.timestamp) + t.notEqual(doc.pageImage(), meta.pageImage) + t.notEqual(doc.domain(), meta.domain) + t.notEqual(doc.wikidata(), meta.wikidata) + t.notEqual(doc.language(), meta.wikidata) t.end() }) diff --git a/tests/unit/getResult.test.js b/tests/unit/getResult.test.js index 59c7bba3..d7cee5a9 100644 --- a/tests/unit/getResult.test.js +++ b/tests/unit/getResult.test.js @@ -1,147 +1,24 @@ import getResult from '../../src/_fetch/getResult.js' import test from 'tape' -test('parse a normal case', (t) => { - const options = { - "domain": "liquipedia.net", - "path": "counterstrike/api.php", - "follow_redirects": true, - "Api-User-Agent": "wtf_wikipedia test script - ", - "title": "Team_Liquid" - } - - const response = { - "warnings": { - "main": { - "*": "Unrecognized parameter: rvslots." - } - }, - "batchcomplete": "", - "query": { - "normalized": [ - { - "from": "Team_Liquid", - "to": "Team Liquid" - } - ], - "pages": { - "19571": { - "pageid": 19571, - "ns": 0, - "title": "Team Liquid", - "revisions": [ - { - "contentformat": "text/x-wiki", - "contentmodel": "wikitext", - "*": "test" - } - ], - "pageprops": { - "displaytitle": "Team Liquid", - "metaimage": "Team Liquid 2020.png", - "metaimageurl": "https://liquipedia.net/commons/images/7/7e/Team_Liquid_2020.png", - } - } - } - } - } - - const expected = [ - { - wiki: "test", - meta: { - "domain": "liquipedia.net", - "path": "counterstrike/api.php", - "follow_redirects": true, - "Api-User-Agent": "wtf_wikipedia test script - ", - "title": "Team Liquid", - pageID: 19571, - namespace: 0, - wikidata: undefined, - description: undefined, - } - } - ] - - const result = getResult(response, options) - t.deepEqual(expected, result) - t.end() -}) - -test('parse a normal case from wikimedia', (t) => { +test('parse a not found case', (t) => { const options = { - "lang": "it", - "wiki": "wiktionary", - "follow_redirects": true, - "path": "api.php", - "title": "casa" + lang: 'en', + wiki: 'wikipedia', + follow_redirects: true, + path: 'api.php', + 'Api-User-Agent': 'wtf_wikipedia test script - ', + title: '165111651dfasfasdfsadfas', } const response = { - "batchcomplete": "", - "query": { - "pages": { - "742": { - "pageid": 742, - "ns": 0, - "title": "casa", - "revisions": [ - { - "slots": { - "main": { - "contentmodel": "wikitext", - "contentformat": "text/x-wiki", - "*": "Italian wiktionary" - } - } - } - ], - "pageprops": { - "page_image_free": "RybnoeDistrict_06-13_Konstantinovo_village_05.jpg" - } - } - } - } + batchcomplete: '', + query: { pages: { '-1': { ns: 0, title: '165111651dfasfasdfsadfas', missing: '' } } }, } - const expected = [ - { - wiki: 'Italian wiktionary', - meta: { - lang: 'it', - wiki: 'wiktionary', - follow_redirects: true, - path: 'api.php', - title: 'casa', - pageID: 742, - namespace: 0, - domain: 'wiktionary.org', - wikidata: undefined, - description: undefined - } - } - ] - - const result = getResult(response, options) - t.deepEqual(expected, result) - t.end() -}) - -test('parse a not found case', (t) => { - const options = { - "lang": "en", - "wiki": "wikipedia", - "follow_redirects": true, - "path": "api.php", - "Api-User-Agent": "wtf_wikipedia test script - ", - "title": "165111651dfasfasdfsadfas" - } - - const response = { "batchcomplete": "", "query": { "pages": { "-1": { "ns": 0, "title": "165111651dfasfasdfsadfas", "missing": "" } } } } - const expected = [null] const result = getResult(response, options) t.deepEqual(expected, result) t.end() -}) \ No newline at end of file +}) diff --git a/tests/unit/makeUrl.test.js b/tests/unit/makeUrl.test.js index afb0836f..5265ace4 100644 --- a/tests/unit/makeUrl.test.js +++ b/tests/unit/makeUrl.test.js @@ -1,87 +1,83 @@ import test from 'tape' import makeUrl from '../../src/_fetch/makeUrl.js' -//makeUrl +//makeUrl const tests = [ { options: { - "lang": "en", - "wiki": "wikipedia", - "follow_redirects": true, - "path": "api.php", - "Api-User-Agent": "wtf_wikipedia test script - ", - "title": [ - "Marina Gilardoni", - "Jessica Kilian", - "Tanja Morel" - ] + lang: 'en', + wiki: 'wikipedia', + follow_redirects: true, + path: 'api.php', + 'Api-User-Agent': 'wtf_wikipedia test script - ', + title: ['Marina Gilardoni', 'Jessica Kilian', 'Tanja Morel'], }, - url: 'https://en.wikipedia.org/w/api.php?action=query&prop=revisions%7Cpageprops&rvprop=content&maxlag=5&rvslots=main&origin=*&format=json&redirects=true&titles=Marina_Gilardoni%7CJessica_Kilian%7CTanja_Morel' + url: 'https://en.wikipedia.org/w/api.php?action=query&prop=revisions%7Cpageprops&rvprop=content%7Cids%7Ctimestamp&maxlag=5&rvslots=main&origin=*&format=json&redirects=true&titles=Marina_Gilardoni%7CJessica_Kilian%7CTanja_Morel', }, { options: { - "lang": "it", - "wiki": "wiktionary", - "follow_redirects": true, - "path": "api.php", - "title": "casa" + lang: 'it', + wiki: 'wiktionary', + follow_redirects: true, + path: 'api.php', + title: 'casa', }, - url: 'https://it.wiktionary.org/w/api.php?action=query&prop=revisions%7Cpageprops&rvprop=content&maxlag=5&rvslots=main&origin=*&format=json&redirects=true&titles=casa' + url: 'https://it.wiktionary.org/w/api.php?action=query&prop=revisions%7Cpageprops&rvprop=content%7Cids%7Ctimestamp&maxlag=5&rvslots=main&origin=*&format=json&redirects=true&titles=casa', }, { options: { - "lang": "nl", - "wiki": "wikipedia", - "follow_redirects": false, - "Api-User-Agent": "wtf_wikipedia test script - ", - "title": 5321546 + lang: 'nl', + wiki: 'wikipedia', + follow_redirects: false, + 'Api-User-Agent': 'wtf_wikipedia test script - ', + title: 5321546, }, - url: 'https://nl.wikipedia.org/w/api.php?action=query&prop=revisions%7Cpageprops&rvprop=content&maxlag=5&rvslots=main&origin=*&format=json&pageids=5321546' + url: 'https://nl.wikipedia.org/w/api.php?action=query&prop=revisions%7Cpageprops&rvprop=content%7Cids%7Ctimestamp&maxlag=5&rvslots=main&origin=*&format=json&pageids=5321546', }, { options: { - "lang": "en", - "wiki": "wikipedia", - "follow_redirects": true, - "Api-User-Agent": "wtf_wikipedia test script - ", - "title": [145422, 3120522] + lang: 'en', + wiki: 'wikipedia', + follow_redirects: true, + 'Api-User-Agent': 'wtf_wikipedia test script - ', + title: [145422, 3120522], }, - url: 'https://en.wikipedia.org/w/api.php?action=query&prop=revisions%7Cpageprops&rvprop=content&maxlag=5&rvslots=main&origin=*&format=json&redirects=true&pageids=145422%7C3120522' + url: 'https://en.wikipedia.org/w/api.php?action=query&prop=revisions%7Cpageprops&rvprop=content%7Cids%7Ctimestamp&maxlag=5&rvslots=main&origin=*&format=json&redirects=true&pageids=145422%7C3120522', }, { options: { - "domain": "liquipedia.net", - "path": "counterstrike/api.php", - "follow_redirects": true, - "Api-User-Agent": "wtf_wikipedia test script - ", - "title": "Team_Liquid" + domain: 'liquipedia.net', + path: 'counterstrike/api.php', + follow_redirects: true, + 'Api-User-Agent': 'wtf_wikipedia test script - ', + title: 'Team_Liquid', }, - url: 'https://liquipedia.net/counterstrike/api.php?action=query&prop=revisions%7Cpageprops&rvprop=content&maxlag=5&rvslots=main&origin=*&format=json&redirects=true&titles=Team_Liquid' + url: 'https://liquipedia.net/counterstrike/api.php?action=query&prop=revisions%7Cpageprops&rvprop=content%7Cids%7Ctimestamp&maxlag=5&rvslots=main&origin=*&format=json&redirects=true&titles=Team_Liquid', }, { options: { - "domain": "en.wikipedia.org", - "follow_redirects": true, - "Api-User-Agent": "wtf_wikipedia test script - ", - "title": [145422, 3120522] + domain: 'en.wikipedia.org', + follow_redirects: true, + 'Api-User-Agent': 'wtf_wikipedia test script - ', + title: [145422, 3120522], }, - url: 'https://en.wikipedia.org/w/api.php?action=query&prop=revisions%7Cpageprops&rvprop=content&maxlag=5&rvslots=main&origin=*&format=json&redirects=true&pageids=145422%7C3120522' + url: 'https://en.wikipedia.org/w/api.php?action=query&prop=revisions%7Cpageprops&rvprop=content%7Cids%7Ctimestamp&maxlag=5&rvslots=main&origin=*&format=json&redirects=true&pageids=145422%7C3120522', }, { options: {}, - url: '' + url: '', }, { options: { - "domain": "en.wikipedia.org", - "follow_redirects": true, - "Api-User-Agent": "wtf_wikipedia test script - " + domain: 'en.wikipedia.org', + follow_redirects: true, + 'Api-User-Agent': 'wtf_wikipedia test script - ', }, - url: '' + url: '', }, ] -tests.forEach(testCase => { +tests.forEach((testCase) => { test('make the correct url', (t) => { const result = makeUrl(testCase.options) t.equal(testCase.url, result) diff --git a/types/index.d.ts b/types/index.d.ts index 02376010..e9250eda 100644 --- a/types/index.d.ts +++ b/types/index.d.ts @@ -47,7 +47,11 @@ declare class Document { url(): string | null wikidata(id?: string): string | null wikitext(): string - revisionID(): string | null + revisionID(id?: number): number | null + description(desc?: string): string | null + timestamp(iso?: string): string | null + pageImage(img?: string): string | null + domain(domain?: string): string | null } declare class Section { From 886ffa99377336d9e88b7871d7783385295b52c7 Mon Sep 17 00:00:00 2001 From: spencer kelly Date: Sat, 23 Dec 2023 15:12:42 -0500 Subject: [PATCH 08/16] fix af wiki template --- scratch.js | 8 ++++++-- src/template/custom/text-only/hardcoded.js | 8 +++++--- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/scratch.js b/scratch.js index 5229e3be..53ad8f42 100644 --- a/scratch.js +++ b/scratch.js @@ -9,10 +9,14 @@ let str = ` str = ` Chapman {{Foo}} ` // str = `{{Refplease|date=November 2023|reason=Your explanation here}} in [[Jolgeh-ye Musaabad Rural District]],` +wtf.extend((models, templates) => { + templates.egiptekas = '{|-\n' +}) // let doc = wtf(str) -const doc = await wtf.fetch('Tony Hawk') +const doc = await wtf.fetch('3de Dinastie van Egipte', 'af') // console.log(doc.json()) -console.log(doc.wikidata() + '|') +console.log(doc.text()) +// console.log(doc.wikidata() + '|') // console.log(doc.template().json()) // console.log(doc.text()) diff --git a/src/template/custom/text-only/hardcoded.js b/src/template/custom/text-only/hardcoded.js index 4c8f01ef..b36179de 100644 --- a/src/template/custom/text-only/hardcoded.js +++ b/src/template/custom/text-only/hardcoded.js @@ -24,7 +24,7 @@ export default { '&': '&', ';': ';', ampersand: '&', - 'dagger': '†', + dagger: '†', 'double-dagger': '‡', snds: ' – ', snd: ' – ', @@ -58,6 +58,8 @@ export default { checked: '✔️', 'thumbs up': '👍', 'thumbs down': '👎', - 'minusplus': '∓', - 'plusminus': '±' + minusplus: '∓', + plusminus: '±', + + egiptekas: '{|-\n', } From ab478736a6085940935f23dbdf3807a4ab37a478 Mon Sep 17 00:00:00 2001 From: spencer kelly Date: Sun, 24 Dec 2023 12:49:03 -0500 Subject: [PATCH 09/16] isstubm, start debug template --- README.md | 16 +++++++----- plugins/debug/README.md | 23 +++++++++++++++++ plugins/debug/package.json | 51 +++++++++++++++++++++++++++++++++++++ plugins/debug/scratch.js | 7 +++++ plugins/debug/src/index.js | 27 ++++++++++++++++++++ scratch.js | 9 +++---- src/01-document/Document.js | 9 +++++++ src/01-document/isStub.js | 6 +++++ types/index.d.ts | 1 + 9 files changed, 136 insertions(+), 13 deletions(-) create mode 100644 plugins/debug/README.md create mode 100644 plugins/debug/package.json create mode 100644 plugins/debug/scratch.js create mode 100644 plugins/debug/src/index.js create mode 100644 src/01-document/isStub.js diff --git a/README.md b/README.md index 07fc058a..17024327 100644 --- a/README.md +++ b/README.md @@ -138,7 +138,7 @@ run it on the client-side: ```html