Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

I think the script doesn't work with a 23k urls sitemap #48

Open
CamTosh opened this issue Mar 30, 2024 · 0 comments
Open

I think the script doesn't work with a 23k urls sitemap #48

CamTosh opened this issue Mar 30, 2024 · 0 comments

Comments

@CamTosh
Copy link

CamTosh commented Mar 30, 2024

Hey,
The script works fine on 4 of my websites (each with fewer than 500 URLs in its sitemap). When I try it with a website that has 23k URLs, I get the error shown below.

As with the other websites, the service account was correctly added, with the same permissions as on the other websites. In fact, it was working before, when the site had 1k URLs.

If you have any idea what is going wrong, please let me know. Thanks!

 gis mywebsite.app
🔎 Processing site: sc-domain:mywebsite.app
👉 Found 2732 URLs in 2 sitemap

/Users/user1/.nvm/versions/node/v18.17.1/lib/node_modules/google-indexing-script/node_modules/sitemapper/lib/assets/sitemapper.js:1
"use strict";Object.defineProperty(exports,"__esModule",{value:!0}),exports.default=void 0;var _xml2js=require("xml2js"),_got=_interopRequireDefault(require("got")),_zlib=_interopRequireDefault(require("zlib")),_pLimit=_interopRequireDefault(require("p-limit")),_isGzip=_interopRequireDefault(require("is-gzip"));function _interopRequireDefault(a){return a&&a.__esModule?a:{default:a}}function asyncGeneratorStep(a,b,c,d,e,f,g){try{var h=a[f](g),i=h.value}catch(a){return void c(a)}h.done?b(i):Promise.resolve(i).then(d,e)}function _asyncToGenerator(a){return function(){var b=this,c=arguments;return new Promise(function(d,e){function f(a){asyncGeneratorStep(h,d,e,f,g,"next",a)}function g(a){asyncGeneratorStep(h,d,e,f,g,"throw",a)}var h=a.apply(b,c);f(void 0)})}}class Sitemapper{constructor(a){var b=a||{requestHeaders:{}};this.url=b.url,this.timeout=b.timeout||15e3,this.timeoutTable={},this.lastmod=b.lastmod||0,this.requestHeaders=b.requestHeaders,this.debug=b.debug,this.concurrency=b.concurrency||10,this.retries=b.retries||0,this.rejectUnauthorized=!1!==b.rejectUnauthorized}fetch(){var a=arguments,b=this;return _asyncToGenerator(function*(){var c=0<a.length&&a[0]!==void 0?a[0]:b.url,d={url:"",sites:[],errors:[]};b.debug&&b.lastmod&&console.debug("Using minimum lastmod value of ".concat(b.lastmod));try{d=yield b.crawl(c)}catch(a){b.debug&&console.error(a)}return{url:c,sites:d.sites||[],errors:d.errors||[]}})()}static get timeout(){return this.timeout}static set timeout(a){this.timeout=a}static get lastmod(){return this.lastmod}static set lastmod(a){this.lastmod=a}static set url(a){this.url=a}static get url(){return this.url}static set debug(a){this.debug=a}static get debug(){return this.debug}parse(){var a=arguments,b=this;return _asyncToGenerator(function*(){var c=0<a.length&&a[0]!==void 0?a[0]:b.url,d={method:"GET",resolveWithFullResponse:!0,gzip:!0,responseType:"buffer",headers:b.requestHeaders,https:{rejectUnauthorized:b.rejectUnauthorized}};try{var 
e=_got.default.get(c,d);b.initializeTimeout(c,e);var f=yield e;if(!f||200!==f.statusCode)return clearTimeout(b.timeoutTable[c]),{error:f.error,data:f};var g=(0,_isGzip.default)(f.rawBody)?yield b.decompressResponseBody(f.body):f.body;var h=yield(0,_xml2js.parseStringPromise)(g);return{error:null,data:h}}catch(a){return"CancelError"===a.name?{error:"Request timed out after ".concat(b.timeout," milliseconds for url: '").concat(c,"'"),data:a}:"HTTPError"===a.name?{error:"HTTP Error occurred: ".concat(a.message),data:a}:{error:"Error occurred: ".concat(a.name),data:a}}})()}initializeTimeout(a,b){this.timeoutTable[a]=setTimeout(()=>b.cancel(),this.timeout)}crawl(a){var b=arguments,c=this;return _asyncToGenerator(function*(){var d=1<b.length&&b[1]!==void 0?b[1]:0;try{var{error:k,data:l}=yield c.parse(a);if(clearTimeout(c.timeoutTable[a]),k)return d<c.retries?(c.debug&&console.log("(Retry attempt: ".concat(d+1," / ").concat(c.retries,") ").concat(a," due to ").concat(l.name," on previous request")),c.crawl(a,d+1)):(c.debug&&console.error("Error occurred during \"crawl('".concat(a,"')\":\n\r Error: ").concat(k)),{sites:[],errors:[{type:l.name,message:k,url:a,retries:d}]});if(l&&l.urlset&&l.urlset.url){c.debug&&console.debug("Urlset found during \"crawl('".concat(a,"')\""));var m=l.urlset.url.filter(a=>{if(0===c.lastmod)return!0;if(void 0===a.lastmod)return!1;var b=new Date(a.lastmod[0]).getTime();return b>=c.lastmod}).map(a=>a.loc&&a.loc[0]);return{sites:m,errors:[]}}if(l&&l.sitemapindex){c.debug&&console.debug("Additional sitemap found during \"crawl('".concat(a,"')\""));var e=l.sitemapindex.sitemap.map(a=>a.loc&&a.loc[0]),f=(0,_pLimit.default)(c.concurrency),g=e.map(a=>f(()=>c.crawl(a))),h=yield Promise.all(g),i=h.filter(a=>0===a.errors.length).reduce((a,b)=>{var{sites:c}=b;return[...a,...c]},[]),j=h.filter(a=>0!==a.errors.length).reduce((a,b)=>{var{errors:c}=b;return[...a,...c]},[]);return{sites:i,errors:j}}return d<c.retries?(c.debug&&console.log("(Retry attempt: 
".concat(d+1," / ").concat(c.retries,") ").concat(a," due to ").concat(l.name," on previous request")),c.crawl(a,d+1)):(c.debug&&console.error("Unknown state during \"crawl('".concat(a,")'\":"),k,l),{sites:[],errors:[{url:a,type:l.name||"UnknownStateError",message:"An unknown error occurred.",retries:d}]})}catch(a){c.debug&&c.debug&&console.error(a)}})()}getSites(){var a=arguments,b=this;return _asyncToGenerator(function*(){var c=0<a.length&&a[0]!==void 0?a[0]:b.url,d=1<a.length?a[1]:void 0;console.warn("\r\nWarning:","function .getSites() is deprecated, please use the function .fetch()\r\n");var e={},f=[];try{var g=yield b.fetch(c);f=g.sites}catch(a){e=a}return d(e,f)})()}decompressResponseBody(a){return new Promise((b,c)=>{var d=Buffer.from(a);_zlib.default.gunzip(d,(a,d)=>{a?c(a):b(d)})})}}exports.default=Sitemapper,module.exports=exports.default,module.exports.default=exports.default;


TypeError: b.cancel is not a function
    at Timeout._onTimeout (/Users/user1/.nvm/versions/node/v18.17.1/lib/node_modules/google-indexing-script/node_modules/sitemapper/lib/assets/sitemapper.js:1:2619)
    at listOnTimeout (node:internal/timers:569:17)
    at process.processTimers (node:internal/timers:512:7)

Node.js v18.17.1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant