From bce0974e1851515cf440d181332dfef02018eb13 Mon Sep 17 00:00:00 2001 From: Terry Lewis Date: Fri, 24 Dec 2021 09:23:12 +1100 Subject: [PATCH] fix: Use is-gzip to determine if response is gzipped (#93) --- lib/assets/sitemapper.js | 2 +- package-lock.json | 5 +++++ package.json | 1 + src/assets/sitemapper.js | 19 +++---------------- 4 files changed, 10 insertions(+), 17 deletions(-) diff --git a/lib/assets/sitemapper.js b/lib/assets/sitemapper.js index 27405d4..fdb12da 100644 --- a/lib/assets/sitemapper.js +++ b/lib/assets/sitemapper.js @@ -1,2 +1,2 @@ -"use strict";Object.defineProperty(exports,"__esModule",{value:!0}),exports.default=void 0;var _xml2js=require("xml2js"),_got=_interopRequireDefault(require("got")),_zlib=_interopRequireDefault(require("zlib")),_url=_interopRequireDefault(require("url")),_path=_interopRequireDefault(require("path")),_pLimit=_interopRequireDefault(require("p-limit"));function _interopRequireDefault(a){return a&&a.__esModule?a:{default:a}}function asyncGeneratorStep(a,b,c,d,e,f,g){try{var h=a[f](g),i=h.value}catch(a){return void c(a)}h.done?b(i):Promise.resolve(i).then(d,e)}function _asyncToGenerator(a){return function(){var b=this,c=arguments;return new Promise(function(d,e){function f(a){asyncGeneratorStep(h,d,e,f,g,"next",a)}function g(a){asyncGeneratorStep(h,d,e,f,g,"throw",a)}var h=a.apply(b,c);f(void 0)})}}class Sitemapper{constructor(a){var b=a||{requestHeaders:{}};this.url=b.url,this.timeout=b.timeout||15e3,this.timeoutTable={},this.requestHeaders=b.requestHeaders,this.debug=b.debug,this.concurrency=b.concurrency||10,this.retries=b.retries||0}fetch(){var a=arguments,b=this;return _asyncToGenerator(function*(){var c=0b.cancel(),this.timeout)}crawl(a){var b=arguments,c=this;return _asyncToGenerator(function*(){var d=1a.loc&&a.loc[0]);return{sites:m,errors:[]}}if(l&&l.sitemapindex){c.debug&&console.debug("Additional sitemap found during \"crawl('".concat(a,"')\""));var e=l.sitemapindex.sitemap.map(a=>a.loc&&a.loc[0]),f=(0,_pLimit.default)(c.concurrency),g=e.map(a=>f(()=>c.crawl(a))),h=yield Promise.all(g),i=h.filter(a=>0===a.errors.length).reduce((a,b)=>{var{sites:c}=b;return[...a,...c]},[]),j=h.filter(a=>0!==a.errors.length).reduce((a,b)=>{var{errors:c}=b;return[...a,...c]},[]);return{sites:i,errors:j}}return d{var d=Buffer.from(a);_zlib.default.gunzip(d,(a,d)=>{a?c(a):b(d)})})}}exports.default=Sitemapper,module.exports=exports.default,module.exports.default=exports.default; +"use strict";Object.defineProperty(exports,"__esModule",{value:!0}),exports.default=void 0;var _xml2js=require("xml2js"),_got=_interopRequireDefault(require("got")),_zlib=_interopRequireDefault(require("zlib")),_pLimit=_interopRequireDefault(require("p-limit")),_isGzip=_interopRequireDefault(require("is-gzip"));function _interopRequireDefault(a){return a&&a.__esModule?a:{default:a}}function asyncGeneratorStep(a,b,c,d,e,f,g){try{var h=a[f](g),i=h.value}catch(a){return void c(a)}h.done?b(i):Promise.resolve(i).then(d,e)}function _asyncToGenerator(a){return function(){var b=this,c=arguments;return new Promise(function(d,e){function f(a){asyncGeneratorStep(h,d,e,f,g,"next",a)}function g(a){asyncGeneratorStep(h,d,e,f,g,"throw",a)}var h=a.apply(b,c);f(void 0)})}}class Sitemapper{constructor(a){var b=a||{requestHeaders:{}};this.url=b.url,this.timeout=b.timeout||15e3,this.timeoutTable={},this.requestHeaders=b.requestHeaders,this.debug=b.debug,this.concurrency=b.concurrency||10,this.retries=b.retries||0}fetch(){var a=arguments,b=this;return _asyncToGenerator(function*(){var c=0b.cancel(),this.timeout)}crawl(a){var b=arguments,c=this;return _asyncToGenerator(function*(){var d=1a.loc&&a.loc[0]);return{sites:m,errors:[]}}if(l&&l.sitemapindex){c.debug&&console.debug("Additional sitemap found during \"crawl('".concat(a,"')\""));var e=l.sitemapindex.sitemap.map(a=>a.loc&&a.loc[0]),f=(0,_pLimit.default)(c.concurrency),g=e.map(a=>f(()=>c.crawl(a))),h=yield Promise.all(g),i=h.filter(a=>0===a.errors.length).reduce((a,b)=>{var{sites:c}=b;return[...a,...c]},[]),j=h.filter(a=>0!==a.errors.length).reduce((a,b)=>{var{errors:c}=b;return[...a,...c]},[]);return{sites:i,errors:j}}return d{var d=Buffer.from(a);_zlib.default.gunzip(d,(a,d)=>{a?c(a):b(d)})})}}exports.default=Sitemapper,module.exports=exports.default,module.exports.default=exports.default; //# sourceMappingURL=sitemapper.js.map \ No newline at end of file diff --git a/package-lock.json b/package-lock.json index 89c6737..3afa6b0 100644 --- a/package-lock.json +++ b/package-lock.json @@ -5073,6 +5073,11 @@ "is-extglob": "^2.1.1" } }, + "is-gzip": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/is-gzip/-/is-gzip-2.0.0.tgz", + "integrity": "sha512-jtO4Njg6q58zDo/Pu4027beSZ0VdsZlt8/5Moco6yAg+DIxb5BK/xUYqYG2+MD4+piKldXJNHxRkhEYI2fvrxA==" + }, "is-hexadecimal": { "version": "1.0.4", "resolved": "https://registry.npmjs.org/is-hexadecimal/-/is-hexadecimal-1.0.4.tgz", diff --git a/package.json b/package.json index 6229e28..e62c39d 100644 --- a/package.json +++ b/package.json @@ -78,6 +78,7 @@ }, "dependencies": { "got": "^11.8.0", + "is-gzip": "2.0.0", "p-limit": "^3.1.0", "xml2js": "^0.4.23" } diff --git a/src/assets/sitemapper.js b/src/assets/sitemapper.js index caa7b6a..cf1cb7e 100644 --- a/src/assets/sitemapper.js +++ b/src/assets/sitemapper.js @@ -9,9 +9,8 @@ import { parseStringPromise } from 'xml2js'; import got from 'got'; import zlib from 'zlib'; -import Url from 'url'; -import path from 'path'; import pLimit from 'p-limit'; +import isGzip from 'is-gzip'; /** * @typedef {Object} Sitemapper @@ -154,7 +153,7 @@ export default class Sitemapper { try { // create a request Promise with the url and request options - const requester = got(url, requestOptions); + const requester = got.get(url, requestOptions); // initialize the timeout method based on the URL, and pass the request object. this.initializeTimeout(url, requester); @@ -170,7 +169,7 @@ export default class Sitemapper { let responseBody; - if (this.isGzip(url)) { + if (isGzip(response.rawBody)) { responseBody = await this.decompressResponseBody(response.body); } else { responseBody = response.body; @@ -341,18 +340,6 @@ export default class Sitemapper { return callback(err, sites); } - /** - * Check to see if the url is a gzipped url - * - * @param {string} url - url to query - * @returns {Boolean} - */ - isGzip(url) { - const parsed = Url.parse(url); - const ext = path.extname(parsed.path); - return ext === '.gz'; - } - /** * Decompress the gzipped response body using zlib.gunzip *