Skip to content

Commit

Permalink
feat: support gzip sitemaps (#73)
Browse files Browse the repository at this point in the history
* feat: support gzip sitemaps

* chore: use async gzip method

* refactor: simplify code and change method name

* chore: cleanup

* chore: cleanup
  • Loading branch information
jasonaibrahim authored Jan 11, 2021
1 parent 43ce56a commit e5b647e
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 9 deletions.
2 changes: 1 addition & 1 deletion lib/assets/sitemapper.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

47 changes: 39 additions & 8 deletions src/assets/sitemapper.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@

import { parseStringPromise } from 'xml2js';
import got from 'got';
import zlib from 'zlib';
import Url from 'url';
import path from 'path';

/**
* @typedef {Object} Sitemapper
Expand All @@ -26,7 +29,7 @@ export default class Sitemapper {
* });
*/
constructor(options) {
const settings = options || {'requestHeaders': {}};
const settings = options || { 'requestHeaders': {} };
this.url = settings.url;
this.timeout = settings.timeout || 15000;
this.timeoutTable = {};
Expand Down Expand Up @@ -58,7 +61,7 @@ export default class Sitemapper {
return {
url,
sites,
}
};
}

/**
Expand Down Expand Up @@ -131,6 +134,7 @@ export default class Sitemapper {
method: 'GET',
resolveWithFullResponse: true,
gzip: true,
responseType: 'buffer',
headers: this.requestHeaders,
};

Expand All @@ -150,25 +154,33 @@ export default class Sitemapper {
return { error: response.error, data: response };
}

let responseBody;

if (this.isGzip(url)) {
responseBody = await this.decompressResponseBody(response.body);
} else {
responseBody = response.body;
}

// otherwise parse the XML that was returned.
const data = await parseStringPromise(response.body);
const data = await parseStringPromise(responseBody);

// return the results
return { error: null, data }
return { error: null, data };
} catch (error) {
// If the request was canceled notify the user of the timeout
if (error.name === 'CancelError') {
return {
error: `Request timed out after ${this.timeout} milliseconds for url: '${url}'`,
data: error
}
};
}

// Otherwise notify of another error
return {
error: error.error,
data: error
}
};
}
}

Expand Down Expand Up @@ -236,7 +248,7 @@ export default class Sitemapper {
return [];
} catch (e) {
if (this.debug) {
this.debug &&console.error(e);
this.debug && console.error(e);
}
}
}
Expand All @@ -249,7 +261,7 @@ export default class Sitemapper {
* @param {string} url - url to query
* @param {getSitesCallback} callback - callback for sites and error
* @callback
*/
*/
async getSites(url = this.url, callback) {
console.warn( // eslint-disable-line no-console
'\r\nWarning:', 'function .getSites() is deprecated, please use the function .fetch()\r\n'
Expand All @@ -265,6 +277,25 @@ export default class Sitemapper {
}
return callback(err, sites);
}

isGzip(url) {
const parsed = Url.parse(url);
const ext = path.extname(parsed.path);
return ext === '.gz';
}

decompressResponseBody(body) {
return new Promise((resolve, reject) => {
const buffer = Buffer.from(body);
zlib.gunzip(buffer, function (err, result) {
if (err) {
reject(err);
} else {
resolve(result);
}
});
});
}
}

/**
Expand Down
17 changes: 17 additions & 0 deletions src/tests/test.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import 'should';
import isUrl from 'is-url';

import Sitemapper from '../../lib/assets/sitemapper.js';

let sitemapper;

describe('Sitemapper', function () {
Expand Down Expand Up @@ -136,6 +137,22 @@ describe('Sitemapper', function () {
done(error);
});
});

// Fetches a live `.xml.gz` sitemap over the network and expects the
// returned `sites` list to contain at least one entry.
it('https://www.banggood.com/sitemap/products-Toys-Hobbies-and-Robot-5-hu-HU.xml.gz gzip should be a non-empty array', function (done) {
// Test-level timeout (30s) is larger than the sitemapper request timeout (10s) set below.
this.timeout(30000);
const url = 'https://www.banggood.com/sitemap/products-Toys-Hobbies-and-Robot-5-hu-HU.xml.gz';
sitemapper.timeout = 10000;
sitemapper.fetch(url)
.then(data => {
data.sites.should.be.Array;
data.sites.length.should.be.greaterThan(0);
done();
})
.catch(error => {
// Pass the error to `done` so the failure is reported with its cause.
console.error('Test failed');
done(error);
});
});
});

describe('getSites method', function () {
Expand Down

0 comments on commit e5b647e

Please sign in to comment.