Skip to content

Commit

Permalink
Async await (#70)
Browse files Browse the repository at this point in the history
* Moving promises to async/await

* Cleaning up file

* Edit examples

* fix up

Co-authored-by: Sean Thomas Burke <[email protected]>
  • Loading branch information
seantomburke and seantomburke authored Nov 25, 2020
1 parent e96d14d commit 6104d68
Show file tree
Hide file tree
Showing 7 changed files with 154 additions and 106 deletions.
2 changes: 1 addition & 1 deletion .eslintrc.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
module.exports = {
extends: 'eslint:recommended',
parserOptions: {
ecmaVersion: 6,
ecmaVersion: 8,
sourceType: 'module',
ecmaFeatures: {},
},
Expand Down
21 changes: 12 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,19 +34,22 @@ sitemap.fetch('https://wp.seantburke.com/sitemap.xml').then(function(sites) {
```javascript
import Sitemapper from 'sitemapper';

const Google = new Sitemapper({
url: 'https://www.google.com/work/sitemap.xml',
timeout: 15000, // 15 seconds
});

Google.fetch()
.then(data => console.log(data.sites))
.catch(error => console.log(error));
(async () => {
const Google = new Sitemapper({
url: 'https://www.google.com/work/sitemap.xml',
timeout: 15000, // 15 seconds
});

try {
const { sites } = await Google.fetch();
console.log(sites);
} catch (error) {
console.log(error);
}
})();

// or


const sitemapper = new Sitemapper();
sitemapper.timeout = 5000;

Expand Down
52 changes: 33 additions & 19 deletions example.es6.js
Original file line number Diff line number Diff line change
@@ -1,27 +1,41 @@
import Sitemapper from 'sitemapper';

const sitemapper = new Sitemapper();
// Wrap the examples in an async function that is invoked immediately so that `await` can be used.
(async () => {
const sitemapper = new Sitemapper();

const Google = new Sitemapper({
url: 'https://www.google.com/work/sitemap.xml',
debug: false,
timeout: 15000, // 15 seconds
});
// Instance configured up front: fetch() is called without arguments below and uses this url.
const Google = new Sitemapper({
url: 'https://www.google.com/work/sitemap.xml',
debug: false,
timeout: 15000, // 15 seconds
});

Google.fetch()
.then(data => console.log(data.sites))
.catch(error => console.log(error));
// Each request has its own try/catch, so an error in one example is logged and the next one still runs.
try {
const data = await Google.fetch();
console.log(data.sites);
} catch(error) {
console.log(error);
}

sitemapper.timeout = 5000;
// The timeout can also be assigned on an existing instance.
sitemapper.timeout = 5000;

sitemapper.fetch('https://wp.seantburke.com/sitemap.xml')
.then(({ url, sites }) => console.log(`url:${url}`, 'sites:', sites))
.catch(error => console.log(error));
// The url may be passed to fetch() directly instead of through the constructor.
try {
const { url, sites } = await sitemapper.fetch('https://wp.seantburke.com/sitemap.xml');
console.log(`url:${url}`, 'sites:', sites);
} catch(error) {
console.log(error)
}

sitemapper.fetch('http://www.cnn.com/sitemaps/sitemap-index.xml')
.then(data => console.log(data))
.catch(error => console.log(error));
try {
const { url, sites } = await sitemapper.fetch('http://www.cnn.com/sitemaps/sitemap-index.xml');
console.log(`url:${url}`, 'sites:', sites);
} catch(error) {
console.log(error)
}

sitemapper.fetch('http://www.stubhub.com/new-sitemap/us/sitemap-US-en-index.xml')
.then((data) => console.log(data))
.catch(error => console.log(error));
try {
const { url, sites } = await sitemapper.fetch('http://www.stubhub.com/new-sitemap/us/sitemap-US-en-index.xml');
console.log(`url:${url}`, 'sites:', sites);
} catch(error) {
console.log(error)
}
})();
2 changes: 1 addition & 1 deletion lib/assets/sitemapper.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion lib/examples/index.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

179 changes: 105 additions & 74 deletions src/assets/sitemapper.js
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,22 @@ export default class Sitemapper {
* @example sitemapper.fetch('example.xml')
* .then(({ sites }) => console.log(sites));
*/
fetch(url = this.url) {
return new Promise(resolve => this.crawl(url).then(sites => resolve({ url, sites })));
async fetch(url = this.url) {
let sites = [];
try {
// crawl the URL
sites = await this.crawl(url);
} catch (e) {
if (this.debug) {
console.error(e);
}
}

// If we run into an error, don't throw, but instead return an empty array
return {
url,
sites,
}
}

/**
Expand Down Expand Up @@ -111,28 +125,51 @@ export default class Sitemapper {
* @param {string} [url] - the Sitemaps url (e.g https://wp.seantburke.com/sitemap.xml)
* @returns {Promise<ParseData>}
*/
parse(url = this.url) {
async parse(url = this.url) {
// Requests `url` with got and parses the response body with parseStringPromise.
// Every outcome, including failures, is returned as an `{ error, data }` object
// instead of being thrown.
// setup the response options for the got request
const requestOptions = {
method: 'GET',
resolveWithFullResponse: true,
gzip: true,
headers: this.requestHeaders,
};

return new Promise((resolve) => {
try {
// create a request Promise with the url and request options
const requester = got(url, requestOptions);
requester.then((response) => {
if (!response || response.statusCode !== 200) {
clearTimeout(this.timeoutTable[url]);
return resolve({ error: response.error, data: response });
}
return parseStringPromise(response.body);
})
.then(data => resolve({ error: null, data }))
.catch(response => resolve({ error: response.error, data: response }));

this.initializeTimeout(url, requester, resolve);
});

// initialize the timeout method based on the URL, and pass the request object.
this.initializeTimeout(url, requester);

// wait for the request to settle; a rejection (including the cancellation
// triggered by the timeout) is handled in the catch block below
const response = await requester;

// if the response does not have a successful status code then clear the timeout for this url.
// NOTE(review): when `response` is falsy, reading `response.error` below throws a
// TypeError, which the catch block then converts into a result object.
if (!response || response.statusCode !== 200) {
clearTimeout(this.timeoutTable[url]);
return { error: response.error, data: response };
}

// otherwise parse the XML that was returned.
const data = await parseStringPromise(response.body);

// return the results
return { error: null, data }
} catch (error) {
// If the request was canceled notify the user of the timeout
if (error.name === 'CancelError') {
return {
error: `Request timed out after ${this.timeout} milliseconds for url: '${url}'`,
data: error
}
}

// Otherwise notify of another error
// NOTE(review): `error.error` is undefined unless the thrown value carries an `error`
// property; callers that test `error` for truthiness would then not see a failure — confirm.
return {
error: error.error,
data: error
}
}
}

/**
Expand All @@ -142,22 +179,10 @@ export default class Sitemapper {
* @private
* @param {string} url - url to use as a hash in the timeoutTable
* @param {Promise} requester - the promise that creates the web request to the url
* @param {Function} callback - the resolve method is used here to resolve the parent promise
*/
initializeTimeout(url, requester, callback) {
// this resolves instead of rejects in order to allow other requests to continue
this.timeoutTable[url] = setTimeout(() => {
requester.cancel();

if (this.debug) {
console.debug('crawl timed out');
}

callback({
error: `request timed out after ${this.timeout} milliseconds for url: '${url}'`,
data: {},
});
}, this.timeout);
initializeTimeout(url, requester) {
// this will throw a CancelError which will be handled in the parent that calls this method.
this.timeoutTable[url] = setTimeout(() => requester.cancel(), this.timeout);
}

/**
Expand All @@ -168,47 +193,52 @@ export default class Sitemapper {
* @param {string} url - the Sitemaps url (e.g https://wp.seantburke.com/sitemap.xml)
* @returns {Promise<SitesArray> | Promise<ParseData>}
*/
crawl(url) {
return new Promise((resolve) => {
this.parse(url).then(({ error, data }) => {
// The promise resolved, remove the timeout
clearTimeout(this.timeoutTable[url]);
async crawl(url) {
// Parses `url` and resolves with the site locations it lists. A sitemap index is
// followed recursively by calling crawl() on every child sitemap location.
try {
const { error, data } = await this.parse(url);
// The promise resolved, remove the timeout
clearTimeout(this.timeoutTable[url]);

if (error) {
if (this.debug) {
console.error(`Error occurred during "crawl('${url}')":\n\r Error: ${error}`);
}
// Fail silently
return resolve([]);
} else if (data && data.urlset && data.urlset.url) {
if (this.debug) {
console.debug(`Urlset found during "crawl('${url}')"`);
}
const sites = data.urlset.url.map(site => site.loc && site.loc[0]);
return resolve([].concat(sites));
} else if (data && data.sitemapindex) {
if (this.debug) {
console.debug(`Additional sitemap found during "crawl('${url}')"`);
}
// Map each child url into a promise to create an array of promises
const sitemap = data.sitemapindex.sitemap.map(map => map.loc && map.loc[0]);
const promiseArray = sitemap.map(site => this.crawl(site));

// Make sure all the promises resolve then filter and reduce the array
return Promise.all(promiseArray).then(results => {
const sites = results.filter(result => !result.error)
.reduce((prev, curr) => prev.concat(curr), []);

return resolve(sites);
});
}
if (error) {
if (this.debug) {
console.error(`Unknown state during "crawl(${url})":`, error, data);
}
console.error(`Error occurred during "crawl('${url}')":\n\r Error: ${error}`);
}
// Fail silently
return resolve([]);
});
});
return [];
} else if (data && data.urlset && data.urlset.url) {
if (this.debug) {
console.debug(`Urlset found during "crawl('${url}')"`);
}
// Each entry contributes its first `loc` value (or a falsy value when `loc` is missing).
const sites = data.urlset.url.map(site => site.loc && site.loc[0]);
return [].concat(sites);
} else if (data && data.sitemapindex) {
if (this.debug) {
console.debug(`Additional sitemap found during "crawl('${url}')"`);
}
// Map each child url into a promise to create an array of promises
const sitemap = data.sitemapindex.sitemap.map(map => map.loc && map.loc[0]);
const promiseArray = sitemap.map(site => this.crawl(site));

// Make sure all the promises resolve then filter and reduce the array
// NOTE(review): each result is whatever a child crawl() resolved with. Arrays have no
// `error` property, so the filter keeps all of them; an undefined result (see the
// catch block below) would make `result.error` throw — confirm the intent.
const results = await Promise.all(promiseArray);
const sites = results
.filter(result => !result.error)
.reduce((prev, curr) => prev.concat(curr), []);

return sites;
}

if (this.debug) {
// NOTE(review): the closing quote and parenthesis after ${url} are transposed in this message.
console.error(`Unknown state during "crawl('${url})'":`, error, data);
}

// Fail silently
return [];
} catch (e) {
// NOTE(review): nothing is returned on this path, so the method resolves with
// undefined here, while every other path resolves with an array.
if (this.debug) {
// NOTE(review): the `this.debug &&` guard repeats the enclosing `if`.
this.debug &&console.error(e);
}
}
}


Expand All @@ -220,18 +250,19 @@ export default class Sitemapper {
* @param {getSitesCallback} callback - callback for sites and error
* @callback
*/
getSites(url = this.url, callback) {
async getSites(url = this.url, callback) {
// Deprecated callback-style wrapper around fetch(): prints a deprecation warning,
// fetches `url`, then returns the result of `callback(err, sites)`.
console.warn( // eslint-disable-line no-console
'\r\nWarning:', 'function .getSites() is deprecated, please use the function .fetch()\r\n'
);

// NOTE(review): `err` starts as an empty object, so the callback receives a truthy
// first argument even when fetch() succeeds — confirm callers do not test it for truthiness.
let err = {};
let sites = [];
this.fetch(url).then(response => {
try {
const response = await this.fetch(url);
sites = response.sites;
}).catch(error => {
err = error;
});
} catch (e) {
err = e;
}
return callback(err, sites);
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/examples/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ const exampleURL = 'https://www.walmart.com/sitemap_topic.xml';
const sitemapper = new Sitemapper({
url: exampleURL, // url to crawl
debug: true, // show debug logs
timeout: 10000, // 10 seconds
timeout: 1, // NOTE(review): 1 millisecond, not 10 seconds — looks like a leftover test value; confirm
});

/**
Expand Down

0 comments on commit 6104d68

Please sign in to comment.