Skip to content
This repository has been archived by the owner on Jul 15, 2019. It is now read-only.

Commit

Permalink
added path resolution
Browse files Browse the repository at this point in the history
  • Loading branch information
adon committed Nov 6, 2015
1 parent 86f0637 commit 10059f1
Show file tree
Hide file tree
Showing 3 changed files with 122 additions and 45 deletions.
46 changes: 25 additions & 21 deletions src/lib/urlFilters.js
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ _urlFilters.specialSchemeDefaultPort = {'ftp:': '21', 'file:': '', 'gopher:': '7
* @param {boolean} options.imgDataURIs - to allow data scheme with the
* MIME type equal to image/gif, image/jpeg, image/jpg, or image/png, and
* the encoding format as base64
* @param {boolean} options.IDNAtoASCII - convert all domains to their ASCII
* format according to RFC 3492 and RFC 5891 for matching/comparisons. See
* https://nodejs.org/api/punycode.html for details.
* @param {urlFilterFactoryAbsCallback} options.absCallback - if matched,
* called to further process the url, scheme, hostname, non-default port, and
* path
Expand All @@ -75,7 +78,7 @@ _urlFilters.specialSchemeDefaultPort = {'ftp:': '21', 'file:': '', 'gopher:': '7
* no callback is provided, return the matched url or prefix it with
* "unsafe:" for unmatched ones.
*/
function urlFilterFactory (options) {
_urlFilters.yUrlFilterFactory = function (options) {
/*jshint -W030 */
options || (options = {});

Expand All @@ -88,8 +91,9 @@ function urlFilterFactory (options) {
reEscape = /[.*?+\\\[\](){}|\^$]/g,
// the following whitespaces are allowed in origin
reOriginWhitespaces = /[\t\n\r]+/g,
// reIPv4 matches the pattern of IPv4 address and its hex representation
// used only when options.subdomain is set
// reIPv4 matches an IPv4 address or its hex representation, with an
// optional dot in front or behind. used only when options.subdomain
// is set
// Ref: https://url.spec.whatwg.org/#concept-ipv4-parser
reIPv4 = options.subdomain && /^\.?(?:(?:25[0-5]|2[0-4]\d|[01]?\d\d?|0[xX][\dA-Fa-f]{1,2})\.){3}(?:25[0-5]|2[0-4]\d|[01]?\d\d?|0[xX][\dA-Fa-f]{1,2})\.?$/,
// reImgDataURIs hardcodes the image data URIs that are known to be safe
Expand Down Expand Up @@ -169,8 +173,10 @@ function urlFilterFactory (options) {
'(?:[^\\x00#\\/:?\\[\\]\\\\]+\\.)*' :
'';

// convert any IDNA domains to ASCII for comparisons if so configured
// escapes t from regexp sensitive chars
arr[i] += t.replace(reEscape, '\\$&');
arr[i] += (options.IDNAtoASCII ? punycode.toASCII(t) : t).
replace(reEscape, '\\$&');
}

// build reAuthHostsPort from the hosts array, must be case insensitive
Expand All @@ -193,17 +199,17 @@ function urlFilterFactory (options) {
// whitespaces to be later stripped
// Ref: https://url.spec.whatwg.org/#port-state
reAuthHostsPort = new RegExp(
'^[\\/\\\\]*(?:([^\\/\\\\?#]*)@)?' + // leading slashes and authority
'(' + arr.join('|') + ')' + // allowed hostnames, in regexp
'(?::?$|:([\\d\\t\\n\\r]+)|([\\/?#\\\\]))', // until an optional colon then EOF, a port, or a delimeter
'i'); // case insensitive required for hostnames
'^[\\/\\\\]*(?:([^\\/\\\\?#]*)@)?' + // leading slashes and authority
'(' + arr.join('|') + ')' + // allowed hostnames, in regexp
'(?::?$|:([\\d\\t\\n\\r]+)|(?=[\\/?#\\\\]))', // until an optional colon then EOF, a port, or a delimeter
'i'); // case insensitive required for hostnames
}
// extract the auth, hostname and port number if options.absCallback is supplied
else if (options.absCallback) {
// the default reAuthHostsPort. see above for details
// hostname must be present, auth/port optional
// accept \t\n\r, which will be later stripped
reAuthHostsPort = /^[\/\\]*(?:([^\/\\?#]*)@)?([^\x00#\/:?\[\]\\]+|\[(?:[^\x00\/?#\\]+)\])(?::?$|:([\d\t\n\r]+)|([\/?#\\]))/;
reAuthHostsPort = /^[\/\\]*(?:([^\/\\?#]*)@)?([^\x00#\/:?\[\]\\]+|\[(?:[^\x00\/?#\\]+)\])(?::?$|:([\d\t\n\r]+)|(?=[\/?#\\]))/;
}

/*
Expand Down Expand Up @@ -269,25 +275,23 @@ function urlFilterFactory (options) {

// if auth, hostname and port are properly validated
if ((authHostPort = reAuthHostsPort.exec(remainingUrl))) {
// spec simply says whitespaces are syntax violation
// stripping them follows browsers' behavior
port = authHostPort[3] ? authHostPort[3].replace(reOriginWhitespaces, empty) : empty;
// spec simply says \t\r\n are syntax violation
// to observe browsers' behavior, strip them in auth/host/port
authHostPort[2] = authHostPort[2].replace(reOriginWhitespaces, empty).toLowerCase(); // host
port = authHostPort[3] ? authHostPort[3].replace(reOriginWhitespaces, empty) : empty; // port

return absCallback(url,
scheme[1],
// spec simply says whitespaces are syntax violation
// stripping them follows browsers' behavior
authHostPort[1] ? authHostPort[1].replace(reOriginWhitespaces, empty) : empty,
authHostPort[2].replace(reOriginWhitespaces, empty).toLowerCase(),
authHostPort[1] ? authHostPort[1].replace(reOriginWhitespaces, empty) : empty, // auth
// convert any IDNA domains to ASCII for comparisons if so configured
options.IDNAtoASCII ? punycode.toASCII(authHostPort[2]) : authHostPort[2],
// pass '' instead of the default port, if given
port === defaultPort ? empty : port,
// minus the delimiter if captured
remainingUrl.slice(authHostPort[0].length - (authHostPort[4] ? 1 : 0)));
remainingUrl.slice(authHostPort[0].length));
}
}

return unsafeCallback(url);
};
};

// export the util to create url filter
_urlFilters.yUrlFilterFactory = urlFilterFactory;
};
89 changes: 71 additions & 18 deletions src/lib/urlResolver.js
Original file line number Diff line number Diff line change
Expand Up @@ -36,41 +36,92 @@ function _absUrlResolver(url, origin, scheme, path, baseOrigin, baseScheme, base
return (scheme === '' ? baseScheme : '') + origin + path;
}

// Classifies the character found at index i of path:
//   1 - a path separator, i.e., / or \
//   2 - the start of a query or fragment, i.e., ? or #
//   0 - anything else (including an index beyond the end of path)
function _resolvePathSymbol(path, i) {
    switch (path.charCodeAt(i)) {
        case 47: // forward slash
        case 92: // backslash
            return 1;
        case 35: // #
        case 63: // ?
            return 2;
        default:
            return 0;
    }
}

// matches a path segment that is exactly one dot; the dot may be
// percent-encoded as %2e or %2E
var _resolvePathSingleDot = /^(?:\.|%2[eE])$/;
// matches a path segment that is exactly two dots; each dot may
// independently be percent-encoded as %2e or %2E
var _resolvePathDoubleDots = /^(?:\.|%2[eE]){2}$/;

// Normalizes the dot segments of a path, resembling what the spec requires
// except for the handling specific to the file scheme.
// Ref: https://url.spec.whatwg.org/#path-state
//
// - path is scanned from index 1, i.e., its first character is taken as the
//   leading slash
// - both / and \ are treated as segment separators
// - a "." segment is dropped, and a ".." segment also drops the segment before it
// - scanning stops at the first ? or #, and everything from there on is
//   appended untouched
// - segments are re-joined with \ when scheme is 'file:', and / otherwise
function _resolvePath(path, scheme) {
    var separator = scheme === 'file:' ? '\\' : '/',
        total = path.length,
        segments = [],
        count = 0,      // number of live entries in segments
        start = 1,      // index of the character just after the last separator
        end,            // index of the character being examined
        kind,           // result of _resolvePathSymbol for the latest examined character
        segment,
        isDoubleDots;

    for (end = 1; end <= total; end++) {
        // a segment ends at a separator, at ? or #, or at EOF (end === total).
        // kind is refreshed only while still inside the string
        if (end !== total) {
            kind = _resolvePathSymbol(path, end);
            if (!kind) { continue; }
        }

        segment = path.slice(start, end);
        isDoubleDots = _resolvePathDoubleDots.test(segment);

        if (isDoubleDots || _resolvePathSingleDot.test(segment)) {
            // ".." removes the previous segment, if there is any
            if (isDoubleDots && count !== 0) { count--; }
            // a dot segment not followed by a separator leaves an empty
            // segment behind, so that the result still ends with a separator
            if (kind !== 1) { segments[count++] = ''; }
        } else {
            segments[count++] = segment;
        }

        // the query or fragment begins here, and it is not of any interest
        if (kind === 2) { break; }

        start = end + 1;
    }

    // the resolved path as a string, followed by the remaining query/fragment
    return separator + segments.slice(0, count).join(separator) + path.slice(end);
}

// Locates where the query or fragment of path begins, i.e., returns
//   the index of the first # if there is no ?, or
//   the index of the first ? if there is no #, or
//   the smaller of the two indices if both exist
// hence, -1 is returned only when path has neither ? nor #
function _queryOrFragmentPosition(path) {
    var queryIndex = path.indexOf('?'),
        fragmentIndex = path.indexOf('#');

    if (queryIndex === -1) { return fragmentIndex; }
    if (fragmentIndex === -1) { return queryIndex; }
    return Math.min(queryIndex, fragmentIndex);
}

function _relUrlResolver(path, baseOrigin, baseScheme, basePath, options) {
if (path.length === 0) { return baseOrigin + basePath; }
var resolve = options.resolvePath ? _resolvePath : function(p) {return p;};

if (path.length === 0) { return baseOrigin + resolve(basePath, baseScheme); }

var pos = -1, hashPos = -1,
firstCharCode = path.charCodeAt(0);
var pos = -1, firstCharCode = path.charCodeAt(0);

/* / or \ */
if (firstCharCode === 47 || firstCharCode === 92) {
return baseOrigin + path;
return baseOrigin + resolve(path, baseScheme);
}

/* # */
if (firstCharCode === 35) {
if (options.appendFragment) {
pos = basePath.indexOf('#');
} else { return path; }
} else {
// pos set to the first ? if it's before the first #,
// or the first # if ? does not exists
pos = basePath.indexOf('?');
hashPos = basePath.indexOf('#');
if (pos === -1 || hashPos !== -1 && hashPos < pos) {
pos = hashPos;
}

pos = basePath.indexOf('#');
} else { return path; } // no _resolvePath needed
}
/* ? or else */
else {
// the position of ? or #, whichever is earlier
pos = _queryOrFragmentPosition(basePath);

// advance to position of the last / or \\ before the position of ? or #
// +1 to preserve the slash in basePath if it exists
if (firstCharCode !== 63) { // not ?
path = '/' + path;
// remove the fromIndex constraint if no ? nor # was encountered
pos = pos === -1 ? undefined : pos;
pos = Math.max(basePath.lastIndexOf('/', pos),
basePath.lastIndexOf('\\', pos));
pos !== -1 && pos++;
}
}

// replace base path's component, if any, with the new one
return baseOrigin + (pos === -1 ? basePath :
basePath.slice(0, pos)) + path;
return baseOrigin + resolve(
(pos === -1 ? basePath : basePath.slice(0, pos)) + path, baseScheme);
}

function _unsafeUrlResolver(url) {
Expand All @@ -89,6 +140,8 @@ _urlFilters.yUrlResolver = function (options) {
relResolver = typeof options.relResolver === 'function' ? options.relResolver : _relUrlResolver,
unsafeResolver = options.unsafeResolver || _unsafeUrlResolver;

options.resolvePath = options.resolvePath !== false;

bFilter = _urlFilters.yUrlFilterFactory({
relScheme: relScheme,
schemes: schemes,
Expand Down
32 changes: 26 additions & 6 deletions tests/unit/url-filters-yUrlResolver.js
Original file line number Diff line number Diff line change
Expand Up @@ -145,18 +145,18 @@ Authors: Nera Liu <[email protected]>
});

baseURL = 'http://www.yahoo.com/fin\\ance\\hello?world#test/ing?complex#url';
expect(yUrlResolver('asdf', baseURL)).to.eql('http://www.yahoo.com/fin\\ance/asdf');
expect(yUrlResolver('asdf', baseURL)).to.eql('http://www.yahoo.com/fin/ance/asdf');
expect(yUrlResolver('/asdf', baseURL)).to.eql('http://www.yahoo.com/asdf');
expect(yUrlResolver('?asdf', baseURL)).to.eql('http://www.yahoo.com/fin\\ance\\hello?asdf');
expect(yUrlResolver('#asdf', baseURL)).to.eql('http://www.yahoo.com/fin\\ance\\hello?world#asdf');
expect(yUrlResolver('?asdf', baseURL)).to.eql('http://www.yahoo.com/fin/ance/hello?asdf');
expect(yUrlResolver('#asdf', baseURL)).to.eql('http://www.yahoo.com/fin/ance/hello?world#asdf');
absUrls.forEach(function(url, i){
expect(yUrlResolver(url, baseURL)).to.eql(absUrlsAnswers[i]);
});
// last baseURL is being kept
expect(yUrlResolver('asdf')).to.eql('http://www.yahoo.com/fin\\ance/asdf');
expect(yUrlResolver('asdf')).to.eql('http://www.yahoo.com/fin/ance/asdf');
expect(yUrlResolver('/asdf')).to.eql('http://www.yahoo.com/asdf');
expect(yUrlResolver('?asdf')).to.eql('http://www.yahoo.com/fin\\ance\\hello?asdf');
expect(yUrlResolver('#asdf')).to.eql('http://www.yahoo.com/fin\\ance\\hello?world#asdf');
expect(yUrlResolver('?asdf')).to.eql('http://www.yahoo.com/fin/ance/hello?asdf');
expect(yUrlResolver('#asdf')).to.eql('http://www.yahoo.com/fin/ance/hello?world#asdf');
absUrls.forEach(function(url, i){
expect(yUrlResolver(url)).to.eql(absUrlsAnswers[i]);
});
Expand Down Expand Up @@ -208,6 +208,26 @@ Authors: Nera Liu <[email protected]>
expect(yUrlResolver('#abc', baseURL)).to.eql('#abc');
expect(yUrlResolver('hello/world.html', baseURL)).to.eql('http://yahoo.com/hello/world.html');
expect(yUrlResolver('/hello/world.html', baseURL)).to.eql('http://yahoo.com/hello/world.html');

// path resolution
expect(yUrlResolver('../../hello/world.html', baseURL)).to.eql('http://yahoo.com/hello/world.html');
expect(yUrlResolver('/hello/hello2/../', baseURL)).to.eql('http://yahoo.com/hello/');
expect(yUrlResolver('/hello/hello2/..', baseURL)).to.eql('http://yahoo.com/hello/');

expect(yUrlResolver('/hello3/hello2/../../..', baseURL)).to.eql('http://yahoo.com/');
expect(yUrlResolver('/hello3/hello2/../../../', baseURL)).to.eql('http://yahoo.com/');
expect(yUrlResolver('/hello3/hello2/../../../hello', baseURL)).to.eql('http://yahoo.com/hello');
expect(yUrlResolver('/hello3/hello2/../../..?hello', baseURL)).to.eql('http://yahoo.com/?hello');
expect(yUrlResolver('/hello3/hello2/../../..#hello', baseURL)).to.eql('http://yahoo.com/#hello');

expect(yUrlResolver('/hello3/hello2/../../.', baseURL)).to.eql('http://yahoo.com/');
expect(yUrlResolver('/hello3/hello2/../.././', baseURL)).to.eql('http://yahoo.com/');
expect(yUrlResolver('/hello3/hello2/../.././hello', baseURL)).to.eql('http://yahoo.com/hello');
expect(yUrlResolver('/hello3/hello2/../../.?hello', baseURL)).to.eql('http://yahoo.com/?hello');
expect(yUrlResolver('/hello3/hello2/../../.#hello', baseURL)).to.eql('http://yahoo.com/#hello');

expect(yUrlResolver('/hello2/../hello/world.html', baseURL)).to.eql('http://yahoo.com/hello/world.html');
expect(yUrlResolver('/hello2/../hello/./world.html', baseURL)).to.eql('http://yahoo.com/hello/world.html');
});

it('mailto: scheme URLs and any relative paths', function() {
Expand Down

0 comments on commit 10059f1

Please sign in to comment.