// see https://tools.ietf.org/html/rfc1808 var URL_REGEX = /^((?:[a-zA-Z0-9+\-.]+:)?)(\/\/[^\/?#]*)?((?:[^\/\?#]*\/)*.*?)??(;.*?)?(\?.*?)?(#.*?)?$/; var FIRST_SEGMENT_REGEX = /^([^\/?#]*)(.*)$/; var SLASH_DOT_REGEX = /(?:\/|^)\.(?=\/)/g; var SLASH_DOT_DOT_REGEX = /(?:\/|^)\.\.\/(?!\.\.\/).*?(?=\/)/g; var URLToolkit = { // jshint ignore:line // If opts.alwaysNormalize is true then the path will always be normalized even when it starts with / or // // E.g // With opts.alwaysNormalize = false (default, spec compliant) // http://a.com/b/cd + /e/f/../g => http://a.com/e/f/../g // With opts.alwaysNormalize = true (not spec compliant) // http://a.com/b/cd + /e/f/../g => http://a.com/e/g buildAbsoluteURL: function (baseURL, relativeURL, opts) { opts = opts || {}; // remove any remaining space and CRLF baseURL = baseURL.trim(); relativeURL = relativeURL.trim(); if (!relativeURL) { // 2a) If the embedded URL is entirely empty, it inherits the // entire base URL (i.e., is set equal to the base URL) // and we are done. if (!opts.alwaysNormalize) { return baseURL; } var basePartsForNormalise = URLToolkit.parseURL(baseURL); if (!basePartsForNormalise) { throw new Error('Error trying to parse base URL.'); } basePartsForNormalise.path = URLToolkit.normalizePath(basePartsForNormalise.path); return URLToolkit.buildURLFromParts(basePartsForNormalise); } var relativeParts = URLToolkit.parseURL(relativeURL); if (!relativeParts) { throw new Error('Error trying to parse relative URL.'); } if (relativeParts.scheme) { // 2b) If the embedded URL starts with a scheme name, it is // interpreted as an absolute URL and we are done. if (!opts.alwaysNormalize) { return relativeURL; } relativeParts.path = URLToolkit.normalizePath(relativeParts.path); return URLToolkit.buildURLFromParts(relativeParts); } var baseParts = URLToolkit.parseURL(baseURL); if (!baseParts) { throw new Error('Error trying to parse base URL.'); } if (!baseParts.netLoc && baseParts.path && baseParts.path[0] !== '/') { // If netLoc missing and path doesn't start with '/', assume everthing before the first '/' is the netLoc // This causes 'example.com/a' to be handled as '//example.com/a' instead of '/example.com/a' var pathParts = FIRST_SEGMENT_REGEX.exec(baseParts.path); baseParts.netLoc = pathParts[1]; baseParts.path = pathParts[2]; } if (baseParts.netLoc && !baseParts.path) { baseParts.path = '/'; } var builtParts = { // 2c) Otherwise, the embedded URL inherits the scheme of // the base URL. scheme: baseParts.scheme, netLoc: relativeParts.netLoc, path: null, params: relativeParts.params, query: relativeParts.query, fragment: relativeParts.fragment }; if (!relativeParts.netLoc) { // 3) If the embedded URL's is non-empty, we skip to // Step 7. Otherwise, the embedded URL inherits the // (if any) of the base URL. builtParts.netLoc = baseParts.netLoc; // 4) If the embedded URL path is preceded by a slash "/", the // path is not relative and we skip to Step 7. if (relativeParts.path[0] !== '/') { if (!relativeParts.path) { // 5) If the embedded URL path is empty (and not preceded by a // slash), then the embedded URL inherits the base URL path builtParts.path = baseParts.path; // 5a) if the embedded URL's is non-empty, we skip to // step 7; otherwise, it inherits the of the base // URL (if any) and if (!relativeParts.params) { builtParts.params = baseParts.params; // 5b) if the embedded URL's is non-empty, we skip to // step 7; otherwise, it inherits the of the base // URL (if any) and we skip to step 7. if (!relativeParts.query) { builtParts.query = baseParts.query; } } } else { // 6) The last segment of the base URL's path (anything // following the rightmost slash "/", or the entire path if no // slash is present) is removed and the embedded URL's path is // appended in its place. var baseURLPath = baseParts.path; var newPath = baseURLPath.substring(0, baseURLPath.lastIndexOf('/') + 1) + relativeParts.path; builtParts.path = URLToolkit.normalizePath(newPath); } } } if (builtParts.path === null) { builtParts.path = opts.alwaysNormalize ? URLToolkit.normalizePath(relativeParts.path) : relativeParts.path; } return URLToolkit.buildURLFromParts(builtParts); }, parseURL: function (url) { var parts = URL_REGEX.exec(url); if (!parts) { return null; } return { scheme: parts[1] || '', netLoc: parts[2] || '', path: parts[3] || '', params: parts[4] || '', query: parts[5] || '', fragment: parts[6] || '' }; }, normalizePath: function (path) { // The following operations are // then applied, in order, to the new path: // 6a) All occurrences of "./", where "." is a complete path // segment, are removed. // 6b) If the path ends with "." as a complete path segment, // that "." is removed. path = path.split('').reverse().join('').replace(SLASH_DOT_REGEX, ''); // 6c) All occurrences of "/../", where is a // complete path segment not equal to "..", are removed. // Removal of these path segments is performed iteratively, // removing the leftmost matching pattern on each iteration, // until no matching pattern remains. // 6d) If the path ends with "/..", where is a // complete path segment not equal to "..", that // "/.." is removed. while (path.length !== (path = path.replace(SLASH_DOT_DOT_REGEX, '')).length) { } // jshint ignore:line return path.split('').reverse().join(''); }, buildURLFromParts: function (parts) { return parts.scheme + parts.netLoc + parts.path + parts.params + parts.query + parts.fragment; } }; export default URLToolkit;