/** * URL Parsing and Manipulation Utilities * * Provides comprehensive URL processing functions: * - URL parsing and component extraction * - Query string processing * - Cookie parsing * - URL sanitization and filtering * - Path segment analysis */ /** * Parse a URL into its components * @param {string} url - URL to parse * @returns {Object} Parsed URL components */ function parseUrl(url) { if (!url) return {}; // URL parsing regex that matches: // protocol://host/path?query#hash const urlRegex = /^(([^:/?#]+):)?(\/\/([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?$/; const match = url.match(urlRegex); if (!match) return {}; const queryString = match[6] || ""; const hash = match[8] || ""; return { host: match[4], path: match[5], protocol: match[2], search: queryString, hash: hash, relative: match[5] + queryString + hash, }; } /** * Strip query string and fragment from URL * @param {string} url - URL to process * @returns {string} URL without query and fragment */ function stripUrlQueryAndFragment(url) { return url.split(/[\?#]/)[0]; } /** * Count the number of path segments in a URL * @param {string} url - URL to analyze * @returns {number} Number of path segments */ function getNumberOfUrlSegments(url) { return url .split(/\\?\//) .filter((segment) => segment.length > 0 && segment !== ",").length; } /** * Create a sanitized URL string for logging/display * Removes sensitive information like credentials * @param {Object} urlComponents - Parsed URL components * @returns {string} Sanitized URL string */ function getSanitizedUrlString(urlComponents) { const { protocol, host, path } = urlComponents; // Sanitize host to remove credentials let sanitizedHost = (host && host .replace(/^.*@/, "[filtered]:[filtered]@") // Remove user:pass@ .replace(/(:80)$/, "") // Remove default HTTP port .replace(/(:443)$/, "")) || ""; // Remove default HTTPS port return `${protocol ? `${protocol}://` : ""}${sanitizedHost}${path || ""}`; } /** * Parse cookie string into object * @param {string} cookieString - Cookie header string * @returns {Object} Parsed cookies object */ function parseCookie(cookieString) { const cookies = {}; let position = 0; while (position < cookieString.length) { const equalIndex = cookieString.indexOf("=", position); if (equalIndex === -1) break; let semicolonIndex = cookieString.indexOf(";", position); if (semicolonIndex === -1) { semicolonIndex = cookieString.length; } else if (semicolonIndex < equalIndex) { position = cookieString.lastIndexOf(";", equalIndex - 1) + 1; continue; } const name = cookieString.slice(position, equalIndex).trim(); if (cookies[name] === undefined) { let value = cookieString.slice(equalIndex + 1, semicolonIndex).trim(); // Remove quotes if present if (value.charCodeAt(0) === 34) { value = value.slice(1, -1); } try { cookies[name] = value.indexOf("%") !== -1 ? decodeURIComponent(value) : value; } catch (error) { cookies[name] = value; } } position = semicolonIndex + 1; } return cookies; } /** * Extract request data from various request object formats * @param {Object} request - Request object (Express, Koa, etc.) * @param {Object} options - Extraction options * @returns {Object} Extracted request data */ function extractRequestData(request, options = {}) { const { include = DEFAULT_INCLUDES, deps } = options; const extracted = {}; const headers = request.headers || {}; const method = request.method; const hostname = headers.host || request.hostname || request.host || ""; const protocol = request.protocol === "https" || (request.socket && request.socket.encrypted) ? "https" : "http"; const originalUrl = request.originalUrl || request.url || ""; const fullUrl = originalUrl.startsWith(protocol) ? originalUrl : `${protocol}://${hostname}${originalUrl}`; include.forEach((field) => { switch (field) { case "headers": extracted.headers = headers; if (!include.includes("cookies")) { delete extracted.headers.cookie; } break; case "method": extracted.method = method; break; case "url": extracted.url = fullUrl; break; case "cookies": extracted.cookies = request.cookies || (headers.cookie && parseCookie(headers.cookie)) || {}; break; case "query_string": extracted.query_string = extractQueryString(request, deps); break; case "data": if (method === "GET" || method === "HEAD") break; if (request.body !== undefined) { extracted.data = typeof request.body === "string" ? request.body : JSON.stringify(normalizeRequestBody(request.body)); } break; default: if ({}.hasOwnProperty.call(request, field)) { extracted[field] = request[field]; } } }); return extracted; } /** * Extract query string from request * @param {Object} request - Request object * @param {Object} deps - Dependencies (URL parser, etc.) * @returns {string|undefined} Query string */ function extractQueryString(request, deps) { const originalUrl = request.originalUrl || request.url || ""; if (!originalUrl) return undefined; // Ensure URL is properly formatted for parsing let fullUrl = originalUrl; if (originalUrl.startsWith("/")) { fullUrl = `http://example.com${originalUrl}`; } try { // Try different methods to extract query string if (request.query) { return request.query; } if (typeof URL !== "undefined") { return new URL(fullUrl).search.slice(1); } if (deps && deps.url && deps.url.parse) { return deps.url.parse(fullUrl).query; } return undefined; } catch (error) { return undefined; } } /** * Normalize request body for serialization * @param {any} body - Request body * @returns {any} Normalized body */ function normalizeRequestBody(body) { if (typeof body === "string" || body instanceof Buffer) { return body; } if (body && typeof body === "object") { try { return JSON.parse(JSON.stringify(body)); } catch (error) { return "[Non-serializable body]"; } } return body; } /** * Convert WinterCG Headers to dictionary * @param {Headers} headers - WinterCG Headers object * @returns {Object} Headers dictionary */ function winterCGHeadersToDict(headers) { const result = {}; try { headers.forEach((value, key) => { if (typeof value === "string") { result[key] = value; } }); } catch (error) { console.warn("Failed to extract headers from request object"); } return result; } /** * Convert WinterCG Request to request data * @param {Request} request - WinterCG Request object * @returns {Object} Request data */ function winterCGRequestToRequestData(request) { const headers = winterCGHeadersToDict(request.headers); return { method: request.method, url: request.url, headers: headers, }; } /** * Default fields to include in request data extraction */ const DEFAULT_INCLUDES = [ "cookies", "data", "headers", "method", "query_string", "url", ]; /** * Validate and clean URL for security * @param {string} url - URL to validate * @param {Object} options - Validation options * @returns {Object} Validation result */ function validateUrl(url, options = {}) { const { allowedProtocols = ["http:", "https:"], allowedDomains = null, blockedDomains = [], maxLength = 2048, } = options; const result = { isValid: false, url: url, issues: [], }; if (!url || typeof url !== "string") { result.issues.push("Invalid URL type"); return result; } if (url.length > maxLength) { result.issues.push(`URL too long (max ${maxLength})`); return result; } try { const parsed = parseUrl(url); if (parsed.protocol && !allowedProtocols.includes(parsed.protocol + ":")) { result.issues.push(`Protocol '${parsed.protocol}' not allowed`); } if ( allowedDomains && parsed.host && !allowedDomains.includes(parsed.host) ) { result.issues.push(`Domain '${parsed.host}' not in allowlist`); } if (parsed.host && blockedDomains.includes(parsed.host)) { result.issues.push(`Domain '${parsed.host}' is blocked`); } result.isValid = result.issues.length === 0; result.parsed = parsed; } catch (error) { result.issues.push(`Parse error: ${error.message}`); } return result; } module.exports = { parseUrl, stripUrlQueryAndFragment, getNumberOfUrlSegments, getSanitizedUrlString, parseCookie, extractRequestData, extractQueryString, winterCGHeadersToDict, winterCGRequestToRequestData, validateUrl, normalizeRequestBody, DEFAULT_INCLUDES, };