From 7a74f0573b9cc107fd1e18316bd0ef409e8bdc86 Mon Sep 17 00:00:00 2001 From: Yagiz Nizipli Date: Wed, 22 Jan 2025 19:44:06 -0500 Subject: [PATCH] move implementation to header files --- include/ada.h | 2 + include/ada/helpers.h | 1 - include/ada/implementation-inl.h | 32 +++ include/ada/implementation.h | 10 +- include/ada/parser-inl.h | 278 ++++++++++++++++++++++ include/ada/parser.h | 8 +- include/ada/url.h | 17 +- include/ada/url_aggregator-inl.h | 1 - include/ada/url_aggregator.h | 6 +- include/ada/url_base-inl.h | 5 - include/ada/url_base.h | 1 - include/ada/url_components.h | 5 - include/ada/url_pattern-inl.h | 327 +++++++++++++++++++++++++- include/ada/url_pattern.h | 1 - include/ada/url_pattern_helpers-inl.h | 295 ++++++++++++++++++++++- include/ada/url_pattern_helpers.h | 5 + src/implementation.cpp | 36 +-- src/parser.cpp | 259 +------------------- src/url_pattern.cpp | 326 ------------------------- src/url_pattern_helpers.cpp | 290 ----------------------- 20 files changed, 969 insertions(+), 936 deletions(-) create mode 100644 include/ada/implementation-inl.h create mode 100644 include/ada/parser-inl.h diff --git a/include/ada.h b/include/ada.h index 30a884fe9..d13a81d20 100644 --- a/include/ada.h +++ b/include/ada.h @@ -13,6 +13,7 @@ #include "ada/encoding_type.h" #include "ada/helpers.h" #include "ada/parser.h" +#include "ada/parser-inl.h" #include "ada/scheme-inl.h" #include "ada/serializers.h" #include "ada/state.h" @@ -35,5 +36,6 @@ // Public API #include "ada/ada_version.h" #include "ada/implementation.h" +#include "ada/implementation-inl.h" #endif // ADA_H diff --git a/include/ada/helpers.h b/include/ada/helpers.h index 96c4b5e15..f7e710746 100644 --- a/include/ada/helpers.h +++ b/include/ada/helpers.h @@ -6,7 +6,6 @@ #define ADA_HELPERS_H #include "ada/common_defs.h" -#include "ada/state.h" #include "ada/url_base.h" #include diff --git a/include/ada/implementation-inl.h b/include/ada/implementation-inl.h new file mode 100644 index 000000000..8857546ad --- /dev/null +++ b/include/ada/implementation-inl.h @@ -0,0 +1,32 @@ +/** + * @file implementation-inl.h + */ +#ifndef ADA_IMPLEMENTATION_INL_H +#define ADA_IMPLEMENTATION_INL_H + +#include "ada/url_pattern_regex.h" +#include "ada/expected.h" +#include "ada/implementation.h" + + +namespace ada { +template +ada_warn_unused tl::expected, errors> +parse_url_pattern(std::variant input, + const std::string_view* base_url, + const url_pattern_options* options, + std::optional provider) { + return parser::parse_url_pattern_impl( + std::move(input), base_url, options, + provider.value_or(url_pattern_regex::std_regex_provider())); +} + +extern template ada_warn_unused + tl::expected, errors> + parse_url_pattern( + std::variant input, + const std::string_view* base_url, const url_pattern_options* options, + std::optional provider); +} // namespace ada + +#endif // ADA_IMPLEMENTATION_INL_H diff --git a/include/ada/implementation.h b/include/ada/implementation.h index ae3ef2080..c3401dfb3 100644 --- a/include/ada/implementation.h +++ b/include/ada/implementation.h @@ -9,10 +9,8 @@ #include #include -#include "ada/parser.h" #include "ada/common_defs.h" #include "ada/url.h" -#include "ada/url_pattern_regex.h" namespace ada { enum class errors : uint8_t { type_error }; @@ -54,7 +52,7 @@ bool can_parse(std::string_view input, * @param input valid UTF-8 string or URLPatternInit struct * @param base_url an optional valid UTF-8 string * @param options an optional url_pattern_options struct - * @param regex_provider an optional regex provider. if not provided, it will + * @param provider an optional regex provider. if not provided, it will * use ada::url_pattern_regex::std_regex_provider * @return url_pattern instance */ @@ -66,6 +64,12 @@ parse_url_pattern(std::variant input, const url_pattern_options* options = nullptr, std::optional provider = std::nullopt); +extern template ada_warn_unused tl::expected, errors> +parse_url_pattern(std::variant input, + const std::string_view* base_url, + const url_pattern_options* options, + std::optional provider); + /** * Computes a href string from a file path. The function assumes * that the input is a valid ASCII or UTF-8 string. diff --git a/include/ada/parser-inl.h b/include/ada/parser-inl.h new file mode 100644 index 000000000..5cdd4f763 --- /dev/null +++ b/include/ada/parser-inl.h @@ -0,0 +1,278 @@ +/** + * @file parser-inl.h + */ +#ifndef ADA_PARSER_INL_H +#define ADA_PARSER_INL_H + +#include "ada/expected.h" +#include "ada/url_pattern.h" +#include "ada/url_pattern_helpers.h" +#include "ada/parser.h" + +namespace ada::parser { +template +tl::expected, errors> parse_url_pattern_impl( + std::variant input, + const std::string_view* base_url, const url_pattern_options* options, + regex_provider&& provider) { + // Let init be null. + url_pattern_init init; + + // If input is a scalar value string then: + if (std::holds_alternative(input)) { + // Set init to the result of running parse a constructor string given input. + auto parse_result = + url_pattern_helpers::constructor_string_parser::parse( + std::get(input), provider); + if (!parse_result) { + ada_log("constructor_string_parser::parse failed"); + return tl::unexpected(parse_result.error()); + } + init = std::move(*parse_result); + // If baseURL is null and init["protocol"] does not exist, then throw a + // TypeError. + if (!base_url && !init.protocol) { + ada_log("base url is null and protocol is not set"); + return tl::unexpected(errors::type_error); + } + + // If baseURL is not null, set init["baseURL"] to baseURL. + if (base_url) { + init.base_url = std::string(*base_url); + } + } else { + // Assert: input is a URLPatternInit. + ADA_ASSERT_TRUE(std::holds_alternative(input)); + // If baseURL is not null, then throw a TypeError. + if (base_url) { + ada_log("base url is not null"); + return tl::unexpected(errors::type_error); + } + // Optimization: Avoid copy by moving the input value. + // Set init to input. + init = std::move(std::get(input)); + } + + // Let processedInit be the result of process a URLPatternInit given init, + // "pattern", null, null, null, null, null, null, null, and null. + // TODO: Make "pattern" an enum to avoid creating a string everytime. + auto processed_init = url_pattern_init::process(init, "pattern"); + if (!processed_init) { + ada_log("url_pattern_init::process failed for init and 'pattern'"); + return tl::unexpected(processed_init.error()); + } + + // For each componentName of « "protocol", "username", "password", "hostname", + // "port", "pathname", "search", "hash" If processedInit[componentName] does + // not exist, then set processedInit[componentName] to "*". + ADA_ASSERT_TRUE(processed_init.has_value()); + if (!processed_init->protocol) processed_init->protocol = "*"; + if (!processed_init->username) processed_init->username = "*"; + if (!processed_init->password) processed_init->password = "*"; + if (!processed_init->hostname) processed_init->hostname = "*"; + if (!processed_init->port) processed_init->port = "*"; + if (!processed_init->pathname) processed_init->pathname = "*"; + if (!processed_init->search) processed_init->search = "*"; + if (!processed_init->hash) processed_init->hash = "*"; + + ada_log("-- processed_init->protocol: ", processed_init->protocol.value()); + ada_log("-- processed_init->username: ", processed_init->username.value()); + ada_log("-- processed_init->password: ", processed_init->password.value()); + ada_log("-- processed_init->hostname: ", processed_init->hostname.value()); + ada_log("-- processed_init->port: ", processed_init->port.value()); + ada_log("-- processed_init->pathname: ", processed_init->pathname.value()); + ada_log("-- processed_init->search: ", processed_init->search.value()); + ada_log("-- processed_init->hash: ", processed_init->hash.value()); + + // If processedInit["protocol"] is a special scheme and processedInit["port"] + // is a string which represents its corresponding default port in radix-10 + // using ASCII digits then set processedInit["port"] to the empty string. + // TODO: Optimization opportunity. + if (scheme::is_special(*processed_init->protocol)) { + std::string_view port = processed_init->port.value(); + helpers::trim_c0_whitespace(port); + if (std::to_string(scheme::get_special_port(*processed_init->protocol)) == + port) { + processed_init->port->clear(); + } + } + + // Let urlPattern be a new URL pattern. + url_pattern url_pattern_(std::move(provider)); + + // Set urlPattern’s protocol component to the result of compiling a component + // given processedInit["protocol"], canonicalize a protocol, and default + // options. + auto protocol_component = url_pattern_component::compile( + processed_init->protocol.value(), + url_pattern_helpers::canonicalize_protocol, + url_pattern_compile_component_options::DEFAULT, provider); + if (!protocol_component) { + ada_log("url_pattern_component::compile failed for protocol ", + processed_init->protocol.value()); + return tl::unexpected(protocol_component.error()); + } + url_pattern_.protocol_component = std::move(*protocol_component); + + // Set urlPattern’s username component to the result of compiling a component + // given processedInit["username"], canonicalize a username, and default + // options. + auto username_component = url_pattern_component::compile( + processed_init->username.value(), + url_pattern_helpers::canonicalize_username, + url_pattern_compile_component_options::DEFAULT, provider); + if (!username_component) { + ada_log("url_pattern_component::compile failed for username ", + processed_init->username.value()); + return tl::unexpected(username_component.error()); + } + url_pattern_.username_component = std::move(*username_component); + + // Set urlPattern’s password component to the result of compiling a component + // given processedInit["password"], canonicalize a password, and default + // options. + auto password_component = url_pattern_component::compile( + processed_init->password.value(), + url_pattern_helpers::canonicalize_password, + url_pattern_compile_component_options::DEFAULT, provider); + if (!password_component) { + ada_log("url_pattern_component::compile failed for password ", + processed_init->password.value()); + return tl::unexpected(password_component.error()); + } + url_pattern_.password_component = std::move(*password_component); + + // TODO: Optimization opportunity. The following if statement can be + // simplified. + // If the result running hostname pattern is an IPv6 address given + // processedInit["hostname"] is true, then set urlPattern’s hostname component + // to the result of compiling a component given processedInit["hostname"], + // canonicalize an IPv6 hostname, and hostname options. + if (url_pattern_helpers::is_ipv6_address(processed_init->hostname.value())) { + ada_log("processed_init->hostname is ipv6 address"); + // then set urlPattern’s hostname component to the result of compiling a + // component given processedInit["hostname"], canonicalize an IPv6 hostname, + // and hostname options. + auto hostname_component = url_pattern_component::compile( + processed_init->hostname.value(), + url_pattern_helpers::canonicalize_ipv6_hostname, + url_pattern_compile_component_options::DEFAULT, provider); + if (!hostname_component) { + ada_log("url_pattern_component::compile failed for ipv6 hostname ", + processed_init->hostname.value()); + return tl::unexpected(hostname_component.error()); + } + url_pattern_.hostname_component = std::move(*hostname_component); + } else { + // Otherwise, set urlPattern’s hostname component to the result of compiling + // a component given processedInit["hostname"], canonicalize a hostname, and + // hostname options. + auto hostname_component = url_pattern_component::compile( + processed_init->hostname.value(), + url_pattern_helpers::canonicalize_hostname, + url_pattern_compile_component_options::HOSTNAME, provider); + if (!hostname_component) { + ada_log("url_pattern_component::compile failed for hostname ", + processed_init->hostname.value()); + return tl::unexpected(hostname_component.error()); + } + url_pattern_.hostname_component = std::move(*hostname_component); + } + + // Set urlPattern’s port component to the result of compiling a component + // given processedInit["port"], canonicalize a port, and default options. + auto port_component = url_pattern_component::compile( + processed_init->port.value(), url_pattern_helpers::canonicalize_port, + url_pattern_compile_component_options::DEFAULT, provider); + if (!port_component) { + ada_log("url_pattern_component::compile failed for port ", + processed_init->port.value()); + return tl::unexpected(port_component.error()); + } + url_pattern_.port_component = std::move(*port_component); + + // Let compileOptions be a copy of the default options with the ignore case + // property set to options["ignoreCase"]. + auto compile_options = url_pattern_compile_component_options::DEFAULT; + if (options) { + compile_options.ignore_case = options->ignore_case; + } + + // TODO: Optimization opportunity: Simplify this if statement. + // If the result of running protocol component matches a special scheme given + // urlPattern’s protocol component is true, then: + if (url_pattern_helpers::protocol_component_matches_special_scheme< + regex_provider>(url_pattern_.protocol_component)) { + // Let pathCompileOptions be copy of the pathname options with the ignore + // case property set to options["ignoreCase"]. + auto path_compile_options = url_pattern_compile_component_options::PATHNAME; + if (options) { + path_compile_options.ignore_case = options->ignore_case; + } + + // Set urlPattern’s pathname component to the result of compiling a + // component given processedInit["pathname"], canonicalize a pathname, and + // pathCompileOptions. + auto pathname_component = url_pattern_component::compile( + processed_init->pathname.value(), + url_pattern_helpers::canonicalize_pathname, path_compile_options, + provider); + if (!pathname_component) { + ada_log("url_pattern_component::compile failed for pathname ", + processed_init->pathname.value()); + return tl::unexpected(pathname_component.error()); + } + url_pattern_.pathname_component = std::move(*pathname_component); + } else { + // Otherwise set urlPattern’s pathname component to the result of compiling + // a component given processedInit["pathname"], canonicalize an opaque + // pathname, and compileOptions. + auto pathname_component = url_pattern_component::compile( + processed_init->pathname.value(), + url_pattern_helpers::canonicalize_opaque_pathname, compile_options, + provider); + if (!pathname_component) { + ada_log("url_pattern_component::compile failed for opaque pathname ", + processed_init->pathname.value()); + return tl::unexpected(pathname_component.error()); + } + url_pattern_.pathname_component = std::move(*pathname_component); + } + + // Set urlPattern’s search component to the result of compiling a component + // given processedInit["search"], canonicalize a search, and compileOptions. + auto search_component = url_pattern_component::compile( + processed_init->search.value(), url_pattern_helpers::canonicalize_search, + compile_options, provider); + if (!search_component) { + ada_log("url_pattern_component::compile failed for search ", + processed_init->search.value()); + return tl::unexpected(search_component.error()); + } + url_pattern_.search_component = std::move(*search_component); + + // Set urlPattern’s hash component to the result of compiling a component + // given processedInit["hash"], canonicalize a hash, and compileOptions. + auto hash_component = url_pattern_component::compile( + processed_init->hash.value(), url_pattern_helpers::canonicalize_hash, + compile_options, provider); + if (!hash_component) { + ada_log("url_pattern_component::compile failed for hash ", + processed_init->hash.value()); + return tl::unexpected(hash_component.error()); + } + url_pattern_.hash_component = std::move(*hash_component); + + // Return urlPattern. + return url_pattern_; +} + +template tl::expected, + errors> +parse_url_pattern_impl(std::variant input, + const std::string_view* base_url, + const url_pattern_options* options, + url_pattern_regex::std_regex_provider&& provider); +} // namespace ada::parser + +#endif // ADA_PARSER_INL_H diff --git a/include/ada/parser.h b/include/ada/parser.h index 02b7dadf1..fdb1c6df1 100644 --- a/include/ada/parser.h +++ b/include/ada/parser.h @@ -11,11 +11,12 @@ #include "ada/expected.h" #include "ada/url_pattern_regex.h" + /** * @private */ namespace ada { -struct url_aggregator; +class url_aggregator; struct url; template class url_pattern; @@ -59,6 +60,11 @@ tl::expected, errors> parse_url_pattern_impl( const std::string_view* base_url, const url_pattern_options* options, regex_provider&& provider); +extern template tl::expected, errors> parse_url_pattern_impl( + std::variant input, + const std::string_view* base_url, const url_pattern_options* options, + url_pattern_regex::std_regex_provider&& provider); + } // namespace ada::parser #endif // ADA_PARSER_H diff --git a/include/ada/url.h b/include/ada/url.h index 09279f447..0f361636b 100644 --- a/include/ada/url.h +++ b/include/ada/url.h @@ -6,24 +6,29 @@ #define ADA_URL_H #include -#include -#include #include #include #include #include "ada/checkers.h" #include "ada/common_defs.h" -#include "ada/log.h" -#include "ada/scheme.h" -#include "ada/serializers.h" -#include "ada/unicode.h" #include "ada/url_base.h" #include "ada/url_components.h" #include "ada/helpers.h" namespace ada { +class url_aggregator; + +// namespace parser { +// template +// result_type parse_url(std::string_view user_input, +// const result_type* base_url = nullptr); +// template +// result_type parse_url_impl(std::string_view user_input, +// const result_type* base_url = nullptr); +// } + /** * @brief Generic URL struct reliant on std::string instantiation. * diff --git a/include/ada/url_aggregator-inl.h b/include/ada/url_aggregator-inl.h index 2012b79d4..01bd80665 100644 --- a/include/ada/url_aggregator-inl.h +++ b/include/ada/url_aggregator-inl.h @@ -7,7 +7,6 @@ #include "ada/character_sets.h" #include "ada/character_sets-inl.h" -#include "ada/checkers.h" #include "ada/helpers.h" #include "ada/unicode-inl.h" #include "ada/url_aggregator.h" diff --git a/include/ada/url_aggregator.h b/include/ada/url_aggregator.h index 053e577fa..77650be23 100644 --- a/include/ada/url_aggregator.h +++ b/include/ada/url_aggregator.h @@ -8,14 +8,16 @@ #include #include -#include "ada/url_pattern.h" #include "ada/common_defs.h" #include "ada/url_base.h" #include "ada/url_components.h" -#include "ada/parser.h" namespace ada { +namespace parser { + +} + /** * @brief Lightweight URL struct. * diff --git a/include/ada/url_base-inl.h b/include/ada/url_base-inl.h index 948205eb2..892d14bda 100644 --- a/include/ada/url_base-inl.h +++ b/include/ada/url_base-inl.h @@ -5,15 +5,10 @@ #ifndef ADA_URL_BASE_INL_H #define ADA_URL_BASE_INL_H -#include "ada/url_aggregator.h" -#include "ada/url_components.h" #include "ada/scheme.h" -#include "ada/scheme-inl.h" -#include "ada/log.h" #include "ada/checkers.h" #include "ada/url.h" -#include #include #if ADA_REGULAR_VISUAL_STUDIO #include diff --git a/include/ada/url_base.h b/include/ada/url_base.h index 9d2461d7a..334b860e0 100644 --- a/include/ada/url_base.h +++ b/include/ada/url_base.h @@ -6,7 +6,6 @@ #define ADA_URL_BASE_H #include "ada/common_defs.h" -#include "ada/url_components.h" #include "ada/scheme.h" #include diff --git a/include/ada/url_components.h b/include/ada/url_components.h index a72767bb0..3c7594540 100644 --- a/include/ada/url_components.h +++ b/include/ada/url_components.h @@ -5,11 +5,6 @@ #ifndef ADA_URL_COMPONENTS_H #define ADA_URL_COMPONENTS_H -#include "ada/common_defs.h" - -#include -#include - namespace ada { /** diff --git a/include/ada/url_pattern-inl.h b/include/ada/url_pattern-inl.h index 7e621bc86..41dd71478 100644 --- a/include/ada/url_pattern-inl.h +++ b/include/ada/url_pattern-inl.h @@ -6,7 +6,7 @@ #define ADA_URL_PATTERN_INL_H #include "ada/common_defs.h" -#include "ada/url_pattern.h" +#include "ada/url_pattern_helpers.h" #include @@ -163,6 +163,331 @@ inline std::string_view url_pattern_compile_component_options::get_prefix() } return {}; } + +template +template +tl::expected, errors> +url_pattern_component::compile( + std::string_view input, F& encoding_callback, + url_pattern_compile_component_options& options, + const regex_provider& provider) { + ada_log("url_pattern_component::compile input: ", input); + // Let part list be the result of running parse a pattern string given input, + // options, and encoding callback. + auto part_list = url_pattern_helpers::parse_pattern_string(input, options, + encoding_callback); + + if (!part_list) { + ada_log("parse_pattern_string failed"); + return tl::unexpected(part_list.error()); + } + + // Let (regular expression string, name list) be the result of running + // generate a regular expression and name list given part list and options. + auto [regular_expression_string, name_list] = + url_pattern_helpers::generate_regular_expression_and_name_list(*part_list, + options); + + ada_log("regular expression string: ", regular_expression_string); + + // Let pattern string be the result of running generate a pattern + // string given part list and options. + auto pattern_string = + url_pattern_helpers::generate_pattern_string(*part_list, options); + + // Let regular expression be RegExpCreate(regular expression string, + // flags). If this throws an exception, catch it, and throw a + // TypeError. + std::optional regular_expression = + provider.create_instance(regular_expression_string, options.ignore_case); + + if (!regular_expression) { + return tl::unexpected(errors::type_error); + } + + // For each part of part list: + // - If part’s type is "regexp", then set has regexp groups to true. + const auto has_regexp = [](const auto& part) { return part.is_regexp(); }; + const bool has_regexp_groups = std::ranges::any_of(*part_list, has_regexp); + + ada_log("has regexp groups: ", has_regexp_groups); + + // Return a new component whose pattern string is pattern string, regular + // expression is regular expression, group name list is name list, and has + // regexp groups is has regexp groups. + return url_pattern_component( + std::move(pattern_string), std::move(*regular_expression), + std::move(name_list), has_regexp_groups); +} + +template +result> url_pattern::exec( + const url_pattern_input& input, std::string_view* base_url) { + // Return the result of match given this's associated URL pattern, input, and + // baseURL if given. + return match(input, base_url); +} + +template +result url_pattern::test(const url_pattern_input& input, + std::string_view* base_url) { + // TODO: Optimization opportunity. Rather than returning `url_pattern_result` + // Implement a fast path just like `can_parse()` in ada_url. + // Let result be the result of match given this's associated URL pattern, + // input, and baseURL if given. + // If result is null, return false. + if (auto result = match(input, base_url); result.has_value()) { + return result->has_value(); + } + return tl::unexpected(errors::type_error); +} + +template +result> url_pattern::match( + const url_pattern_input& input, std::string_view* base_url_string) { + std::string protocol{}; + std::string username{}; + std::string password{}; + std::string hostname{}; + std::string port{}; + std::string pathname{}; + std::string search{}; + std::string hash{}; + + // Let inputs be an empty list. + // Append input to inputs. + std::vector inputs{input}; + + // If input is a URLPatternInit then: + if (std::holds_alternative(input)) { + ada_log( + "url_pattern::match called with url_pattern_init and base_url_string=", + base_url_string); + // If baseURLString was given, throw a TypeError. + if (base_url_string) { + ada_log("failed to match because base_url_string was given"); + return tl::unexpected(errors::type_error); + } + + // Let applyResult be the result of process a URLPatternInit given input, + // "url", protocol, username, password, hostname, port, pathname, search, + // and hash. + auto apply_result = url_pattern_init::process( + std::get(input), "url", protocol, username, password, + hostname, port, pathname, search, hash); + + // If this throws an exception, catch it, and return null. + if (!apply_result.has_value()) { + ada_log("match returned std::nullopt because process threw"); + return std::nullopt; + } + + // Set protocol to applyResult["protocol"]. + ADA_ASSERT_TRUE(apply_result->protocol.has_value()); + protocol = apply_result->protocol.value(); + + // Set username to applyResult["username"]. + ADA_ASSERT_TRUE(apply_result->username.has_value()); + username = apply_result->username.value(); + + // Set password to applyResult["password"]. + ADA_ASSERT_TRUE(apply_result->password.has_value()); + password = apply_result->password.value(); + + // Set hostname to applyResult["hostname"]. + ADA_ASSERT_TRUE(apply_result->hostname.has_value()); + hostname = apply_result->hostname.value(); + + // Set port to applyResult["port"]. + ADA_ASSERT_TRUE(apply_result->port.has_value()); + port = apply_result->port.value(); + + // Set pathname to applyResult["pathname"]. + ADA_ASSERT_TRUE(apply_result->pathname.has_value()); + pathname = apply_result->pathname.value(); + + // Set search to applyResult["search"]. + ADA_ASSERT_TRUE(apply_result->search.has_value()); + if (apply_result->search->starts_with("?")) { + search = apply_result->search->substr(1); + } else { + search = apply_result->search.value(); + } + + // Set hash to applyResult["hash"]. + ADA_ASSERT_TRUE(apply_result->hash.has_value()); + ADA_ASSERT_TRUE(!apply_result->hash->starts_with("#")); + hash = apply_result->hash.value(); + } else { + ADA_ASSERT_TRUE(std::holds_alternative(input)); + + // Let baseURL be null. + result base_url; + + // If baseURLString was given, then: + if (base_url_string) { + // Let baseURL be the result of parsing baseURLString. + base_url = ada::parse(*base_url_string, nullptr); + + // If baseURL is failure, return null. + if (!base_url) { + ada_log("match returned std::nullopt because failed to parse base_url=", + *base_url_string); + return std::nullopt; + } + + // Append baseURLString to inputs. + inputs.emplace_back(*base_url_string); + } + + url_aggregator* base_url_value = + base_url.has_value() ? &base_url.value() : nullptr; + + // Set url to the result of parsing input given baseURL. + auto url = ada::parse(std::get(input), + base_url_value); + + // If url is failure, return null. + if (!url) { + ada_log("match returned std::nullopt because url failed"); + return std::nullopt; + } + + // Set protocol to url’s scheme. + // IMPORTANT: Not documented on the URLPattern spec, but protocol suffix ':' + // is removed. Similar work was done on workerd: + // https://github.com/cloudflare/workerd/blob/8620d14012513a6ce04d079e401d3becac3c67bd/src/workerd/jsg/url.c%2B%2B#L2038 + protocol = url->get_protocol().substr(0, url->get_protocol().size() - 1); + // Set username to url’s username. + username = url->get_username(); + // Set password to url’s password. + password = url->get_password(); + // Set hostname to url’s host, serialized, or the empty string if the value + // is null. + hostname = url->get_hostname(); + // Set port to url’s port, serialized, or the empty string if the value is + // null. + port = url->get_port(); + // Set pathname to the result of URL path serializing url. + pathname = url->get_pathname(); + // Set search to url’s query or the empty string if the value is null. + // IMPORTANT: Not documented on the URLPattern spec, but search prefix '?' + // is removed. Similar work was done on workerd: + // https://github.com/cloudflare/workerd/blob/8620d14012513a6ce04d079e401d3becac3c67bd/src/workerd/jsg/url.c%2B%2B#L2232 + if (url->has_search()) { + ADA_ASSERT_TRUE(url->get_search().starts_with("?")); + search = url->get_search().substr(1); + } else { + search = ""; + } + // Set hash to url’s fragment or the empty string if the value is null. + // IMPORTANT: Not documented on the URLPattern spec, but hash prefix '#' is + // removed. Similar work was done on workerd: + // https://github.com/cloudflare/workerd/blob/8620d14012513a6ce04d079e401d3becac3c67bd/src/workerd/jsg/url.c%2B%2B#L2242 + if (url->has_hash()) { + ADA_ASSERT_TRUE(url->get_hash().starts_with("#")); + hash = url->get_hash().substr(1); + } else { + hash = ""; + } + } + + // Let protocolExecResult be RegExpBuiltinExec(urlPattern’s protocol + // component's regular expression, protocol). + auto protocol_exec_result = + regex_provider::regex_search(protocol, protocol_component.regexp); + + // Let usernameExecResult be RegExpBuiltinExec(urlPattern’s username + // component's regular expression, username). + auto username_exec_result = + regex_provider::regex_search(username, username_component.regexp); + + // Let passwordExecResult be RegExpBuiltinExec(urlPattern’s password + // component's regular expression, password). + auto password_exec_result = + regex_provider::regex_search(password, password_component.regexp); + + // Let hostnameExecResult be RegExpBuiltinExec(urlPattern’s hostname + // component's regular expression, hostname). + auto hostname_exec_result = + regex_provider::regex_search(hostname, hostname_component.regexp); + + // Let portExecResult be RegExpBuiltinExec(urlPattern’s port component's + // regular expression, port). + auto port_exec_result = + regex_provider::regex_search(port, port_component.regexp); + + // Let pathnameExecResult be RegExpBuiltinExec(urlPattern’s pathname + // component's regular expression, pathname). + auto pathname_exec_result = + regex_provider::regex_search(pathname, pathname_component.regexp); + + // Let searchExecResult be RegExpBuiltinExec(urlPattern’s search component's + // regular expression, search). + auto search_exec_result = + regex_provider::regex_search(search, search_component.regexp); + + // Let hashExecResult be RegExpBuiltinExec(urlPattern’s hash component's + // regular expression, hash). + auto hash_exec_result = + regex_provider::regex_search(hash, hash_component.regexp); + + // If protocolExecResult, usernameExecResult, passwordExecResult, + // hostnameExecResult, portExecResult, pathnameExecResult, searchExecResult, + // or hashExecResult are null then return null. + if (!protocol_exec_result || !username_exec_result || !password_exec_result || + !hostname_exec_result || !port_exec_result || !pathname_exec_result || + !search_exec_result || !hash_exec_result) { + return std::nullopt; + } + + // Let result be a new URLPatternResult. + auto result = url_pattern_result{}; + // Set result["inputs"] to inputs. + result.inputs = std::move(inputs); + // Set result["protocol"] to the result of creating a component match result + // given urlPattern’s protocol component, protocol, and protocolExecResult. + result.protocol = protocol_component.create_component_match_result( + protocol, std::move(*protocol_exec_result)); + + // Set result["username"] to the result of creating a component match result + // given urlPattern’s username component, username, and usernameExecResult. + result.username = username_component.create_component_match_result( + username, std::move(*username_exec_result)); + + // Set result["password"] to the result of creating a component match result + // given urlPattern’s password component, password, and passwordExecResult. + result.password = password_component.create_component_match_result( + password, std::move(*password_exec_result)); + + // Set result["hostname"] to the result of creating a component match result + // given urlPattern’s hostname component, hostname, and hostnameExecResult. + result.hostname = hostname_component.create_component_match_result( + hostname, std::move(*hostname_exec_result)); + + // Set result["port"] to the result of creating a component match result given + // urlPattern’s port component, port, and portExecResult. + result.port = port_component.create_component_match_result( + port, std::move(*port_exec_result)); + + // Set result["pathname"] to the result of creating a component match result + // given urlPattern’s pathname component, pathname, and pathnameExecResult. + result.pathname = pathname_component.create_component_match_result( + pathname, std::move(*pathname_exec_result)); + + // Set result["search"] to the result of creating a component match result + // given urlPattern’s search component, search, and searchExecResult. + result.search = search_component.create_component_match_result( + search, std::move(*search_exec_result)); + + // Set result["hash"] to the result of creating a component match result given + // urlPattern’s hash component, hash, and hashExecResult. + result.hash = hash_component.create_component_match_result( + hash, std::move(*hash_exec_result)); + + return result; +} + } // namespace ada #endif diff --git a/include/ada/url_pattern.h b/include/ada/url_pattern.h index bfa9e4fdb..2488a96ea 100644 --- a/include/ada/url_pattern.h +++ b/include/ada/url_pattern.h @@ -7,7 +7,6 @@ #include "ada/implementation.h" #include "ada/expected.h" -#include "ada/url_pattern_regex.h" #include #include diff --git a/include/ada/url_pattern_helpers-inl.h b/include/ada/url_pattern_helpers-inl.h index 3e0b3a07b..c2faa2726 100644 --- a/include/ada/url_pattern_helpers-inl.h +++ b/include/ada/url_pattern_helpers-inl.h @@ -5,9 +5,11 @@ #ifndef ADA_URL_PATTERN_HELPERS_INL_H #define ADA_URL_PATTERN_HELPERS_INL_H +#include +#include + #include "ada/common_defs.h" #include "ada/expected.h" -#include "ada/url_pattern.h" #include "ada/url_pattern_helpers.h" #include "ada/implementation.h" @@ -780,6 +782,297 @@ tl::expected, errors> parse_pattern_string( return parser.parts; } +template +bool protocol_component_matches_special_scheme( + url_pattern_component& component) { + auto regex = component.regexp; + return regex_provider::regex_match("http", regex) || + regex_provider::regex_match("https", regex) || + regex_provider::regex_match("ws", regex) || + regex_provider::regex_match("wss", regex) || + regex_provider::regex_match("ftp", regex); +} + +template +inline std::optional constructor_string_parser:: + compute_protocol_matches_special_scheme_flag(regex_provider provider) { + ada_log( + "constructor_string_parser::compute_protocol_matches_special_scheme_" + "flag"); + // Let protocol string be the result of running make a component string given + // parser. + auto protocol_string = make_component_string(); + // Let protocol component be the result of compiling a component given + // protocol string, canonicalize a protocol, and default options. + auto protocol_component = url_pattern_component::compile( + protocol_string, canonicalize_protocol, + url_pattern_compile_component_options::DEFAULT, provider); + if (!protocol_component) { + ada_log("url_pattern_component::compile failed for protocol_string ", + protocol_string); + return protocol_component.error(); + } + // If the result of running protocol component matches a special scheme given + // protocol component is true, then set parser’s protocol matches a special + // scheme flag to true. + if (protocol_component_matches_special_scheme(*protocol_component)) { + protocol_matches_a_special_scheme_flag = true; + } + return std::nullopt; +} + +template +tl::expected +constructor_string_parser::parse(std::string_view input, + regex_provider provider) { + ada_log("constructor_string_parser::parse input=", input); + // Let parser be a new constructor string parser whose input is input and + // token list is the result of running tokenize given input and "lenient". + auto token_list = tokenize(input, token_policy::LENIENT); + if (!token_list) { + return tl::unexpected(token_list.error()); + } + auto parser = constructor_string_parser(input, std::move(*token_list)); + + // While parser’s token index is less than parser’s token list size: + while (parser.token_index < parser.token_list.size()) { + // Set parser’s token increment to 1. + parser.token_increment = 1; + + // If parser’s token list[parser’s token index]'s type is "end" then: + if (parser.token_list[parser.token_index].type == token_type::END) { + // If parser’s state is "init": + if (parser.state == State::INIT) { + // Run rewind given parser. + parser.rewind(); + // If the result of running is a hash prefix given parser is true, then + // run change state given parser, "hash" and 1. + if (parser.is_hash_prefix()) { + parser.change_state(State::HASH, 1); + } else if (parser.is_search_prefix()) { + // Otherwise if the result of running is a search prefix given parser + // is true: Run change state given parser, "search" and 1. + parser.change_state(State::SEARCH, 1); + } else { + // Run change state given parser, "pathname" and 0. + parser.change_state(State::PATHNAME, 0); + } + // Increment parser’s token index by parser’s token increment. + parser.token_index += parser.token_increment; + // Continue. + continue; + } + + if (parser.state == State::AUTHORITY) { + // If parser’s state is "authority": + // Run rewind and set state given parser, and "hostname". + parser.rewind(); + parser.change_state(State::HOSTNAME, 0); + // Increment parser’s token index by parser’s token increment. + parser.token_index += parser.token_increment; + // Continue. + continue; + } + + // Run change state given parser, "done" and 0. + parser.change_state(State::DONE, 0); + // Break. + break; + } + + // If the result of running is a group open given parser is true: + if (parser.is_group_open()) { + // Increment parser’s group depth by 1. + parser.group_depth += 1; + // Increment parser’s token index by parser’s token increment. + parser.token_index += parser.token_increment; + } + + // If parser’s group depth is greater than 0: + if (parser.group_depth > 0) { + // If the result of running is a group close given parser is true, then + // decrement parser’s group depth by 1. + if (parser.is_group_close()) { + parser.group_depth -= 1; + } else { + // Increment parser’s token index by parser’s token increment. + parser.token_index += parser.token_increment; + continue; + } + } + + // Switch on parser’s state and run the associated steps: + switch (parser.state) { + case State::INIT: { + // If the result of running is a protocol suffix given parser is true: + if (parser.is_protocol_suffix()) { + // Run rewind and set state given parser and "protocol". + parser.rewind(); + parser.change_state(State::PROTOCOL, 0); + } + break; + } + case State::PROTOCOL: { + // If the result of running is a protocol suffix given parser is true: + if (parser.is_protocol_suffix()) { + // Run compute protocol matches a special scheme flag given parser. + if (const auto error = + parser.compute_protocol_matches_special_scheme_flag( + provider)) { + ada_log("compute_protocol_matches_special_scheme_flag failed"); + return tl::unexpected(*error); + } + // Let next state be "pathname". + auto next_state = State::PATHNAME; + // Let skip be 1. + auto skip = 1; + // If the result of running next is authority slashes given parser is + // true: + if (parser.next_is_authority_slashes()) { + // Set next state to "authority". + next_state = State::AUTHORITY; + // Set skip to 3. + skip = 3; + } else if (parser.protocol_matches_a_special_scheme_flag) { + // Otherwise if parser’s protocol matches a special scheme flag is + // true, then set next state to "authority". + next_state = State::AUTHORITY; + } + + // Run change state given parser, next state, and skip. + parser.change_state(next_state, skip); + } + break; + } + case State::AUTHORITY: { + // If the result of running is an identity terminator given parser is + // true, then run rewind and set state given parser and "username". + if (parser.is_an_identity_terminator()) { + parser.rewind(); + parser.change_state(State::USERNAME, 0); + } else if (parser.is_pathname_start() || parser.is_search_prefix() || + parser.is_hash_prefix()) { + // Otherwise if any of the following are true: + // - the result of running is a pathname start given parser; + // - the result of running is a search prefix given parser; or + // - the result of running is a hash prefix given parser, + // then run rewind and set state given parser and "hostname". + parser.rewind(); + parser.change_state(State::HOSTNAME, 0); + } + break; + } + case State::USERNAME: { + // If the result of running is a password prefix given parser is true, + // then run change state given parser, "password", and 1. + if (parser.is_password_prefix()) { + parser.change_state(State::PASSWORD, 1); + } else if (parser.is_an_identity_terminator()) { + // Otherwise if the result of running is an identity terminator given + // parser is true, then run change state given parser, "hostname", + // and 1. + parser.change_state(State::HOSTNAME, 1); + } + break; + } + case State::PASSWORD: { + // If the result of running is an identity terminator given parser is + // true, then run change state given parser, "hostname", and 1. + if (parser.is_an_identity_terminator()) { + parser.change_state(State::HOSTNAME, 1); + } + break; + } + case State::HOSTNAME: { + // If the result of running is an IPv6 open given parser is true, then + // increment parser’s hostname IPv6 bracket depth by 1. + if (parser.is_an_ipv6_open()) { + parser.hostname_ipv6_bracket_depth += 1; + } else if (parser.is_an_ipv6_close()) { + // Otherwise if the result of running is an IPv6 close given parser is + // true, then decrement parser’s hostname IPv6 bracket depth by 1. + parser.hostname_ipv6_bracket_depth -= 1; + } else if (parser.is_port_prefix() && + parser.hostname_ipv6_bracket_depth == 0) { + // Otherwise if the result of running is a port prefix given parser is + // true and parser’s hostname IPv6 bracket depth is zero, then run + // change state given parser, "port", and 1. + parser.change_state(State::PORT, 1); + } else if (parser.is_pathname_start()) { + // Otherwise if the result of running is a pathname start given parser + // is true, then run change state given parser, "pathname", and 0. + parser.change_state(State::PATHNAME, 0); + } else if (parser.is_search_prefix()) { + // Otherwise if the result of running is a search prefix given parser + // is true, then run change state given parser, "search", and 1. + parser.change_state(State::SEARCH, 1); + } else if (parser.is_hash_prefix()) { + // Otherwise if the result of running is a hash prefix given parser is + // true, then run change state given parser, "hash", and 1. + parser.change_state(State::HASH, 1); + } + + break; + } + case State::PORT: { + // If the result of running is a pathname start given parser is true, + // then run change state given parser, "pathname", and 0. + if (parser.is_pathname_start()) { + parser.change_state(State::PATHNAME, 0); + } else if (parser.is_search_prefix()) { + // Otherwise if the result of running is a search prefix given parser + // is true, then run change state given parser, "search", and 1. + parser.change_state(State::SEARCH, 1); + } else if (parser.is_hash_prefix()) { + // Otherwise if the result of running is a hash prefix given parser is + // true, then run change state given parser, "hash", and 1. + parser.change_state(State::HASH, 1); + } + break; + } + case State::PATHNAME: { + // If the result of running is a search prefix given parser is true, + // then run change state given parser, "search", and 1. + if (parser.is_search_prefix()) { + parser.change_state(State::SEARCH, 1); + } else if (parser.is_hash_prefix()) { + // Otherwise if the result of running is a hash prefix given parser is + // true, then run change state given parser, "hash", and 1. + parser.change_state(State::HASH, 1); + } + break; + } + case State::SEARCH: { + // If the result of running is a hash prefix given parser is true, then + // run change state given parser, "hash", and 1. + if (parser.is_hash_prefix()) { + parser.change_state(State::HASH, 1); + } + } + case State::HASH: { + // Do nothing + break; + } + default: { + // Assert: This step is never reached. + unreachable(); + } + } + + // Increment parser’s token index by parser’s token increment. + parser.token_index += parser.token_increment; + } + + // If parser’s result contains "hostname" and not "port", then set parser’s + // result["port"] to the empty string. + if (parser.result.hostname && !parser.result.port) { + parser.result.port = ""; + } + + // Return parser’s result. + return parser.result; +} + } // namespace ada::url_pattern_helpers #endif diff --git a/include/ada/url_pattern_helpers.h b/include/ada/url_pattern_helpers.h index d62c0e612..93859522c 100644 --- a/include/ada/url_pattern_helpers.h +++ b/include/ada/url_pattern_helpers.h @@ -6,12 +6,17 @@ #define ADA_URL_PATTERN_HELPERS_H #include "ada/expected.h" +#include "ada/common_defs.h" #include "ada/url_pattern.h" #include #include #include +namespace ada { +enum class errors : uint8_t; +} + namespace ada::url_pattern_helpers { // @see https://urlpattern.spec.whatwg.org/#token diff --git a/src/implementation.cpp b/src/implementation.cpp index a4553600b..1fd6dc534 100644 --- a/src/implementation.cpp +++ b/src/implementation.cpp @@ -1,10 +1,11 @@ +#include "ada/implementation-inl.h" + #include #include "ada/common_defs.h" #include "ada/parser.h" #include "ada/url.h" #include "ada/url_aggregator.h" -#include "ada/url_pattern.h" namespace ada { @@ -79,37 +80,4 @@ ada_warn_unused std::string to_string(ada::encoding_type type) { } } -template -ada_warn_unused tl::expected, errors> -parse_url_pattern(std::variant input, - const std::string_view* base_url, - const url_pattern_options* options, - std::optional provider) { - return parser::parse_url_pattern_impl( - std::move(input), base_url, options, - provider.value_or(url_pattern_regex::std_regex_provider())); -} - -template ada_warn_unused - tl::expected, errors> - parse_url_pattern( - std::variant input, - const std::string_view* base_url, const url_pattern_options* options, - std::optional provider); - -template result> -url_pattern::exec( - const url_pattern_input& input, std::string_view* base_url); - -template result url_pattern::test( - const url_pattern_input& input, std::string_view* base_url); -namespace parser { -template tl::expected, - errors> -parse_url_pattern_impl(std::variant input, - const std::string_view* base_url, - const url_pattern_options* options, - url_pattern_regex::std_regex_provider&& provider); -} // namespace parser - } // namespace ada diff --git a/src/parser.cpp b/src/parser.cpp index b3816d61c..f7354169e 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -1,4 +1,4 @@ -#include "ada/parser.h" +#include "ada/parser-inl.h" #include @@ -898,263 +898,6 @@ result_type parse_url_impl(std::string_view user_input, return url; } -template -tl::expected, errors> parse_url_pattern_impl( - std::variant input, - const std::string_view* base_url, const url_pattern_options* options, - regex_provider&& provider) { - // Let init be null. - url_pattern_init init; - - // If input is a scalar value string then: - if (std::holds_alternative(input)) { - // Set init to the result of running parse a constructor string given input. - auto parse_result = - url_pattern_helpers::constructor_string_parser::parse( - std::get(input), provider); - if (!parse_result) { - ada_log("constructor_string_parser::parse failed"); - return tl::unexpected(parse_result.error()); - } - init = std::move(*parse_result); - // If baseURL is null and init["protocol"] does not exist, then throw a - // TypeError. - if (!base_url && !init.protocol) { - ada_log("base url is null and protocol is not set"); - return tl::unexpected(errors::type_error); - } - - // If baseURL is not null, set init["baseURL"] to baseURL. - if (base_url) { - init.base_url = std::string(*base_url); - } - } else { - // Assert: input is a URLPatternInit. - ADA_ASSERT_TRUE(std::holds_alternative(input)); - // If baseURL is not null, then throw a TypeError. - if (base_url) { - ada_log("base url is not null"); - return tl::unexpected(errors::type_error); - } - // Optimization: Avoid copy by moving the input value. - // Set init to input. - init = std::move(std::get(input)); - } - - // Let processedInit be the result of process a URLPatternInit given init, - // "pattern", null, null, null, null, null, null, null, and null. - // TODO: Make "pattern" an enum to avoid creating a string everytime. - auto processed_init = url_pattern_init::process(init, "pattern"); - if (!processed_init) { - ada_log("url_pattern_init::process failed for init and 'pattern'"); - return tl::unexpected(processed_init.error()); - } - - // For each componentName of « "protocol", "username", "password", "hostname", - // "port", "pathname", "search", "hash" If processedInit[componentName] does - // not exist, then set processedInit[componentName] to "*". - ADA_ASSERT_TRUE(processed_init.has_value()); - if (!processed_init->protocol) processed_init->protocol = "*"; - if (!processed_init->username) processed_init->username = "*"; - if (!processed_init->password) processed_init->password = "*"; - if (!processed_init->hostname) processed_init->hostname = "*"; - if (!processed_init->port) processed_init->port = "*"; - if (!processed_init->pathname) processed_init->pathname = "*"; - if (!processed_init->search) processed_init->search = "*"; - if (!processed_init->hash) processed_init->hash = "*"; - - ada_log("-- processed_init->protocol: ", processed_init->protocol.value()); - ada_log("-- processed_init->username: ", processed_init->username.value()); - ada_log("-- processed_init->password: ", processed_init->password.value()); - ada_log("-- processed_init->hostname: ", processed_init->hostname.value()); - ada_log("-- processed_init->port: ", processed_init->port.value()); - ada_log("-- processed_init->pathname: ", processed_init->pathname.value()); - ada_log("-- processed_init->search: ", processed_init->search.value()); - ada_log("-- processed_init->hash: ", processed_init->hash.value()); - - // If processedInit["protocol"] is a special scheme and processedInit["port"] - // is a string which represents its corresponding default port in radix-10 - // using ASCII digits then set processedInit["port"] to the empty string. - // TODO: Optimization opportunity. - if (scheme::is_special(*processed_init->protocol)) { - std::string_view port = processed_init->port.value(); - helpers::trim_c0_whitespace(port); - if (std::to_string(scheme::get_special_port(*processed_init->protocol)) == - port) { - processed_init->port->clear(); - } - } - - // Let urlPattern be a new URL pattern. - url_pattern url_pattern_(std::move(provider)); - - // Set urlPattern’s protocol component to the result of compiling a component - // given processedInit["protocol"], canonicalize a protocol, and default - // options. - auto protocol_component = url_pattern_component::compile( - processed_init->protocol.value(), - url_pattern_helpers::canonicalize_protocol, - url_pattern_compile_component_options::DEFAULT, provider); - if (!protocol_component) { - ada_log("url_pattern_component::compile failed for protocol ", - processed_init->protocol.value()); - return tl::unexpected(protocol_component.error()); - } - url_pattern_.protocol_component = std::move(*protocol_component); - - // Set urlPattern’s username component to the result of compiling a component - // given processedInit["username"], canonicalize a username, and default - // options. - auto username_component = url_pattern_component::compile( - processed_init->username.value(), - url_pattern_helpers::canonicalize_username, - url_pattern_compile_component_options::DEFAULT, provider); - if (!username_component) { - ada_log("url_pattern_component::compile failed for username ", - processed_init->username.value()); - return tl::unexpected(username_component.error()); - } - url_pattern_.username_component = std::move(*username_component); - - // Set urlPattern’s password component to the result of compiling a component - // given processedInit["password"], canonicalize a password, and default - // options. - auto password_component = url_pattern_component::compile( - processed_init->password.value(), - url_pattern_helpers::canonicalize_password, - url_pattern_compile_component_options::DEFAULT, provider); - if (!password_component) { - ada_log("url_pattern_component::compile failed for password ", - processed_init->password.value()); - return tl::unexpected(password_component.error()); - } - url_pattern_.password_component = std::move(*password_component); - - // TODO: Optimization opportunity. The following if statement can be - // simplified. - // If the result running hostname pattern is an IPv6 address given - // processedInit["hostname"] is true, then set urlPattern’s hostname component - // to the result of compiling a component given processedInit["hostname"], - // canonicalize an IPv6 hostname, and hostname options. - if (url_pattern_helpers::is_ipv6_address(processed_init->hostname.value())) { - ada_log("processed_init->hostname is ipv6 address"); - // then set urlPattern’s hostname component to the result of compiling a - // component given processedInit["hostname"], canonicalize an IPv6 hostname, - // and hostname options. - auto hostname_component = url_pattern_component::compile( - processed_init->hostname.value(), - url_pattern_helpers::canonicalize_ipv6_hostname, - url_pattern_compile_component_options::DEFAULT, provider); - if (!hostname_component) { - ada_log("url_pattern_component::compile failed for ipv6 hostname ", - processed_init->hostname.value()); - return tl::unexpected(hostname_component.error()); - } - url_pattern_.hostname_component = std::move(*hostname_component); - } else { - // Otherwise, set urlPattern’s hostname component to the result of compiling - // a component given processedInit["hostname"], canonicalize a hostname, and - // hostname options. - auto hostname_component = url_pattern_component::compile( - processed_init->hostname.value(), - url_pattern_helpers::canonicalize_hostname, - url_pattern_compile_component_options::HOSTNAME, provider); - if (!hostname_component) { - ada_log("url_pattern_component::compile failed for hostname ", - processed_init->hostname.value()); - return tl::unexpected(hostname_component.error()); - } - url_pattern_.hostname_component = std::move(*hostname_component); - } - - // Set urlPattern’s port component to the result of compiling a component - // given processedInit["port"], canonicalize a port, and default options. - auto port_component = url_pattern_component::compile( - processed_init->port.value(), url_pattern_helpers::canonicalize_port, - url_pattern_compile_component_options::DEFAULT, provider); - if (!port_component) { - ada_log("url_pattern_component::compile failed for port ", - processed_init->port.value()); - return tl::unexpected(port_component.error()); - } - url_pattern_.port_component = std::move(*port_component); - - // Let compileOptions be a copy of the default options with the ignore case - // property set to options["ignoreCase"]. - auto compile_options = url_pattern_compile_component_options::DEFAULT; - if (options) { - compile_options.ignore_case = options->ignore_case; - } - - // TODO: Optimization opportunity: Simplify this if statement. - // If the result of running protocol component matches a special scheme given - // urlPattern’s protocol component is true, then: - if (url_pattern_helpers::protocol_component_matches_special_scheme< - regex_provider>(url_pattern_.protocol_component)) { - // Let pathCompileOptions be copy of the pathname options with the ignore - // case property set to options["ignoreCase"]. - auto path_compile_options = url_pattern_compile_component_options::PATHNAME; - if (options) { - path_compile_options.ignore_case = options->ignore_case; - } - - // Set urlPattern’s pathname component to the result of compiling a - // component given processedInit["pathname"], canonicalize a pathname, and - // pathCompileOptions. - auto pathname_component = url_pattern_component::compile( - processed_init->pathname.value(), - url_pattern_helpers::canonicalize_pathname, path_compile_options, - provider); - if (!pathname_component) { - ada_log("url_pattern_component::compile failed for pathname ", - processed_init->pathname.value()); - return tl::unexpected(pathname_component.error()); - } - url_pattern_.pathname_component = std::move(*pathname_component); - } else { - // Otherwise set urlPattern’s pathname component to the result of compiling - // a component given processedInit["pathname"], canonicalize an opaque - // pathname, and compileOptions. - auto pathname_component = url_pattern_component::compile( - processed_init->pathname.value(), - url_pattern_helpers::canonicalize_opaque_pathname, compile_options, - provider); - if (!pathname_component) { - ada_log("url_pattern_component::compile failed for opaque pathname ", - processed_init->pathname.value()); - return tl::unexpected(pathname_component.error()); - } - url_pattern_.pathname_component = std::move(*pathname_component); - } - - // Set urlPattern’s search component to the result of compiling a component - // given processedInit["search"], canonicalize a search, and compileOptions. - auto search_component = url_pattern_component::compile( - processed_init->search.value(), url_pattern_helpers::canonicalize_search, - compile_options, provider); - if (!search_component) { - ada_log("url_pattern_component::compile failed for search ", - processed_init->search.value()); - return tl::unexpected(search_component.error()); - } - url_pattern_.search_component = std::move(*search_component); - - // Set urlPattern’s hash component to the result of compiling a component - // given processedInit["hash"], canonicalize a hash, and compileOptions. - auto hash_component = url_pattern_component::compile( - processed_init->hash.value(), url_pattern_helpers::canonicalize_hash, - compile_options, provider); - if (!hash_component) { - ada_log("url_pattern_component::compile failed for hash ", - processed_init->hash.value()); - return tl::unexpected(hash_component.error()); - } - url_pattern_.hash_component = std::move(*hash_component); - - // Return urlPattern. - return url_pattern_; -} - template url parse_url_impl(std::string_view user_input, const url* base_url = nullptr); template url_aggregator parse_url_impl( diff --git a/src/url_pattern.cpp b/src/url_pattern.cpp index 7096b5019..3877d35f6 100644 --- a/src/url_pattern.cpp +++ b/src/url_pattern.cpp @@ -1,5 +1,3 @@ -#include "ada/url_pattern-inl.h" - #include #include #include @@ -449,328 +447,4 @@ std::string url_pattern_init::to_string() const { return answer; } -template -template -tl::expected, errors> -url_pattern_component::compile( - std::string_view input, F& encoding_callback, - url_pattern_compile_component_options& options, - const regex_provider& provider) { - ada_log("url_pattern_component::compile input: ", input); - // Let part list be the result of running parse a pattern string given input, - // options, and encoding callback. - auto part_list = url_pattern_helpers::parse_pattern_string(input, options, - encoding_callback); - - if (!part_list) { - ada_log("parse_pattern_string failed"); - return tl::unexpected(part_list.error()); - } - - // Let (regular expression string, name list) be the result of running - // generate a regular expression and name list given part list and options. - auto [regular_expression_string, name_list] = - url_pattern_helpers::generate_regular_expression_and_name_list(*part_list, - options); - - ada_log("regular expression string: ", regular_expression_string); - - // Let pattern string be the result of running generate a pattern - // string given part list and options. - auto pattern_string = - url_pattern_helpers::generate_pattern_string(*part_list, options); - - // Let regular expression be RegExpCreate(regular expression string, - // flags). If this throws an exception, catch it, and throw a - // TypeError. - std::optional regular_expression = - provider.create_instance(regular_expression_string, options.ignore_case); - - if (!regular_expression) { - return tl::unexpected(errors::type_error); - } - - // For each part of part list: - // - If part’s type is "regexp", then set has regexp groups to true. - const auto has_regexp = [](const auto& part) { return part.is_regexp(); }; - const bool has_regexp_groups = std::ranges::any_of(*part_list, has_regexp); - - ada_log("has regexp groups: ", has_regexp_groups); - - // Return a new component whose pattern string is pattern string, regular - // expression is regular expression, group name list is name list, and has - // regexp groups is has regexp groups. - return url_pattern_component( - std::move(pattern_string), std::move(*regular_expression), - std::move(name_list), has_regexp_groups); -} - -template -result> url_pattern::exec( - const url_pattern_input& input, std::string_view* base_url) { - // Return the result of match given this's associated URL pattern, input, and - // baseURL if given. - return match(input, base_url); -} - -template -result url_pattern::test(const url_pattern_input& input, - std::string_view* base_url) { - // TODO: Optimization opportunity. Rather than returning `url_pattern_result` - // Implement a fast path just like `can_parse()` in ada_url. - // Let result be the result of match given this's associated URL pattern, - // input, and baseURL if given. - // If result is null, return false. - if (auto result = match(input, base_url); result.has_value()) { - return result->has_value(); - } - return tl::unexpected(errors::type_error); -} - -template -result> url_pattern::match( - const url_pattern_input& input, std::string_view* base_url_string) { - std::string protocol{}; - std::string username{}; - std::string password{}; - std::string hostname{}; - std::string port{}; - std::string pathname{}; - std::string search{}; - std::string hash{}; - - // Let inputs be an empty list. - // Append input to inputs. - std::vector inputs{input}; - - // If input is a URLPatternInit then: - if (std::holds_alternative(input)) { - ada_log( - "url_pattern::match called with url_pattern_init and base_url_string=", - base_url_string); - // If baseURLString was given, throw a TypeError. - if (base_url_string) { - ada_log("failed to match because base_url_string was given"); - return tl::unexpected(errors::type_error); - } - - // Let applyResult be the result of process a URLPatternInit given input, - // "url", protocol, username, password, hostname, port, pathname, search, - // and hash. - auto apply_result = url_pattern_init::process( - std::get(input), "url", protocol, username, password, - hostname, port, pathname, search, hash); - - // If this throws an exception, catch it, and return null. - if (!apply_result.has_value()) { - ada_log("match returned std::nullopt because process threw"); - return std::nullopt; - } - - // Set protocol to applyResult["protocol"]. - ADA_ASSERT_TRUE(apply_result->protocol.has_value()); - protocol = apply_result->protocol.value(); - - // Set username to applyResult["username"]. - ADA_ASSERT_TRUE(apply_result->username.has_value()); - username = apply_result->username.value(); - - // Set password to applyResult["password"]. - ADA_ASSERT_TRUE(apply_result->password.has_value()); - password = apply_result->password.value(); - - // Set hostname to applyResult["hostname"]. - ADA_ASSERT_TRUE(apply_result->hostname.has_value()); - hostname = apply_result->hostname.value(); - - // Set port to applyResult["port"]. - ADA_ASSERT_TRUE(apply_result->port.has_value()); - port = apply_result->port.value(); - - // Set pathname to applyResult["pathname"]. - ADA_ASSERT_TRUE(apply_result->pathname.has_value()); - pathname = apply_result->pathname.value(); - - // Set search to applyResult["search"]. - ADA_ASSERT_TRUE(apply_result->search.has_value()); - if (apply_result->search->starts_with("?")) { - search = apply_result->search->substr(1); - } else { - search = apply_result->search.value(); - } - - // Set hash to applyResult["hash"]. - ADA_ASSERT_TRUE(apply_result->hash.has_value()); - ADA_ASSERT_TRUE(!apply_result->hash->starts_with("#")); - hash = apply_result->hash.value(); - } else { - ADA_ASSERT_TRUE(std::holds_alternative(input)); - - // Let baseURL be null. - result base_url; - - // If baseURLString was given, then: - if (base_url_string) { - // Let baseURL be the result of parsing baseURLString. - base_url = ada::parse(*base_url_string, nullptr); - - // If baseURL is failure, return null. - if (!base_url) { - ada_log("match returned std::nullopt because failed to parse base_url=", - *base_url_string); - return std::nullopt; - } - - // Append baseURLString to inputs. - inputs.emplace_back(*base_url_string); - } - - url_aggregator* base_url_value = - base_url.has_value() ? &base_url.value() : nullptr; - - // Set url to the result of parsing input given baseURL. - auto url = ada::parse(std::get(input), - base_url_value); - - // If url is failure, return null. - if (!url) { - ada_log("match returned std::nullopt because url failed"); - return std::nullopt; - } - - // Set protocol to url’s scheme. - // IMPORTANT: Not documented on the URLPattern spec, but protocol suffix ':' - // is removed. Similar work was done on workerd: - // https://github.com/cloudflare/workerd/blob/8620d14012513a6ce04d079e401d3becac3c67bd/src/workerd/jsg/url.c%2B%2B#L2038 - protocol = url->get_protocol().substr(0, url->get_protocol().size() - 1); - // Set username to url’s username. - username = url->get_username(); - // Set password to url’s password. - password = url->get_password(); - // Set hostname to url’s host, serialized, or the empty string if the value - // is null. - hostname = url->get_hostname(); - // Set port to url’s port, serialized, or the empty string if the value is - // null. - port = url->get_port(); - // Set pathname to the result of URL path serializing url. - pathname = url->get_pathname(); - // Set search to url’s query or the empty string if the value is null. - // IMPORTANT: Not documented on the URLPattern spec, but search prefix '?' - // is removed. Similar work was done on workerd: - // https://github.com/cloudflare/workerd/blob/8620d14012513a6ce04d079e401d3becac3c67bd/src/workerd/jsg/url.c%2B%2B#L2232 - if (url->has_search()) { - ADA_ASSERT_TRUE(url->get_search().starts_with("?")); - search = url->get_search().substr(1); - } else { - search = ""; - } - // Set hash to url’s fragment or the empty string if the value is null. - // IMPORTANT: Not documented on the URLPattern spec, but hash prefix '#' is - // removed. Similar work was done on workerd: - // https://github.com/cloudflare/workerd/blob/8620d14012513a6ce04d079e401d3becac3c67bd/src/workerd/jsg/url.c%2B%2B#L2242 - if (url->has_hash()) { - ADA_ASSERT_TRUE(url->get_hash().starts_with("#")); - hash = url->get_hash().substr(1); - } else { - hash = ""; - } - } - - // Let protocolExecResult be RegExpBuiltinExec(urlPattern’s protocol - // component's regular expression, protocol). - auto protocol_exec_result = - regex_provider::regex_search(protocol, protocol_component.regexp); - - // Let usernameExecResult be RegExpBuiltinExec(urlPattern’s username - // component's regular expression, username). - auto username_exec_result = - regex_provider::regex_search(username, username_component.regexp); - - // Let passwordExecResult be RegExpBuiltinExec(urlPattern’s password - // component's regular expression, password). - auto password_exec_result = - regex_provider::regex_search(password, password_component.regexp); - - // Let hostnameExecResult be RegExpBuiltinExec(urlPattern’s hostname - // component's regular expression, hostname). - auto hostname_exec_result = - regex_provider::regex_search(hostname, hostname_component.regexp); - - // Let portExecResult be RegExpBuiltinExec(urlPattern’s port component's - // regular expression, port). - auto port_exec_result = - regex_provider::regex_search(port, port_component.regexp); - - // Let pathnameExecResult be RegExpBuiltinExec(urlPattern’s pathname - // component's regular expression, pathname). - auto pathname_exec_result = - regex_provider::regex_search(pathname, pathname_component.regexp); - - // Let searchExecResult be RegExpBuiltinExec(urlPattern’s search component's - // regular expression, search). - auto search_exec_result = - regex_provider::regex_search(search, search_component.regexp); - - // Let hashExecResult be RegExpBuiltinExec(urlPattern’s hash component's - // regular expression, hash). - auto hash_exec_result = - regex_provider::regex_search(hash, hash_component.regexp); - - // If protocolExecResult, usernameExecResult, passwordExecResult, - // hostnameExecResult, portExecResult, pathnameExecResult, searchExecResult, - // or hashExecResult are null then return null. - if (!protocol_exec_result || !username_exec_result || !password_exec_result || - !hostname_exec_result || !port_exec_result || !pathname_exec_result || - !search_exec_result || !hash_exec_result) { - return std::nullopt; - } - - // Let result be a new URLPatternResult. - auto result = url_pattern_result{}; - // Set result["inputs"] to inputs. - result.inputs = std::move(inputs); - // Set result["protocol"] to the result of creating a component match result - // given urlPattern’s protocol component, protocol, and protocolExecResult. - result.protocol = protocol_component.create_component_match_result( - protocol, std::move(*protocol_exec_result)); - - // Set result["username"] to the result of creating a component match result - // given urlPattern’s username component, username, and usernameExecResult. - result.username = username_component.create_component_match_result( - username, std::move(*username_exec_result)); - - // Set result["password"] to the result of creating a component match result - // given urlPattern’s password component, password, and passwordExecResult. - result.password = password_component.create_component_match_result( - password, std::move(*password_exec_result)); - - // Set result["hostname"] to the result of creating a component match result - // given urlPattern’s hostname component, hostname, and hostnameExecResult. - result.hostname = hostname_component.create_component_match_result( - hostname, std::move(*hostname_exec_result)); - - // Set result["port"] to the result of creating a component match result given - // urlPattern’s port component, port, and portExecResult. - result.port = port_component.create_component_match_result( - port, std::move(*port_exec_result)); - - // Set result["pathname"] to the result of creating a component match result - // given urlPattern’s pathname component, pathname, and pathnameExecResult. - result.pathname = pathname_component.create_component_match_result( - pathname, std::move(*pathname_exec_result)); - - // Set result["search"] to the result of creating a component match result - // given urlPattern’s search component, search, and searchExecResult. - result.search = search_component.create_component_match_result( - search, std::move(*search_exec_result)); - - // Set result["hash"] to the result of creating a component match result given - // urlPattern’s hash component, hash, and hashExecResult. - result.hash = hash_component.create_component_match_result( - hash, std::move(*hash_exec_result)); - - return result; -} - } // namespace ada diff --git a/src/url_pattern_helpers.cpp b/src/url_pattern_helpers.cpp index ea9138791..abfdc27e0 100644 --- a/src/url_pattern_helpers.cpp +++ b/src/url_pattern_helpers.cpp @@ -189,44 +189,6 @@ std::string generate_segment_wildcard_regexp( ada_log("generate_segment_wildcard_regexp result: ", result); return result; } -template -bool protocol_component_matches_special_scheme( - url_pattern_component& component) { - auto regex = component.regexp; - return regex_provider::regex_match("http", regex) || - regex_provider::regex_match("https", regex) || - regex_provider::regex_match("ws", regex) || - regex_provider::regex_match("wss", regex) || - regex_provider::regex_match("ftp", regex); -} - -template -inline std::optional constructor_string_parser:: - compute_protocol_matches_special_scheme_flag(regex_provider provider) { - ada_log( - "constructor_string_parser::compute_protocol_matches_special_scheme_" - "flag"); - // Let protocol string be the result of running make a component string given - // parser. - auto protocol_string = make_component_string(); - // Let protocol component be the result of compiling a component given - // protocol string, canonicalize a protocol, and default options. - auto protocol_component = url_pattern_component::compile( - protocol_string, canonicalize_protocol, - url_pattern_compile_component_options::DEFAULT, provider); - if (!protocol_component) { - ada_log("url_pattern_component::compile failed for protocol_string ", - protocol_string); - return protocol_component.error(); - } - // If the result of running protocol component matches a special scheme given - // protocol component is true, then set parser’s protocol matches a special - // scheme flag to true. - if (protocol_component_matches_special_scheme(*protocol_component)) { - protocol_matches_a_special_scheme_flag = true; - } - return std::nullopt; -} tl::expected canonicalize_protocol( std::string_view input) { @@ -473,258 +435,6 @@ tl::expected canonicalize_hash(std::string_view input) { return tl::unexpected(errors::type_error); } -template -tl::expected -constructor_string_parser::parse(std::string_view input, - regex_provider provider) { - ada_log("constructor_string_parser::parse input=", input); - // Let parser be a new constructor string parser whose input is input and - // token list is the result of running tokenize given input and "lenient". - auto token_list = tokenize(input, token_policy::LENIENT); - if (!token_list) { - return tl::unexpected(token_list.error()); - } - auto parser = constructor_string_parser(input, std::move(*token_list)); - - // While parser’s token index is less than parser’s token list size: - while (parser.token_index < parser.token_list.size()) { - // Set parser’s token increment to 1. - parser.token_increment = 1; - - // If parser’s token list[parser’s token index]'s type is "end" then: - if (parser.token_list[parser.token_index].type == token_type::END) { - // If parser’s state is "init": - if (parser.state == State::INIT) { - // Run rewind given parser. - parser.rewind(); - // If the result of running is a hash prefix given parser is true, then - // run change state given parser, "hash" and 1. - if (parser.is_hash_prefix()) { - parser.change_state(State::HASH, 1); - } else if (parser.is_search_prefix()) { - // Otherwise if the result of running is a search prefix given parser - // is true: Run change state given parser, "search" and 1. - parser.change_state(State::SEARCH, 1); - } else { - // Run change state given parser, "pathname" and 0. - parser.change_state(State::PATHNAME, 0); - } - // Increment parser’s token index by parser’s token increment. - parser.token_index += parser.token_increment; - // Continue. - continue; - } - - if (parser.state == State::AUTHORITY) { - // If parser’s state is "authority": - // Run rewind and set state given parser, and "hostname". - parser.rewind(); - parser.change_state(State::HOSTNAME, 0); - // Increment parser’s token index by parser’s token increment. - parser.token_index += parser.token_increment; - // Continue. - continue; - } - - // Run change state given parser, "done" and 0. - parser.change_state(State::DONE, 0); - // Break. - break; - } - - // If the result of running is a group open given parser is true: - if (parser.is_group_open()) { - // Increment parser’s group depth by 1. - parser.group_depth += 1; - // Increment parser’s token index by parser’s token increment. - parser.token_index += parser.token_increment; - } - - // If parser’s group depth is greater than 0: - if (parser.group_depth > 0) { - // If the result of running is a group close given parser is true, then - // decrement parser’s group depth by 1. - if (parser.is_group_close()) { - parser.group_depth -= 1; - } else { - // Increment parser’s token index by parser’s token increment. - parser.token_index += parser.token_increment; - continue; - } - } - - // Switch on parser’s state and run the associated steps: - switch (parser.state) { - case State::INIT: { - // If the result of running is a protocol suffix given parser is true: - if (parser.is_protocol_suffix()) { - // Run rewind and set state given parser and "protocol". - parser.rewind(); - parser.change_state(State::PROTOCOL, 0); - } - break; - } - case State::PROTOCOL: { - // If the result of running is a protocol suffix given parser is true: - if (parser.is_protocol_suffix()) { - // Run compute protocol matches a special scheme flag given parser. - if (const auto error = - parser.compute_protocol_matches_special_scheme_flag( - provider)) { - ada_log("compute_protocol_matches_special_scheme_flag failed"); - return tl::unexpected(*error); - } - // Let next state be "pathname". - auto next_state = State::PATHNAME; - // Let skip be 1. - auto skip = 1; - // If the result of running next is authority slashes given parser is - // true: - if (parser.next_is_authority_slashes()) { - // Set next state to "authority". - next_state = State::AUTHORITY; - // Set skip to 3. - skip = 3; - } else if (parser.protocol_matches_a_special_scheme_flag) { - // Otherwise if parser’s protocol matches a special scheme flag is - // true, then set next state to "authority". - next_state = State::AUTHORITY; - } - - // Run change state given parser, next state, and skip. - parser.change_state(next_state, skip); - } - break; - } - case State::AUTHORITY: { - // If the result of running is an identity terminator given parser is - // true, then run rewind and set state given parser and "username". - if (parser.is_an_identity_terminator()) { - parser.rewind(); - parser.change_state(State::USERNAME, 0); - } else if (parser.is_pathname_start() || parser.is_search_prefix() || - parser.is_hash_prefix()) { - // Otherwise if any of the following are true: - // - the result of running is a pathname start given parser; - // - the result of running is a search prefix given parser; or - // - the result of running is a hash prefix given parser, - // then run rewind and set state given parser and "hostname". - parser.rewind(); - parser.change_state(State::HOSTNAME, 0); - } - break; - } - case State::USERNAME: { - // If the result of running is a password prefix given parser is true, - // then run change state given parser, "password", and 1. - if (parser.is_password_prefix()) { - parser.change_state(State::PASSWORD, 1); - } else if (parser.is_an_identity_terminator()) { - // Otherwise if the result of running is an identity terminator given - // parser is true, then run change state given parser, "hostname", - // and 1. - parser.change_state(State::HOSTNAME, 1); - } - break; - } - case State::PASSWORD: { - // If the result of running is an identity terminator given parser is - // true, then run change state given parser, "hostname", and 1. - if (parser.is_an_identity_terminator()) { - parser.change_state(State::HOSTNAME, 1); - } - break; - } - case State::HOSTNAME: { - // If the result of running is an IPv6 open given parser is true, then - // increment parser’s hostname IPv6 bracket depth by 1. - if (parser.is_an_ipv6_open()) { - parser.hostname_ipv6_bracket_depth += 1; - } else if (parser.is_an_ipv6_close()) { - // Otherwise if the result of running is an IPv6 close given parser is - // true, then decrement parser’s hostname IPv6 bracket depth by 1. - parser.hostname_ipv6_bracket_depth -= 1; - } else if (parser.is_port_prefix() && - parser.hostname_ipv6_bracket_depth == 0) { - // Otherwise if the result of running is a port prefix given parser is - // true and parser’s hostname IPv6 bracket depth is zero, then run - // change state given parser, "port", and 1. - parser.change_state(State::PORT, 1); - } else if (parser.is_pathname_start()) { - // Otherwise if the result of running is a pathname start given parser - // is true, then run change state given parser, "pathname", and 0. - parser.change_state(State::PATHNAME, 0); - } else if (parser.is_search_prefix()) { - // Otherwise if the result of running is a search prefix given parser - // is true, then run change state given parser, "search", and 1. - parser.change_state(State::SEARCH, 1); - } else if (parser.is_hash_prefix()) { - // Otherwise if the result of running is a hash prefix given parser is - // true, then run change state given parser, "hash", and 1. - parser.change_state(State::HASH, 1); - } - - break; - } - case State::PORT: { - // If the result of running is a pathname start given parser is true, - // then run change state given parser, "pathname", and 0. - if (parser.is_pathname_start()) { - parser.change_state(State::PATHNAME, 0); - } else if (parser.is_search_prefix()) { - // Otherwise if the result of running is a search prefix given parser - // is true, then run change state given parser, "search", and 1. - parser.change_state(State::SEARCH, 1); - } else if (parser.is_hash_prefix()) { - // Otherwise if the result of running is a hash prefix given parser is - // true, then run change state given parser, "hash", and 1. - parser.change_state(State::HASH, 1); - } - break; - } - case State::PATHNAME: { - // If the result of running is a search prefix given parser is true, - // then run change state given parser, "search", and 1. - if (parser.is_search_prefix()) { - parser.change_state(State::SEARCH, 1); - } else if (parser.is_hash_prefix()) { - // Otherwise if the result of running is a hash prefix given parser is - // true, then run change state given parser, "hash", and 1. - parser.change_state(State::HASH, 1); - } - break; - } - case State::SEARCH: { - // If the result of running is a hash prefix given parser is true, then - // run change state given parser, "hash", and 1. - if (parser.is_hash_prefix()) { - parser.change_state(State::HASH, 1); - } - } - case State::HASH: { - // Do nothing - break; - } - default: { - // Assert: This step is never reached. - unreachable(); - } - } - - // Increment parser’s token index by parser’s token increment. - parser.token_index += parser.token_increment; - } - - // If parser’s result contains "hostname" and not "port", then set parser’s - // result["port"] to the empty string. - if (parser.result.hostname && !parser.result.port) { - parser.result.port = ""; - } - - // Return parser’s result. - return parser.result; -} - tl::expected, errors> tokenize(std::string_view input, token_policy policy) { ada_log("tokenize input: ", input);