diff --git a/include/ada/implementation.h b/include/ada/implementation.h index 117b0ff6b..9b22c1eeb 100644 --- a/include/ada/implementation.h +++ b/include/ada/implementation.h @@ -61,11 +61,13 @@ bool can_parse(std::string_view input, * use ada::url_pattern_regex::std_regex_provider * @return url_pattern instance */ -ada_warn_unused tl::expected parse_url_pattern( +template +ada_warn_unused tl::expected, errors> parse_url_pattern( std::variant input, const std::string_view* base_url = nullptr, const url_pattern_options* options = nullptr, - std::optional regex_provider = std::nullopt); + std::optional> regex_provider = + std::nullopt); /** * Computes a href string from a file path. The function assumes diff --git a/include/ada/parser.h b/include/ada/parser.h index 60bf69122..1179e8c83 100644 --- a/include/ada/parser.h +++ b/include/ada/parser.h @@ -17,6 +17,7 @@ namespace ada { struct url_aggregator; struct url; +template class url_pattern; struct url_pattern_options; struct url_pattern_init; @@ -52,10 +53,11 @@ extern template url_aggregator parse_url_impl( extern template url parse_url_impl(std::string_view user_input, const url* base_url); -tl::expected parse_url_pattern_impl( +template +tl::expected, errors> parse_url_pattern_impl( std::variant input, const std::string_view* base_url, const url_pattern_options* options, - url_pattern_regex::provider&& regex_provider); + url_pattern_regex::provider&& regex_provider); } // namespace ada::parser diff --git a/include/ada/url_aggregator.h b/include/ada/url_aggregator.h index 66f7991c3..4c5ac1d1a 100644 --- a/include/ada/url_aggregator.h +++ b/include/ada/url_aggregator.h @@ -222,7 +222,8 @@ struct url_aggregator : url_base { friend url_aggregator parser::parse_url_impl( std::string_view, const url_aggregator *); // url_pattern methods - friend tl::expected parse_url_pattern_impl( + template + friend tl::expected, errors> parse_url_pattern_impl( std::variant input, const std::string_view *base_url, const url_pattern_options *options); diff --git a/include/ada/url_pattern-inl.h b/include/ada/url_pattern-inl.h index 2ad9e0af4..6567dd146 100644 --- a/include/ada/url_pattern-inl.h +++ b/include/ada/url_pattern-inl.h @@ -8,6 +8,7 @@ #include "ada/common_defs.h" #include "ada/url_pattern.h" +#include <_regex.h> #include namespace ada { @@ -24,7 +25,8 @@ inline bool url_pattern_component_result::operator==( return input == other.input && groups == other.groups; } -inline std::string url_pattern_component::to_string() const { +template +std::string url_pattern_component::to_string() const { #ifdef ADA_HAS_FORMAT return std::format(R"({{"pattern": "{}", "has_regexp_groups": {}}})", pattern, has_regexp_groups ? "true" : "false" //, @@ -34,8 +36,9 @@ inline std::string url_pattern_component::to_string() const { #endif } -inline url_pattern_component_result -url_pattern_component::create_component_match_result( +template +url_pattern_component_result +url_pattern_component::create_component_match_result( std::string_view input, const std::smatch& exec_result) { // Let result be a new URLPatternComponentResult. // Set result["input"] to input. @@ -70,7 +73,8 @@ url_pattern_component::create_component_match_result( return result; } -inline std::string url_pattern::to_string() const { +template +std::string url_pattern::to_string() const { #ifdef ADA_HAS_FORMAT return std::format( R"({{"protocol_component": "{}", "username_component": {}, "password_component": {}, "hostname_component": {}, "port_component": {}, "pathname_component": {}, "search_component": {}, "hash_component": {}, "ignore_case": {}}})", @@ -84,42 +88,58 @@ inline std::string url_pattern::to_string() const { #endif } -inline std::string_view url_pattern::get_protocol() const ada_lifetime_bound { +template +std::string_view url_pattern::get_protocol() const + ada_lifetime_bound { // Return this's associated URL pattern's protocol component's pattern string. return protocol_component.pattern; } -inline std::string_view url_pattern::get_username() const ada_lifetime_bound { +template +std::string_view url_pattern::get_username() const + ada_lifetime_bound { // Return this's associated URL pattern's username component's pattern string. return username_component.pattern; } -inline std::string_view url_pattern::get_password() const ada_lifetime_bound { +template +std::string_view url_pattern::get_password() const + ada_lifetime_bound { // Return this's associated URL pattern's password component's pattern string. return password_component.pattern; } -inline std::string_view url_pattern::get_hostname() const ada_lifetime_bound { +template +std::string_view url_pattern::get_hostname() const + ada_lifetime_bound { // Return this's associated URL pattern's hostname component's pattern string. return hostname_component.pattern; } -inline std::string_view url_pattern::get_port() const ada_lifetime_bound { +template +std::string_view url_pattern::get_port() const ada_lifetime_bound { // Return this's associated URL pattern's port component's pattern string. return port_component.pattern; } -inline std::string_view url_pattern::get_pathname() const ada_lifetime_bound { +template +std::string_view url_pattern::get_pathname() const + ada_lifetime_bound { // Return this's associated URL pattern's pathname component's pattern string. return pathname_component.pattern; } -inline std::string_view url_pattern::get_search() const ada_lifetime_bound { +template +std::string_view url_pattern::get_search() const + ada_lifetime_bound { // Return this's associated URL pattern's search component's pattern string. return search_component.pattern; } -inline std::string_view url_pattern::get_hash() const ada_lifetime_bound { +template +std::string_view url_pattern::get_hash() const ada_lifetime_bound { // Return this's associated URL pattern's hash component's pattern string. return hash_component.pattern; } - -inline bool url_pattern::ignore_case() const { return ignore_case_; } - -inline bool url_pattern::has_regexp_groups() const { +template +bool url_pattern::ignore_case() const { + return ignore_case_; +} +template +bool url_pattern::has_regexp_groups() const { // If this's associated URL pattern's has regexp groups, then return true. return protocol_component.has_regexp_groups || username_component.has_regexp_groups || diff --git a/include/ada/url_pattern.h b/include/ada/url_pattern.h index 3a5d0fcb4..843643241 100644 --- a/include/ada/url_pattern.h +++ b/include/ada/url_pattern.h @@ -9,6 +9,7 @@ #include "ada/expected.h" #include "ada/url_pattern_regex.h" +#include <_regex.h> #include #include #include @@ -19,11 +20,11 @@ namespace ada { namespace parser { template + typename url_pattern_options, class regex_type> tl::expected parse_url_pattern_impl( std::variant input, const std::string_view* base_url, const url_pattern_options* options, - url_pattern_regex::provider&& regex_provider); + url_pattern_regex::provider&& regex_provider); } // Important: C++20 allows us to use concept rather than `using` or `typedef @@ -207,19 +208,18 @@ struct url_pattern_component_result { #endif // ADA_TESTING }; +template class url_pattern_component { public: url_pattern_component() = default; // This function explicitly takes a std::string because it is moved. // To avoid unnecessary copy, move each value while calling the constructor. - url_pattern_component(std::string&& new_pattern, std::regex&& new_regexp, - std::regex_constants::syntax_option_type new_flags, + url_pattern_component(std::string&& new_pattern, regex_type&& new_regexp, std::vector&& new_group_name_list, bool new_has_regexp_groups) : regexp(std::move(new_regexp)), pattern(std::move(new_pattern)), - flags(new_flags), group_name_list(new_group_name_list), has_regexp_groups(new_has_regexp_groups) {} @@ -227,7 +227,8 @@ class url_pattern_component { template static tl::expected compile( std::string_view input, F& encoding_callback, - url_pattern_compile_component_options& options); + url_pattern_compile_component_options& options, + const url_pattern_regex::provider& regex_provider); // @see https://urlpattern.spec.whatwg.org/#create-a-component-match-result url_pattern_component_result create_component_match_result( @@ -235,9 +236,8 @@ class url_pattern_component { std::string to_string() const; - std::regex regexp{}; + regex_type regexp{}; std::string pattern{}; - std::regex_constants::syntax_option_type flags = std::regex::ECMAScript; std::vector group_name_list{}; bool has_regexp_groups = false; }; @@ -270,9 +270,10 @@ struct url_pattern_options { // defined in https://wicg.github.io/urlpattern. // More information about the URL Pattern syntax can be found at // https://developer.mozilla.org/en-US/docs/Web/API/URL_Pattern_API +template class url_pattern { public: - explicit url_pattern(url_pattern_regex::provider&& regex_provider) + explicit url_pattern(url_pattern_regex::provider&& regex_provider) : regex_provider_(std::move(regex_provider)) {} /** @@ -319,23 +320,23 @@ class url_pattern { std::string to_string() const; - url_pattern_component protocol_component{}; - url_pattern_component username_component{}; - url_pattern_component password_component{}; - url_pattern_component hostname_component{}; - url_pattern_component port_component{}; - url_pattern_component pathname_component{}; - url_pattern_component search_component{}; - url_pattern_component hash_component{}; + url_pattern_component protocol_component{}; + url_pattern_component username_component{}; + url_pattern_component password_component{}; + url_pattern_component hostname_component{}; + url_pattern_component port_component{}; + url_pattern_component pathname_component{}; + url_pattern_component search_component{}; + url_pattern_component hash_component{}; bool ignore_case_ = false; - url_pattern_regex::provider regex_provider_; + url_pattern_regex::provider regex_provider_; template + typename url_pattern_options, typename regex_provider_type> friend tl::expected parser::parse_url_pattern_impl( std::variant input, const std::string_view* base_url, const url_pattern_options* options, - url_pattern_regex::provider&& regex_provider); + url_pattern_regex::provider&& regex_provider); }; } // namespace ada diff --git a/include/ada/url_pattern_helpers-inl.h b/include/ada/url_pattern_helpers-inl.h index 37311bb2b..dc108d9e7 100644 --- a/include/ada/url_pattern_helpers-inl.h +++ b/include/ada/url_pattern_helpers-inl.h @@ -39,20 +39,23 @@ inline std::string to_string(token_type type) { } } -inline void constructor_string_parser::rewind() { +template +inline void constructor_string_parser::rewind() { // Set parser’s token index to parser’s component start. token_index = component_start; // Set parser’s token increment to 0. token_increment = 0; } -inline bool constructor_string_parser::is_hash_prefix() { +template +inline bool constructor_string_parser::is_hash_prefix() { // Return the result of running is a non-special pattern char given parser, // parser’s token index and "#". return is_non_special_pattern_char(token_index, "#"); } -inline bool constructor_string_parser::is_search_prefix() { +template +inline bool constructor_string_parser::is_search_prefix() { // If result of running is a non-special pattern char given parser, parser’s // token index and "?" is true, then return true. if (is_non_special_pattern_char(token_index, "?")) { @@ -84,7 +87,8 @@ inline bool constructor_string_parser::is_search_prefix() { previous_token->type == token_type::ASTERISK); } -inline bool constructor_string_parser::is_non_special_pattern_char( +template +inline bool constructor_string_parser::is_non_special_pattern_char( size_t index, std::string_view value) { // Let token be the result of running get a safe token given parser and index. auto token = get_safe_token(index); @@ -105,7 +109,9 @@ inline bool constructor_string_parser::is_non_special_pattern_char( token->type == token_type::INVALID_CHAR; } -inline const Token* constructor_string_parser::get_safe_token(size_t index) { +template +inline const Token* constructor_string_parser::get_safe_token( + size_t index) { // If index is less than parser’s token list's size, then return parser’s // token list[index]. if (index < token_list.size()) [[likely]] { @@ -123,19 +129,22 @@ inline const Token* constructor_string_parser::get_safe_token(size_t index) { return &token_list.back(); } -inline bool constructor_string_parser::is_group_open() const { +template +inline bool constructor_string_parser::is_group_open() const { // If parser’s token list[parser’s token index]'s type is "open", then return // true. return token_list[token_index].type == token_type::OPEN; } -inline bool constructor_string_parser::is_group_close() const { +template +inline bool constructor_string_parser::is_group_close() const { // If parser’s token list[parser’s token index]'s type is "close", then return // true. return token_list[token_index].type == token_type::CLOSE; } -inline bool constructor_string_parser::next_is_authority_slashes() { +template +inline bool constructor_string_parser::next_is_authority_slashes() { // If the result of running is a non-special pattern char given parser, // parser’s token index + 1, and "/" is false, then return false. if (!is_non_special_pattern_char(token_index + 1, "/")) { @@ -149,14 +158,16 @@ inline bool constructor_string_parser::next_is_authority_slashes() { return true; } -inline bool constructor_string_parser::is_protocol_suffix() { +template +inline bool constructor_string_parser::is_protocol_suffix() { // Return the result of running is a non-special pattern char given parser, // parser’s token index, and ":". return is_non_special_pattern_char(token_index, ":"); } -inline void constructor_string_parser::change_state(State new_state, - size_t skip) { +template +inline void constructor_string_parser::change_state(State new_state, + size_t skip) { // If parser’s state is not "init", not "authority", and not "done", then set // parser’s result[parser’s state] to the result of running make a component // string given parser. @@ -254,7 +265,9 @@ inline void constructor_string_parser::change_state(State new_state, token_increment = 0; } -inline std::string constructor_string_parser::make_component_string() { +template +inline std::string +constructor_string_parser::make_component_string() { // Assert: parser’s token index is less than parser’s token list's size. ADA_ASSERT_TRUE(token_index < token_list.size()); @@ -273,37 +286,43 @@ inline std::string constructor_string_parser::make_component_string() { end_index - component_start_input_index); } -inline bool constructor_string_parser::is_an_identity_terminator() { +template +inline bool constructor_string_parser::is_an_identity_terminator() { // Return the result of running is a non-special pattern char given parser, // parser’s token index, and "@". return is_non_special_pattern_char(token_index, "@"); } -inline bool constructor_string_parser::is_pathname_start() { +template +inline bool constructor_string_parser::is_pathname_start() { // Return the result of running is a non-special pattern char given parser, // parser’s token index, and "/". return is_non_special_pattern_char(token_index, "/"); } -inline bool constructor_string_parser::is_password_prefix() { +template +inline bool constructor_string_parser::is_password_prefix() { // Return the result of running is a non-special pattern char given parser, // parser’s token index, and ":". return is_non_special_pattern_char(token_index, ":"); } -inline bool constructor_string_parser::is_an_ipv6_open() { +template +inline bool constructor_string_parser::is_an_ipv6_open() { // Return the result of running is a non-special pattern char given parser, // parser’s token index, and "[". return is_non_special_pattern_char(token_index, "["); } -inline bool constructor_string_parser::is_an_ipv6_close() { +template +inline bool constructor_string_parser::is_an_ipv6_close() { // Return the result of running is a non-special pattern char given parser, // parser’s token index, and "]". return is_non_special_pattern_char(token_index, "]"); } -inline bool constructor_string_parser::is_port_prefix() { +template +inline bool constructor_string_parser::is_port_prefix() { // Return the result of running is a non-special pattern char given parser, // parser’s token index, and ":". return is_non_special_pattern_char(token_index, ":"); diff --git a/include/ada/url_pattern_helpers.h b/include/ada/url_pattern_helpers.h index 4d9c29f65..f2945bfc1 100644 --- a/include/ada/url_pattern_helpers.h +++ b/include/ada/url_pattern_helpers.h @@ -139,6 +139,7 @@ class Tokenizer { }; // @see https://urlpattern.spec.whatwg.org/#constructor-string-parser +template struct constructor_string_parser { explicit constructor_string_parser(std::string_view new_input, std::vector&& new_token_list) @@ -321,8 +322,9 @@ bool is_ipv6_address(std::string_view input) noexcept; // @see // https://urlpattern.spec.whatwg.org/#protocol-component-matches-a-special-scheme +template bool protocol_component_matches_special_scheme( - ada::url_pattern_component& input); + ada::url_pattern_component& input); // @see https://urlpattern.spec.whatwg.org/#convert-a-modifier-to-a-string std::string convert_modifier_to_string(url_pattern_part_modifier modifier); diff --git a/include/ada/url_pattern_regex.h b/include/ada/url_pattern_regex.h index 725efd8fc..160db0abe 100644 --- a/include/ada/url_pattern_regex.h +++ b/include/ada/url_pattern_regex.h @@ -9,19 +9,28 @@ namespace ada::url_pattern_regex { +template class provider { - struct type {}; - - std::optional create_regex_instance(std::string_view pattern, - bool ignore_case); - - std::optional> regex_search(std::string_view input, std::string_view pattern); + public: + virtual ~provider() = default; + virtual std::optional create_instance(std::string_view pattern, + bool ignore_case) = 0; + virtual std::optional> regex_search( + std::string_view input, const T& pattern) = 0; + virtual bool regex_match(std::string_view input, const T& pattern) = 0; }; -class std_regex_provider : public provider { - +class std_regex_provider : public virtual provider { + public: + std_regex_provider() = default; + ~std_regex_provider() override = default; + std::optional create_instance(std::string_view pattern, + bool ignore_case) override; + std::optional> regex_search( + std::string_view input, const std::regex& pattern) override; + bool regex_match(std::string_view input, const std::regex& pattern) override; }; } // namespace ada::url_pattern_regex -#endif // ADA_URL_PATTERN_REGEX_H +#endif // ADA_URL_PATTERN_REGEX_H diff --git a/src/implementation.cpp b/src/implementation.cpp index 14476f5c9..175f94b32 100644 --- a/src/implementation.cpp +++ b/src/implementation.cpp @@ -79,10 +79,11 @@ ada_warn_unused std::string to_string(ada::encoding_type type) { } } -ada_warn_unused tl::expected parse_url_pattern( +template +ada_warn_unused tl::expected, errors> parse_url_pattern( std::variant input, const std::string_view* base_url, const url_pattern_options* options, - std::optional regex_provider) { + std::optional> regex_provider) { return parser::parse_url_pattern_impl( std::move(input), base_url, options, regex_provider.value_or(url_pattern_regex::std_regex_provider())); diff --git a/src/parser.cpp b/src/parser.cpp index 31a283019..6bea8bea2 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -898,18 +898,20 @@ result_type parse_url_impl(std::string_view user_input, return url; } -tl::expected parse_url_pattern_impl( +template +tl::expected, errors> parse_url_pattern_impl( std::variant input, const std::string_view* base_url, const url_pattern_options* options, - url_pattern_regex::provider&& regex_provider) { + url_pattern_regex::provider&& regex_provider) { // Let init be null. url_pattern_init init; // If input is a scalar value string then: if (std::holds_alternative(input)) { // Set init to the result of running parse a constructor string given input. - auto parse_result = url_pattern_helpers::constructor_string_parser::parse( - std::get(input)); + auto parse_result = + url_pattern_helpers::constructor_string_parser::parse( + std::get(input)); if (!parse_result) { ada_log("constructor_string_parser::parse failed"); return tl::unexpected(parse_result.error()); @@ -989,7 +991,7 @@ tl::expected parse_url_pattern_impl( // Set urlPattern’s protocol component to the result of compiling a component // given processedInit["protocol"], canonicalize a protocol, and default // options. - auto protocol_component = url_pattern_component::compile( + auto protocol_component = url_pattern_component::compile( processed_init->protocol.value(), url_pattern_helpers::canonicalize_protocol, url_pattern_compile_component_options::DEFAULT); @@ -1003,7 +1005,7 @@ tl::expected parse_url_pattern_impl( // Set urlPattern’s username component to the result of compiling a component // given processedInit["username"], canonicalize a username, and default // options. - auto username_component = url_pattern_component::compile( + auto username_component = url_pattern_component::compile( processed_init->username.value(), url_pattern_helpers::canonicalize_username, url_pattern_compile_component_options::DEFAULT); @@ -1017,7 +1019,7 @@ tl::expected parse_url_pattern_impl( // Set urlPattern’s password component to the result of compiling a component // given processedInit["password"], canonicalize a password, and default // options. - auto password_component = url_pattern_component::compile( + auto password_component = url_pattern_component::compile( processed_init->password.value(), url_pattern_helpers::canonicalize_password, url_pattern_compile_component_options::DEFAULT); @@ -1039,7 +1041,7 @@ tl::expected parse_url_pattern_impl( // then set urlPattern’s hostname component to the result of compiling a // component given processedInit["hostname"], canonicalize an IPv6 hostname, // and hostname options. - auto hostname_component = url_pattern_component::compile( + auto hostname_component = url_pattern_component::compile( processed_init->hostname.value(), url_pattern_helpers::canonicalize_ipv6_hostname, url_pattern_compile_component_options::DEFAULT); @@ -1053,7 +1055,7 @@ tl::expected parse_url_pattern_impl( // Otherwise, set urlPattern’s hostname component to the result of compiling // a component given processedInit["hostname"], canonicalize a hostname, and // hostname options. - auto hostname_component = url_pattern_component::compile( + auto hostname_component = url_pattern_component::compile( processed_init->hostname.value(), url_pattern_helpers::canonicalize_hostname, url_pattern_compile_component_options::HOSTNAME); @@ -1067,7 +1069,7 @@ tl::expected parse_url_pattern_impl( // Set urlPattern’s port component to the result of compiling a component // given processedInit["port"], canonicalize a port, and default options. - auto port_component = url_pattern_component::compile( + auto port_component = url_pattern_component::compile( processed_init->port.value(), url_pattern_helpers::canonicalize_port, url_pattern_compile_component_options::DEFAULT); if (!port_component) { @@ -1099,7 +1101,7 @@ tl::expected parse_url_pattern_impl( // Set urlPattern’s pathname component to the result of compiling a // component given processedInit["pathname"], canonicalize a pathname, and // pathCompileOptions. - auto pathname_component = url_pattern_component::compile( + auto pathname_component = url_pattern_component::compile( processed_init->pathname.value(), url_pattern_helpers::canonicalize_pathname, path_compile_options); if (!pathname_component) { @@ -1112,7 +1114,7 @@ tl::expected parse_url_pattern_impl( // Otherwise set urlPattern’s pathname component to the result of compiling // a component given processedInit["pathname"], canonicalize an opaque // pathname, and compileOptions. - auto pathname_component = url_pattern_component::compile( + auto pathname_component = url_pattern_component::compile( processed_init->pathname.value(), url_pattern_helpers::canonicalize_opaque_pathname, compile_options); if (!pathname_component) { @@ -1125,7 +1127,7 @@ tl::expected parse_url_pattern_impl( // Set urlPattern’s search component to the result of compiling a component // given processedInit["search"], canonicalize a search, and compileOptions. - auto search_component = url_pattern_component::compile( + auto search_component = url_pattern_component::compile( processed_init->search.value(), url_pattern_helpers::canonicalize_search, compile_options); if (!search_component) { @@ -1137,7 +1139,7 @@ tl::expected parse_url_pattern_impl( // Set urlPattern’s hash component to the result of compiling a component // given processedInit["hash"], canonicalize a hash, and compileOptions. - auto hash_component = url_pattern_component::compile( + auto hash_component = url_pattern_component::compile( processed_init->hash.value(), url_pattern_helpers::canonicalize_hash, compile_options); if (!hash_component) { diff --git a/src/url_pattern.cpp b/src/url_pattern.cpp index 95ec41ded..a5f101bd0 100644 --- a/src/url_pattern.cpp +++ b/src/url_pattern.cpp @@ -450,10 +450,13 @@ std::string url_pattern_init::to_string() const { return answer; } +template template -tl::expected url_pattern_component::compile( +tl::expected, errors> +url_pattern_component::compile( std::string_view input, F& encoding_callback, - url_pattern_compile_component_options& options) { + url_pattern_compile_component_options& options, + const url_pattern_regex::provider& regex_provider) { ada_log("url_pattern_component::compile input: ", input); // Let part list be the result of running parse a pattern string given input, // options, and encoding callback. @@ -473,13 +476,6 @@ tl::expected url_pattern_component::compile( ada_log("regular expression string: ", regular_expression_string); - // Let flags be an empty string. - // If options’s ignore case is true then set flags to "vi". - // Otherwise set flags to "v" - auto flags = options.ignore_case - ? std::regex::icase | std::regex_constants::ECMAScript - : std::regex_constants::ECMAScript; - // Let pattern string be the result of running generate a pattern // string given part list and options. auto pattern_string = @@ -488,12 +484,10 @@ tl::expected url_pattern_component::compile( // Let regular expression be RegExpCreate(regular expression string, // flags). If this throws an exception, catch it, and throw a // TypeError. - std::regex regular_expression; - try { - regular_expression = std::regex(regular_expression_string, flags); - } catch (std::regex_error& error) { - (void)error; - ada_log("std::regex_error: ", error.what()); + auto regular_expression = regex_provider.create_instance( + regular_expression_string, options.ignore_case); + + if (!regular_expression) { return tl::unexpected(errors::type_error); } @@ -507,20 +501,22 @@ tl::expected url_pattern_component::compile( // Return a new component whose pattern string is pattern string, regular // expression is regular expression, group name list is name list, and has // regexp groups is has regexp groups. - return url_pattern_component(std::move(pattern_string), - std::move(regular_expression), flags, - std::move(name_list), has_regexp_groups); + return url_pattern_component( + std::move(pattern_string), std::move(regular_expression), + std::move(name_list), has_regexp_groups); } -result> url_pattern::exec( - const url_pattern_input& input, std::string_view* base_url = nullptr) { +template +result> url_pattern::exec( + const url_pattern_input& input, std::string_view* base_url) { // Return the result of match given this's associated URL pattern, input, and // baseURL if given. return match(input, base_url); } -result url_pattern::test(const url_pattern_input& input, - std::string_view* base_url = nullptr) { +template +result url_pattern::test(const url_pattern_input& input, + std::string_view* base_url) { // TODO: Optimization opportunity. Rather than returning `url_pattern_result` // Implement a fast path just like `can_parse()` in ada_url. // Let result be the result of match given this's associated URL pattern, @@ -532,7 +528,8 @@ result url_pattern::test(const url_pattern_input& input, return tl::unexpected(errors::type_error); } -result> url_pattern::match( +template +result> url_pattern::match( const url_pattern_input& input, std::string_view* base_url_string) { std::string protocol{}; std::string username{}; diff --git a/src/url_pattern_helpers.cpp b/src/url_pattern_helpers.cpp index 56927635b..db8ba4838 100644 --- a/src/url_pattern_helpers.cpp +++ b/src/url_pattern_helpers.cpp @@ -190,16 +190,19 @@ std::string generate_segment_wildcard_regexp( return result; } +template bool protocol_component_matches_special_scheme( - url_pattern_component& component) { + url_pattern_component& component) { auto regex = component.regexp; + // TODO: Use provider.regex_match return std::regex_match("http", regex) || std::regex_match("https", regex) || std::regex_match("ws", regex) || std::regex_match("wss", regex) || std::regex_match("ftp", regex); } -inline std::optional -constructor_string_parser::compute_protocol_matches_special_scheme_flag() { +template +inline std::optional constructor_string_parser< + regex_type>::compute_protocol_matches_special_scheme_flag() { ada_log( "constructor_string_parser::compute_protocol_matches_special_scheme_" "flag"); @@ -208,7 +211,7 @@ constructor_string_parser::compute_protocol_matches_special_scheme_flag() { auto protocol_string = make_component_string(); // Let protocol component be the result of compiling a component given // protocol string, canonicalize a protocol, and default options. - auto protocol_component = url_pattern_component::compile( + auto protocol_component = url_pattern_component::compile( protocol_string, canonicalize_protocol, url_pattern_compile_component_options::DEFAULT); if (!protocol_component) { @@ -470,8 +473,9 @@ tl::expected canonicalize_hash(std::string_view input) { return tl::unexpected(errors::type_error); } -tl::expected constructor_string_parser::parse( - std::string_view input) { +template +tl::expected +constructor_string_parser::parse(std::string_view input) { ada_log("constructor_string_parser::parse input=", input); // Let parser be a new constructor string parser whose input is input and // token list is the result of running tokenize given input and "lenient". @@ -564,7 +568,8 @@ tl::expected constructor_string_parser::parse( if (parser.is_protocol_suffix()) { // Run compute protocol matches a special scheme flag given parser. if (const auto error = - parser.compute_protocol_matches_special_scheme_flag()) { + parser.template compute_protocol_matches_special_scheme_flag< + regex_type>()) { ada_log("compute_protocol_matches_special_scheme_flag failed"); return tl::unexpected(*error); } diff --git a/src/url_pattern_regex.cpp b/src/url_pattern_regex.cpp index e69de29bb..7528e000a 100644 --- a/src/url_pattern_regex.cpp +++ b/src/url_pattern_regex.cpp @@ -0,0 +1,34 @@ +#include +#include "ada/url_pattern_regex.h" + +namespace ada::url_pattern_regex { +std::optional std_regex_provider::create_instance( + std::string_view pattern, bool ignore_case) { + // Let flags be an empty string. + // If options’s ignore case is true then set flags to "vi". + // Otherwise set flags to "v" + auto flags = ignore_case + ? std::regex::icase | std::regex_constants::ECMAScript + : std::regex_constants::ECMAScript; + try { + return std::regex(pattern.data(), pattern.size(), flags); + } catch (const std::regex_error& e) { + (void)e; + ada_log("std_regex_provider::create_instance failed:", e.what()); + return std::nullopt; + } +} + +std::optional> std_regex_provider::regex_search( + std::string_view input, const std::regex& pattern) { + (void)input; + (void)pattern; + return {}; +} + +bool std_regex_provider::regex_match(std::string_view input, + const std::regex& pattern) { + return std::regex_match(input.data(), input.begin(), pattern); +} + +} // namespace ada::url_pattern_regex diff --git a/tests/wpt_urlpattern_tests.cpp b/tests/wpt_urlpattern_tests.cpp index 89bea3a13..f37f6bdfd 100644 --- a/tests/wpt_urlpattern_tests.cpp +++ b/tests/wpt_urlpattern_tests.cpp @@ -29,9 +29,10 @@ TEST(wpt_urlpattern_tests, parse_pattern_string_basic_tests) { } TEST(wpt_urlpattern_tests, compile_basic_tests) { - auto protocol_component = ada::url_pattern_component::compile( + auto provider = ada::url_pattern_regex::std_regex_provider(); + auto protocol_component = ada::url_pattern_component::compile( "*", ada::url_pattern_helpers::canonicalize_protocol, - ada::url_pattern_compile_component_options::DEFAULT); + ada::url_pattern_compile_component_options::DEFAULT, provider); ASSERT_TRUE(protocol_component); } @@ -224,7 +225,7 @@ parse_pattern_field(ondemand::array& patterns) { return std::tuple(*init_str, base_url, options); } -tl::expected parse_pattern( +tl::expected, ada::errors> parse_pattern( std::variant& init_variant, std::optional& base_url, std::optional& options) {