diff --git a/include/ada/helpers.h b/include/ada/helpers.h index 2c33de47a..42a2c4fe2 100644 --- a/include/ada/helpers.h +++ b/include/ada/helpers.h @@ -80,6 +80,13 @@ ada_really_inline void parse_prepared_path(std::string_view input, */ ada_really_inline void remove_ascii_tab_or_newline(std::string& input) noexcept; +/** + * @private + * Return a copy of input with all ASCII tab or newline characters removed. + */ +[[nodiscard]] ada_really_inline std::string get_ascii_tab_or_newline_removed( + std::string_view input); + /** * @private * Return the substring from input going from index pos to the end. diff --git a/src/helpers.cpp b/src/helpers.cpp index 11193800b..c7f3ffe8a 100644 --- a/src/helpers.cpp +++ b/src/helpers.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include namespace ada::helpers { @@ -155,12 +156,21 @@ ada_really_inline void remove_ascii_tab_or_newline( // if this ever becomes a performance issue, we could use an approach similar // to has_tabs_or_newline input.erase(std::remove_if(input.begin(), input.end(), - [](char c) { - return ada::unicode::is_ascii_tab_or_newline(c); - }), + ada::unicode::is_ascii_tab_or_newline), input.end()); } +ada_really_inline std::string get_ascii_tab_or_newline_removed( + std::string_view input) { + std::string res; + res.reserve(input.size()); + + std::copy_if(input.begin(), input.end(), std::back_insert_iterator{res}, + std::not_fn(ada::unicode::is_ascii_tab_or_newline)); + + return res; +} + ada_really_inline std::string_view substring(std::string_view input, size_t pos) noexcept { ADA_ASSERT_TRUE(pos <= input.size()); diff --git a/src/implementation.cpp b/src/implementation.cpp index e5ad039cd..9dcef1d7d 100644 --- a/src/implementation.cpp +++ b/src/implementation.cpp @@ -28,8 +28,7 @@ std::string href_from_file(std::string_view input) { std::string tmp_buffer; std::string_view internal_input; if (unicode::has_tabs_or_newline(input)) { - tmp_buffer = input; - 
helpers::remove_ascii_tab_or_newline(tmp_buffer); + tmp_buffer = helpers::get_ascii_tab_or_newline_removed(input); internal_input = tmp_buffer; } else { internal_input = input; diff --git a/src/parser.cpp b/src/parser.cpp index 680ce35e4..f02fa563c 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -1,6 +1,7 @@ #include "ada.h" #include "ada/common_defs.h" #include "ada/character_sets-inl.h" +#include "ada/helpers.h" #include "ada/unicode.h" #include "ada/url-inl.h" #include "ada/log.h" @@ -72,10 +73,7 @@ result_type parse_url(std::string_view user_input, std::string tmp_buffer; std::string_view internal_input; if (unicode::has_tabs_or_newline(user_input)) { - tmp_buffer = user_input; - // Optimization opportunity: Instead of copying and then pruning, we could - // just directly build the string from user_input. - helpers::remove_ascii_tab_or_newline(tmp_buffer); + tmp_buffer = helpers::get_ascii_tab_or_newline_removed(user_input); internal_input = tmp_buffer; } else { internal_input = user_input; diff --git a/src/url.cpp b/src/url.cpp index c4e1c487e..9dfb9f9e5 100644 --- a/src/url.cpp +++ b/src/url.cpp @@ -496,10 +496,7 @@ ada_really_inline void url::parse_path(std::string_view input) { std::string tmp_buffer; std::string_view internal_input; if (unicode::has_tabs_or_newline(input)) { - tmp_buffer = input; - // Optimization opportunity: Instead of copying and then pruning, we could - // just directly build the string from user_input. 
- helpers::remove_ascii_tab_or_newline(tmp_buffer); + tmp_buffer = helpers::get_ascii_tab_or_newline_removed(input); internal_input = tmp_buffer; } else { internal_input = input; diff --git a/src/url_aggregator.cpp b/src/url_aggregator.cpp index 2b761ad34..6236a40f1 100644 --- a/src/url_aggregator.cpp +++ b/src/url_aggregator.cpp @@ -333,10 +333,7 @@ ada_really_inline void url_aggregator::parse_path(std::string_view input) { std::string tmp_buffer; std::string_view internal_input; if (unicode::has_tabs_or_newline(input)) { - tmp_buffer = input; - // Optimization opportunity: Instead of copying and then pruning, we could - // just directly build the string from user_input. - helpers::remove_ascii_tab_or_newline(tmp_buffer); + tmp_buffer = helpers::get_ascii_tab_or_newline_removed(input); internal_input = tmp_buffer; } else { internal_input = input;