From 84f9ba1661ab6e2a7a422af7d1b44faa504cd151 Mon Sep 17 00:00:00 2001 From: PragmaTwice Date: Sat, 14 Oct 2023 00:22:09 +0900 Subject: [PATCH 1/3] Build the string directly instead of copying and then pruning tabs or newlines --- include/ada/helpers.h | 7 +++++++ src/helpers.cpp | 16 +++++++++++++--- src/implementation.cpp | 5 +---- src/parser.cpp | 8 ++------ src/url.cpp | 7 +------ src/url_aggregator.cpp | 7 +------ 6 files changed, 25 insertions(+), 25 deletions(-) diff --git a/include/ada/helpers.h b/include/ada/helpers.h index 2c33de47a..7971d7656 100644 --- a/include/ada/helpers.h +++ b/include/ada/helpers.h @@ -80,6 +80,13 @@ ada_really_inline void parse_prepared_path(std::string_view input, */ ada_really_inline void remove_ascii_tab_or_newline(std::string& input) noexcept; +/** + * @private + * Create a new string that all ASCII tab or newline characters are removed. + */ +ada_really_inline std::string get_ascii_tab_or_newline_removed( + std::string_view input) noexcept; + /** * @private * Return the substring from input going from index pos to the end. 
diff --git a/src/helpers.cpp b/src/helpers.cpp index 11193800b..e74524607 100644 --- a/src/helpers.cpp +++ b/src/helpers.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include namespace ada::helpers { @@ -155,12 +156,21 @@ ada_really_inline void remove_ascii_tab_or_newline( // if this ever becomes a performance issue, we could use an approach similar // to has_tabs_or_newline input.erase(std::remove_if(input.begin(), input.end(), - [](char c) { - return ada::unicode::is_ascii_tab_or_newline(c); - }), + ada::unicode::is_ascii_tab_or_newline), input.end()); } +ada_really_inline std::string get_ascii_tab_or_newline_removed( + std::string_view input) noexcept { + std::string res; + res.reserve(input.size()); + + std::copy_if(input.begin(), input.end(), std::back_insert_iterator{res}, + std::not_fn(ada::unicode::is_ascii_tab_or_newline)); + + return res; +} + ada_really_inline std::string_view substring(std::string_view input, size_t pos) noexcept { ADA_ASSERT_TRUE(pos <= input.size()); diff --git a/src/implementation.cpp b/src/implementation.cpp index e5ad039cd..b24beaf5e 100644 --- a/src/implementation.cpp +++ b/src/implementation.cpp @@ -25,12 +25,9 @@ template ada::result parse( std::string href_from_file(std::string_view input) { // This is going to be much faster than constructing a URL. 
- std::string tmp_buffer; std::string_view internal_input; if (unicode::has_tabs_or_newline(input)) { - tmp_buffer = input; - helpers::remove_ascii_tab_or_newline(tmp_buffer); - internal_input = tmp_buffer; + internal_input = helpers::get_ascii_tab_or_newline_removed(input); } else { internal_input = input; } diff --git a/src/parser.cpp b/src/parser.cpp index 680ce35e4..987186d2b 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -1,6 +1,7 @@ #include "ada.h" #include "ada/common_defs.h" #include "ada/character_sets-inl.h" +#include "ada/helpers.h" #include "ada/unicode.h" #include "ada/url-inl.h" #include "ada/log.h" @@ -69,14 +70,9 @@ result_type parse_url(std::string_view user_input, // // } - std::string tmp_buffer; std::string_view internal_input; if (unicode::has_tabs_or_newline(user_input)) { - tmp_buffer = user_input; - // Optimization opportunity: Instead of copying and then pruning, we could - // just directly build the string from user_input. - helpers::remove_ascii_tab_or_newline(tmp_buffer); - internal_input = tmp_buffer; + internal_input = helpers::get_ascii_tab_or_newline_removed(user_input); } else { internal_input = user_input; } diff --git a/src/url.cpp b/src/url.cpp index c4e1c487e..0351b1e1e 100644 --- a/src/url.cpp +++ b/src/url.cpp @@ -493,14 +493,9 @@ ada_really_inline bool url::parse_host(std::string_view input) { ada_really_inline void url::parse_path(std::string_view input) { ada_log("parse_path ", input); - std::string tmp_buffer; std::string_view internal_input; if (unicode::has_tabs_or_newline(input)) { - tmp_buffer = input; - // Optimization opportunity: Instead of copying and then pruning, we could - // just directly build the string from user_input. 
- helpers::remove_ascii_tab_or_newline(tmp_buffer); - internal_input = tmp_buffer; + internal_input = helpers::get_ascii_tab_or_newline_removed(user_input); } else { internal_input = input; } diff --git a/src/url_aggregator.cpp b/src/url_aggregator.cpp index 2b761ad34..52fec6758 100644 --- a/src/url_aggregator.cpp +++ b/src/url_aggregator.cpp @@ -330,14 +330,9 @@ ada_really_inline void url_aggregator::parse_path(std::string_view input) { ada_log("url_aggregator::parse_path ", input); ADA_ASSERT_TRUE(validate()); ADA_ASSERT_TRUE(!helpers::overlaps(input, buffer)); - std::string tmp_buffer; std::string_view internal_input; if (unicode::has_tabs_or_newline(input)) { - tmp_buffer = input; - // Optimization opportunity: Instead of copying and then pruning, we could - // just directly build the string from user_input. - helpers::remove_ascii_tab_or_newline(tmp_buffer); - internal_input = tmp_buffer; + internal_input = helpers::get_ascii_tab_or_newline_removed(user_input); } else { internal_input = input; } From 53be27219b4c796a250f369525a1c6bd079fecd9 Mon Sep 17 00:00:00 2001 From: PragmaTwice Date: Sat, 14 Oct 2023 00:26:07 +0900 Subject: [PATCH 2/3] Use input in parse_path; mark new helper [[nodiscard]], drop noexcept --- include/ada/helpers.h | 4 ++-- src/helpers.cpp | 2 +- src/url.cpp | 2 +- src/url_aggregator.cpp | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/ada/helpers.h b/include/ada/helpers.h index 7971d7656..42a2c4fe2 100644 --- a/include/ada/helpers.h +++ b/include/ada/helpers.h @@ -84,8 +84,8 @@ ada_really_inline void remove_ascii_tab_or_newline(std::string& input) noexcept; * @private * Create a new string that all ASCII tab or newline characters are removed. 
*/ -ada_really_inline std::string get_ascii_tab_or_newline_removed( - std::string_view input) noexcept; +[[nodiscard]] ada_really_inline std::string get_ascii_tab_or_newline_removed( + std::string_view input); /** * @private diff --git a/src/helpers.cpp b/src/helpers.cpp index e74524607..c7f3ffe8a 100644 --- a/src/helpers.cpp +++ b/src/helpers.cpp @@ -161,7 +161,7 @@ ada_really_inline void remove_ascii_tab_or_newline( } ada_really_inline std::string get_ascii_tab_or_newline_removed( - std::string_view input) noexcept { + std::string_view input) { std::string res; res.reserve(input.size()); diff --git a/src/url.cpp b/src/url.cpp index 0351b1e1e..d4c521c1b 100644 --- a/src/url.cpp +++ b/src/url.cpp @@ -495,7 +495,7 @@ ada_really_inline void url::parse_path(std::string_view input) { ada_log("parse_path ", input); std::string_view internal_input; if (unicode::has_tabs_or_newline(input)) { - internal_input = helpers::get_ascii_tab_or_newline_removed(user_input); + internal_input = helpers::get_ascii_tab_or_newline_removed(input); } else { internal_input = input; } diff --git a/src/url_aggregator.cpp b/src/url_aggregator.cpp index 52fec6758..936af833e 100644 --- a/src/url_aggregator.cpp +++ b/src/url_aggregator.cpp @@ -332,7 +332,7 @@ ada_really_inline void url_aggregator::parse_path(std::string_view input) { ADA_ASSERT_TRUE(!helpers::overlaps(input, buffer)); std::string_view internal_input; if (unicode::has_tabs_or_newline(input)) { - internal_input = helpers::get_ascii_tab_or_newline_removed(user_input); + internal_input = helpers::get_ascii_tab_or_newline_removed(input); } else { internal_input = input; } From 3ed78713acb448811c112e29b844ee4d1dba66ba Mon Sep 17 00:00:00 2001 From: PragmaTwice Date: Sat, 14 Oct 2023 10:31:33 +0900 Subject: [PATCH 3/3] Store stripped string in tmp_buffer to avoid dangling string_view --- src/implementation.cpp | 4 +++- src/parser.cpp | 4 +++- src/url.cpp | 4 +++- src/url_aggregator.cpp | 4 +++- 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/implementation.cpp 
b/src/implementation.cpp index b24beaf5e..9dcef1d7d 100644 --- a/src/implementation.cpp +++ b/src/implementation.cpp @@ -25,9 +25,11 @@ template ada::result parse( std::string href_from_file(std::string_view input) { // This is going to be much faster than constructing a URL. + std::string tmp_buffer; std::string_view internal_input; if (unicode::has_tabs_or_newline(input)) { - internal_input = helpers::get_ascii_tab_or_newline_removed(input); + tmp_buffer = helpers::get_ascii_tab_or_newline_removed(input); + internal_input = tmp_buffer; } else { internal_input = input; } diff --git a/src/parser.cpp b/src/parser.cpp index 987186d2b..f02fa563c 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -70,9 +70,11 @@ result_type parse_url(std::string_view user_input, // // } + std::string tmp_buffer; std::string_view internal_input; if (unicode::has_tabs_or_newline(user_input)) { - internal_input = helpers::get_ascii_tab_or_newline_removed(user_input); + tmp_buffer = helpers::get_ascii_tab_or_newline_removed(user_input); + internal_input = tmp_buffer; } else { internal_input = user_input; } diff --git a/src/url.cpp b/src/url.cpp index d4c521c1b..9dfb9f9e5 100644 --- a/src/url.cpp +++ b/src/url.cpp @@ -493,9 +493,11 @@ ada_really_inline bool url::parse_host(std::string_view input) { ada_really_inline void url::parse_path(std::string_view input) { ada_log("parse_path ", input); + std::string tmp_buffer; std::string_view internal_input; if (unicode::has_tabs_or_newline(input)) { - internal_input = helpers::get_ascii_tab_or_newline_removed(input); + tmp_buffer = helpers::get_ascii_tab_or_newline_removed(input); + internal_input = tmp_buffer; } else { internal_input = input; } diff --git a/src/url_aggregator.cpp b/src/url_aggregator.cpp index 936af833e..6236a40f1 100644 --- a/src/url_aggregator.cpp +++ b/src/url_aggregator.cpp @@ -330,9 +330,11 @@ ada_really_inline void url_aggregator::parse_path(std::string_view input) { ada_log("url_aggregator::parse_path ", input); 
ADA_ASSERT_TRUE(validate()); ADA_ASSERT_TRUE(!helpers::overlaps(input, buffer)); + std::string tmp_buffer; std::string_view internal_input; if (unicode::has_tabs_or_newline(input)) { - internal_input = helpers::get_ascii_tab_or_newline_removed(input); + tmp_buffer = helpers::get_ascii_tab_or_newline_removed(input); + internal_input = tmp_buffer; } else { internal_input = input; }