Skip to content

Commit b767e14

Browse files
committed
Add RE2 regex backend
1 parent d6a49b9 commit b767e14

File tree

4 files changed

+190
-1
lines changed

4 files changed

+190
-1
lines changed

src/Makefile.am

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,8 @@ UTILS = \
247247

248248
REGEX = \
249249
regex/regex.cc \
250-
regex/backend/pcre.cc
250+
regex/backend/pcre.cc \
251+
regex/backend/re2.cc
251252

252253

253254
COLLECTION = \

src/regex/backend/re2.cc

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
/*
2+
* ModSecurity, http://www.modsecurity.org/
3+
* Copyright (c) 2019
4+
*
5+
* You may not use this file except in compliance with
6+
* the License. You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* If any of the files related to licensing are missing or if you have any
11+
* other questions related to licensing please contact Trustwave Holdings, Inc.
12+
* directly using the email address security@modsecurity.org.
13+
*
14+
*/
15+
#include <iostream>
#include <fstream>
#include <string>
#include <list>
#include <vector>

#include "src/regex/backend/re2.h"
#include "src/regex/regex_match.h"
22+
23+
namespace modsecurity {
24+
namespace regex {
25+
namespace backend {
26+
27+
#ifdef WITH_RE2
28+
29+
/**
 * Builds the RE2 option set used for every pattern compiled by this backend.
 *
 * The only non-default setting is dot_nl, so that '.' also matches a newline.
 */
static RE2::Options get_re2_options() {
    RE2::Options opts;
    opts.set_dot_nl(true);
    return opts;
}
36+
37+
38+
/**
 * Compiles pattern_ with the backend's RE2 options.
 *
 * An empty pattern is replaced by ".*" before it is handed to RE2.
 */
Re2::Re2(const std::string& pattern_)
    : re(pattern_.empty() ? std::string(".*") : pattern_,
         get_re2_options()) {
}
42+
43+
static bool do_match(
44+
const RE2 &re,
45+
const char *s,
46+
size_t n,
47+
RegexMatch *m,
48+
ssize_t max_groups,
49+
size_t offset)
50+
{
51+
if (m == nullptr) {
52+
max_groups = 0;
53+
}
54+
55+
// "+1" is required for full match (aka group 0)
56+
size_t ngroups = re.NumberOfCapturingGroups() + 1;
57+
if (max_groups >= 0 && max_groups < ngroups) {
58+
ngroups = max_groups;
59+
}
60+
re2::StringPiece submatches[ngroups];
61+
62+
if (re.Match(re2::StringPiece(s, n), offset, n, RE2::UNANCHORED,
63+
&submatches[0], ngroups)) {
64+
if (ngroups != 0) {
65+
RegexMatch::MatchGroupContainer groups;
66+
groups.reserve(ngroups);
67+
for (size_t i = 0; i < ngroups; i++) {
68+
size_t start = submatches[i].data() - s;
69+
std::string group = submatches[i].as_string();
70+
groups.push_back(MatchGroup{start, std::move(group)});
71+
}
72+
*m = RegexMatch(std::move(groups));
73+
}
74+
return true;
75+
}
76+
return false;
77+
}
78+
79+
std::vector<RegexMatch> Re2::searchAll(const std::string& s, bool overlapping) const {
80+
std::vector<RegexMatch> res;
81+
size_t offset = 0;
82+
83+
while (1) {
84+
RegexMatch m;
85+
bool match = do_match(re, s.data(), s.size(), &m, -1, offset);
86+
if (!match) break;
87+
88+
if (overlapping) {
89+
// start just after the beginning of the last match
90+
offset = m.group(0).offset + 1;
91+
} else {
92+
// start just at the end of the last match
93+
offset = m.group(0).offset + m.group(0).string.size();
94+
if (offset == m.group(0).offset) {
95+
// empty match - advance by one to not match empty string repeatedly
96+
offset++;
97+
}
98+
}
99+
res.push_back(std::move(m));
100+
}
101+
return res;
102+
}
103+
104+
bool Re2::search(const std::string &s, RegexMatch *m, ssize_t max_groups) const {
105+
return do_match(re, s.data(), s.size(), m, max_groups, 0);
106+
}
107+
108+
#endif
109+
110+
} // namespace backend
111+
} // namespace regex
112+
} // namespace modsecurity
113+

src/regex/backend/re2.h

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
/*
2+
* ModSecurity, http://www.modsecurity.org/
3+
* Copyright (c) 2019
4+
*
5+
* You may not use this file except in compliance with
6+
* the License. You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* If any of the files related to licensing are missing or if you have any
11+
* other questions related to licensing please contact Trustwave Holdings, Inc.
12+
* directly using the email address security@modsecurity.org.
13+
*
14+
*/
15+
16+
#ifdef WITH_RE2
17+
#include <re2/re2.h>
18+
#endif
19+
20+
#include <string>
21+
#include <list>
22+
23+
#include "src/regex/regex_match.h"
24+
25+
#ifndef SRC_REGEX_BACKEND_RE2_H_
26+
#define SRC_REGEX_BACKEND_RE2_H_
27+
28+
namespace modsecurity {
29+
namespace regex {
30+
namespace backend {
31+
32+
#ifdef WITH_RE2
33+
34+
class Re2 {
35+
public:
36+
explicit Re2(const std::string& pattern_);
37+
38+
// RE2 class is not copyable, so neither is this
39+
Re2(const Re2&) = delete;
40+
Re2& operator=(const Re2&) = delete;
41+
42+
std::vector<RegexMatch> searchAll(const std::string& s, bool overlapping = false) const;
43+
bool search(const std::string &s, RegexMatch *m = nullptr, ssize_t max_groups = -1) const;
44+
45+
bool ok(std::string *error = nullptr) const {
46+
if (re.ok()) {
47+
return true;
48+
}
49+
if (error != nullptr) {
50+
*error = re.error();
51+
}
52+
return false;
53+
}
54+
55+
const std::string& getPattern() const {
56+
return re.pattern();
57+
};
58+
private:
59+
const RE2 re;
60+
};
61+
62+
#endif
63+
64+
} // namespace backend
65+
} // namespace regex
66+
} // namespace modsecurity
67+
68+
#endif  // SRC_REGEX_BACKEND_RE2_H_

src/regex/regex.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include <list>
2222

2323
#include "src/regex/backend/pcre.h"
24+
#include "src/regex/backend/re2.h"
2425
#include "src/regex/regex_match.h"
2526

2627
#ifndef SRC_REGEX_REGEX_H_
@@ -30,7 +31,13 @@
3031
namespace modsecurity {
3132
namespace regex {
3233

34+
// Select the regex engine at compile time. PCRE takes precedence when both
// macros are defined. Both branches test for definition only (not the
// macro's value), matching the "#ifdef WITH_RE2" guards used by the RE2
// backend sources.
#if defined(WITH_PCRE)
using selectedBackend = backend::Pcre;
#elif defined(WITH_RE2)
using selectedBackend = backend::Re2;
#else
#error "no regex backend selected"
#endif
3441

3542
class Regex : public selectedBackend {
3643
public:

0 commit comments

Comments
 (0)