Skip to content

Commit

Permalink
chore(search): Safe json validation, serialize fix (#2100)
Browse files Browse the repository at this point in the history
* chore(search): Safe json validation, serialize fix

Signed-off-by: Vladislav Oleshko <[email protected]>

---------

Signed-off-by: Vladislav Oleshko <[email protected]>
  • Loading branch information
dranikpg authored Nov 2, 2023
1 parent 146f46e commit e46dd24
Show file tree
Hide file tree
Showing 6 changed files with 72 additions and 25 deletions.
7 changes: 4 additions & 3 deletions src/core/search/lexer.lex
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,9 @@

blank [ \t\r]
dq \"
sq \'
esc_chars ['"\?\\abfnrtv]
esc_seq \\{esc_chars}
str_char ([^"]|{esc_seq})
term_char [_]|\w


Expand Down Expand Up @@ -65,10 +65,11 @@ term_char [_]|\w
"KNN" return Parser::make_KNN (loc());
"AS" return Parser::make_AS (loc());

[0-9]+ return make_UINT32(matched_view(), loc());
[0-9]{1,9} return make_UINT32(matched_view(), loc());
[+-]?(([0-9]*[.])?[0-9]+|inf) return make_DOUBLE(matched_view(), loc());

{dq}{str_char}*{dq} return make_StringLit(matched_view(1, 1), loc());
{dq}([^"]|{esc_seq})*{dq} return make_StringLit(matched_view(1, 1), loc());
{sq}([^']|{esc_seq})*{sq} return make_StringLit(matched_view(1, 1), loc());

"$"{term_char}+ return ParseParam(str(), loc());
"@"{term_char}+ return Parser::make_FIELD(str(), loc());
Expand Down
18 changes: 15 additions & 3 deletions src/core/search/search_parser_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -103,9 +103,6 @@ TEST_F(SearchParserTest, Scanner) {
absl::SimpleAtod("33.3", &d);
SetInput("33.3");
NEXT_EQ(TOK_DOUBLE, double, d);

SetInput("18446744073709551616");
NEXT_ERROR();
}

TEST_F(SearchParserTest, Parse) {
Expand All @@ -130,4 +127,19 @@ TEST_F(SearchParserTest, ParseParams) {
NEXT_EQ(TOK_UINT32, uint32_t, 10);
}

TEST_F(SearchParserTest, Quotes) {
SetInput(" \"fir st\" 'sec@o@nd' \":third:\" 'four\\\"th' ");
NEXT_EQ(TOK_TERM, string, "fir st");
NEXT_EQ(TOK_TERM, string, "sec@o@nd");
NEXT_EQ(TOK_TERM, string, ":third:");
NEXT_EQ(TOK_TERM, string, "four\"th");
}

TEST_F(SearchParserTest, Numeric) {
SetInput("11 123123123123 '22'");
NEXT_EQ(TOK_UINT32, uint32_t, 11);
NEXT_EQ(TOK_DOUBLE, double, 123123123123.0);
NEXT_EQ(TOK_TERM, string, "22");
}

} // namespace dfly::search
27 changes: 22 additions & 5 deletions src/server/search/doc_accessors.cc
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,11 @@ struct JsonAccessor::JsonPathContainer : public jsoncons::jsonpath::jsonpath_exp
};

BaseAccessor::StringList JsonAccessor::GetStrings(string_view active_field) const {
auto path_res = GetPath(active_field)->evaluate(json_);
auto* path = GetPath(active_field);
if (!path)
return {};

auto path_res = path->evaluate(json_);
DCHECK(path_res.is_array()); // json path always returns arrays

if (path_res.empty())
Expand All @@ -121,6 +125,8 @@ BaseAccessor::StringList JsonAccessor::GetStrings(string_view active_field) cons
return {buf_};
}

buf_.clear();

// First, grow buffer and compute string sizes
vector<size_t> sizes;
for (auto element : path_res.array_range()) {
Expand All @@ -141,7 +147,11 @@ BaseAccessor::StringList JsonAccessor::GetStrings(string_view active_field) cons
}

BaseAccessor::VectorInfo JsonAccessor::GetVector(string_view active_field) const {
auto res = GetPath(active_field)->evaluate(json_);
auto* path = GetPath(active_field);
if (!path)
return {};

auto res = path->evaluate(json_);
DCHECK(res.is_array());
if (res.empty())
return {nullptr, 0};
Expand All @@ -163,7 +173,10 @@ JsonAccessor::JsonPathContainer* JsonAccessor::GetPath(std::string_view field) c

error_code ec;
auto path_expr = jsoncons::jsonpath::make_expression<JsonType>(field, ec);
DCHECK(!ec) << "missing validation on ft.create step";
if (ec) {
LOG(WARNING) << "Invalid Json path: " << field << ' ' << ec.message();
return nullptr;
}

JsonPathContainer path_container{move(path_expr)};
auto ptr = make_unique<JsonPathContainer>(move(path_container));
Expand All @@ -180,8 +193,12 @@ SearchDocData JsonAccessor::Serialize(const search::Schema& schema) const {
SearchDocData JsonAccessor::Serialize(const search::Schema& schema,
const SearchParams::FieldReturnList& fields) const {
SearchDocData out{};
for (const auto& [ident, name] : fields)
out[name] = GetPath(ident)->evaluate(json_).to_string();
for (const auto& [ident, name] : fields) {
if (auto* path = GetPath(ident); path) {
if (auto res = path->evaluate(json_); !res.empty())
out[name] = res[0].to_string();
}
}
return out;
}

Expand Down
6 changes: 3 additions & 3 deletions src/server/search/doc_accessors.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ struct BaseAccessor : public search::DocumentAccessor {
virtual SearchDocData Serialize(const search::Schema& schema) const = 0;

// Serialize selected fields
SearchDocData Serialize(const search::Schema& schema,
const SearchParams::FieldReturnList& fields) const;
virtual SearchDocData Serialize(const search::Schema& schema,
const SearchParams::FieldReturnList& fields) const;
};

// Accessor for hashes stored with listpack
Expand Down Expand Up @@ -74,7 +74,7 @@ struct JsonAccessor : public BaseAccessor {

// The JsonAccessor works with structured types and not plain strings, so an overload is needed
SearchDocData Serialize(const search::Schema& schema,
const SearchParams::FieldReturnList& fields) const;
const SearchParams::FieldReturnList& fields) const override;

static void RemoveFieldFromCache(std::string_view field);

Expand Down
8 changes: 4 additions & 4 deletions src/server/search/search_family.cc
Original file line number Diff line number Diff line change
Expand Up @@ -366,9 +366,9 @@ void SearchFamily::FtDropIndex(CmdArgList args, ConnectionContext* cntx) {
});

DCHECK(num_deleted == 0u || num_deleted == shard_set->size());
if (num_deleted == shard_set->size())
return (*cntx)->SendOk();
(*cntx)->SendError("Unknown Index name");
if (num_deleted == 0u)
return (*cntx)->SendError("-Unknown Index name");
return (*cntx)->SendOk();
}

void SearchFamily::FtInfo(CmdArgList args, ConnectionContext* cntx) {
Expand All @@ -389,7 +389,7 @@ void SearchFamily::FtInfo(CmdArgList args, ConnectionContext* cntx) {
DCHECK(num_notfound == 0u || num_notfound == shard_set->size());

if (num_notfound > 0u)
return (*cntx)->SendError("Unknown index name");
return (*cntx)->SendError("Unknown Index name");

DCHECK(infos.front().base_index.schema.fields.size() ==
infos.back().base_index.schema.fields.size());
Expand Down
31 changes: 24 additions & 7 deletions src/server/search/search_family_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ TEST_F(SearchFamilyTest, JsonArrayValues) {
{"game": "Pacman", "score": 15},
{"game": "Mario", "score": 7}
],
"areas": "US-central"
"areas": ["US-central"]
}
)";
string_view D3 = R"(
Expand All @@ -229,10 +229,17 @@ TEST_F(SearchFamilyTest, JsonArrayValues) {
Run({"json.set", "k2", ".", D2});
Run({"json.set", "k3", ".", D3});

auto resp = Run({"ft.create", "i1", "on", "json", "schema", "$.name", "text", "$.plays[*].game",
"as", "games", "tag", "$.plays[*].score", "as", "scores", "numeric",
"$.areas[*]", "as", "areas", "tag"});
EXPECT_EQ(resp, "OK");
Run({"ft.create", "i1",
"on", "json",
"schema", "$.name",
"as", "name",
"text", "$.plays[*].game",
"as", "games",
"tag", "$.plays[*].score",
"as", "scores",
"numeric", "$.areas[*]",
"as", "areas",
"tag"});

EXPECT_THAT(Run({"ft.search", "i1", "*"}), AreDocIds("k1", "k2", "k3"));

Expand All @@ -248,8 +255,18 @@ TEST_F(SearchFamilyTest, JsonArrayValues) {
EXPECT_THAT(Run({"ft.search", "i1", "@scores:[(15 20]"}), AreDocIds("k3"));

// Find platers by areas
EXPECT_THAT(Run({"ft.search", "i1", "@areas:{\"EU-central\"}"}), AreDocIds("k1", "k3"));
EXPECT_THAT(Run({"ft.search", "i1", "@areas:{\"US-central\"}"}), AreDocIds("k2"));
EXPECT_THAT(Run({"ft.search", "i1", "@areas:{'EU-central'}"}), AreDocIds("k1", "k3"));
EXPECT_THAT(Run({"ft.search", "i1", "@areas:{'US-central'}"}), AreDocIds("k2"));

// Test complicated RETURN expression
auto res = Run(
{"ft.search", "i1", "@name:bob", "return", "1", "max($.plays[*].score)", "as", "max-score"});
EXPECT_THAT(res.GetVec()[2], RespArray(ElementsAre("max-score", "15")));

// Test invalid json path expression omits that field
res = Run({"ft.search", "i1", "@name:alex", "return", "1", "::??INVALID??::", "as", "retval"});
EXPECT_EQ(res.GetVec()[1], "k1");
EXPECT_THAT(res.GetVec()[2], RespArray(ElementsAre()));
}

TEST_F(SearchFamilyTest, Tags) {
Expand Down

0 comments on commit e46dd24

Please sign in to comment.