Skip to content

Commit 256e7d6

Browse files
authored
Merge pull request #31 from blacktear23/query
Add documents for query language, Let query package support put and remove statements
2 parents d5e2573 + ccef2e4 commit 256e7d6

23 files changed

+1071
-121
lines changed

query/README.md

+17
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,20 @@ FieldAccessExpression ::= "[" String "]" |
6565
"[" Number "]"
6666
```
6767

68+
Put Statement:
69+
70+
```
71+
PutStmt ::= "PUT" KVPair (, KVPair)*
72+
73+
KVPair ::= "(" Expression "," Expression ")"
74+
```
75+
76+
Remove Statement:
77+
78+
```
79+
RemoveStmt ::= "REMOVE" Expression (, Expression)*
80+
```
81+
6882
Features:
6983

7084
1. Scan ranger optimize: EmptyResult, PrefixScan, RangeScan, MultiGet
@@ -99,6 +113,9 @@ q select key, value, l2_distance(list(1,2,3,4), json(value) as l2_dis where key
99113
q select key, int(value) as f1 where f1 > 10
100114
q select key, split(value) as f1 where 'a' in f1
101115
q select key, value, l2_distance(list(1,2,3,4), json(value)) as l2_dis where key ^= 'embedding_json' & l2_dis > 0.6 order by l2_dis desc limit 5
116+
117+
# Put data
118+
q put ('k1', 'v1'), ('k2', upper('v' + key))
102119
```
103120

104121
## Build With LLama

query/checker.go

+6
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,12 @@ func (e *BinaryOpExpr) checkWithBetween(ctx *CheckCtx) error {
226226
}
227227

228228
func (e *FieldExpr) Check(ctx *CheckCtx) error {
229+
if e.Field == KeyKW && ctx.NotAllowKey {
230+
return NewSyntaxError(e.Pos, "not allow key keyword in expression")
231+
}
232+
if e.Field == ValueKW && ctx.NotAllowValue {
233+
return NewSyntaxError(e.Pos, "not allow value keyword in expression")
234+
}
229235
return nil
230236
}
231237

query/docs.md

+264
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,264 @@
1+
# A SQL like query language for TiKV
2+
3+
The query package provides a SQL-like query language that lets users search TiKV's key-value pairs.
4+
5+
## Query Syntax
6+
7+
### Basic syntax
8+
9+
```
10+
select (field expression), (field expression)... where (filter expression) group by (group expression) order by (order expression) limit (start, counts)
11+
```
12+
13+
**Field Expression**
14+
15+
```
16+
Field Expression := (FunctionCall | key | value | "*") ("as" FieldName)?
17+
18+
FunctionCall := FunctionName "(" FuncArgs ")" |
19+
FunctionName "(" FuncArgs ")" FieldAccessExpression*
20+
21+
FuncArgs := Expression (, Expression)*
22+
23+
FieldAccessExpression := "[" string "]" | "[" number "]"
24+
```
25+
26+
A field expression is basically `*`, `key` or `value`, and you can use the `as` keyword to rename it. For example:
27+
28+
```
29+
# same as select key, value where key ^= "prefix"
30+
select * where key ^= "prefix"
31+
32+
# rename key to f1 and value to f2 in result set
33+
select key as f1, value as f2 where key ^= "prefix"
34+
```
35+
36+
`key` is the key of the key-value pair, and `value` is its value.
37+
38+
A field expression can also use the functions shown below:
39+
40+
| Function | Description |
41+
| -------- | ----------- |
42+
| lower(value: str): str | convert value string into lower case |
43+
| upper(value: str): str | convert value string into upper case |
44+
| int(value: any): int | convert value into integer, if cannot convert to integer just return error |
45+
| float(value: any): float | convert value into float, if cannot convert to float just return error |
46+
| str(value: any): str | convert value into string |
47+
| is_int(value: any): bool | return whether value can be converted into integer |
48+
| is_float(value: any): bool | return whether value can be converted into float |
49+
| substr(value: str, start: int, end: int): str | return substring of value from `start` position to `end` position |
50+
| split(value: str, spliter: str): list | split value into a string list by spliter string |
51+
| list(elem1: any, elem2: any...): list | convert many elements into a list, list elements' type must be same, the list type support `int`, `str`, `float` types |
52+
| float_list(elem1: float, elem2: float...): list | convert many float elements into a list |
53+
| flist(elem1: float, elem2: float...): list | same as float_list |
54+
| int_list(elem1: int, elem2: int...): list | convert many integer elements into a list |
55+
| ilist(elem1: int, elem2: int...): list | same as int_list |
56+
| len(value: list): int | return value list length |
57+
| l2_distance(left: list, right: list): float | calculate l2 distance of two list |
58+
| cosine_distance(left: list, right: list): float | calculate cosine distance of two list |
59+
| json(value: str): json | parse string value into json type |
60+
| join(separator: str, val1: any, val2: any...): str | join values by separator |
61+
62+
You can use any of the functions above in field expression, such as:
63+
64+
```
65+
# Convert value into int type
66+
select key, int(value) where key ^= "prefix"
67+
68+
# Convert value into int type and do some math on it
69+
select key, ((int(value) + 1) * 8) where key ^= "prefix"
70+
71+
# Convert value into upper case
72+
select key, upper(value) where key ^= "prefix"
73+
74+
# Calculate l2 distance on two vectors
75+
select key, l2_distance(list(1,2,3,4), split(value, ",")) where key ^= "prefix"
76+
```
77+
78+
You may notice that there is a `json` type. You can use the `[]` operator to access `json` maps and lists, and the `[]` operator can also be used on the `list` type.
79+
80+
```
81+
select key, json(value)["key1"]["key2"] where key ^= "prefix"
82+
83+
select key, list(1,2,3,4)[2] where key ^= "prefix"
84+
```
85+
86+
**Filter Expression**
87+
88+
A filter expression follows the `where` keyword and contains the filter condition expressions.
89+
90+
```
91+
Filter Expression := "!"? Expression
92+
93+
Expression := "(" BinaryExpression | UnaryExpression ")"
94+
95+
UnaryExpression := "key" | "value" | string | number | "true" | "false" | FunctionCall | FieldName
96+
97+
BinaryExpression := Expression Op Expression |
98+
Expression "between" Expression "and" Expression |
99+
Expression "in" "(" Expression (, Expression)* ")" |
100+
Expression "in" FunctionCall |
101+
FunctionCall
102+
103+
Op := MathOp | CompareOp | AndOrOp
104+
MathOp := "+" | "-" | "*" | "/"
105+
AndOrOp := "&" | "|"
106+
CompareOp := "=" | "!=" | "^=" | "~=" | ">" | ">=" | "<" | "<="
107+
108+
FunctionCall := FunctionName "(" FuncArgs ")" |
109+
FunctionName "(" FuncArgs ")" FieldAccessExpression*
110+
111+
FuncArgs := Expression (, Expression)*
112+
113+
FieldAccessExpression := "[" string "]" | "[" number "]"
114+
```
115+
116+
The most basic use of a filter expression is to match keys by equality or by prefix, so there are some special comparison operators for this:
117+
118+
* `=`: Equals
119+
* `!=`: Not equals
120+
* `^=`: Prefix match
121+
* `~=`: Regexp match
122+
123+
For example:
124+
125+
```
126+
# Key equals "key01"
127+
select * where key = "key01"
128+
129+
# Keys that have the "key01" prefix
130+
select * where key ^= "key01"
131+
132+
# Keys that match "^key[0-9]+$"
133+
select * where key ~= "^key[0-9]+$"
134+
```
135+
136+
And we also provide `between` ... `and` expression and `in` expression same as SQL:
137+
138+
```
139+
select * where key between "k" and "l"
140+
141+
select * where key in ("k1", "k2", "k3")
142+
```
143+
144+
To combine multiple expressions, use the `&` and `|` operators:
145+
146+
```
147+
select * where key in ("k1", "k2", "k3") & value ~= "^prefix[0-9]+"
148+
149+
select * where key ^= "key" | value ^= "val"
150+
```
151+
152+
You can also use a field name defined in the select list inside the filter expression, which saves the query writer some typing:
153+
154+
```
155+
# keep rows where value's substring from 2 to 3 (one char) is between "b" and "e"
156+
select key, substr(value, 2, 3) as mid, value where mid between "b" and "e"
157+
```
158+
159+
If you want, you can also do some math in filter expressions:
160+
161+
```
162+
select * where key ^= "num" & int(value) + 1 > 10
163+
```
164+
165+
If value is a JSON string and you want to filter data by some fields, you can use field access operator:
166+
167+
```
168+
select * where key ^= "json" & json(value)["user"] = "Bob"
169+
```
170+
171+
**Order By**
172+
173+
Same as SQL, you can use `order by` to sort result set.
174+
175+
```
176+
Order Expression := OrderByField (, OrderByField)*
177+
178+
OrderByField := FieldName (ASC | DESC)?
179+
```
180+
181+
The `FieldName` can be `key`, `value` or the name defined by select:
182+
183+
```
184+
select key, value where key ^= "prefix" order by value
185+
186+
select key, int(value) as snum where key ^= "prefix" order by snum asc, key asc
187+
```
188+
189+
**Limit**
190+
191+
Same as SQL. If one number follows the `limit` keyword, it defines how many rows to return. If two numbers follow, the first is how many rows to skip and the second is how many rows to return.
192+
193+
```
194+
select * where key ^= "prefix" limit 10
195+
196+
select * where key ^= "prefix" limit 10, 10
197+
```
198+
199+
### Aggregation
200+
201+
The query language also supports aggregation. You can use the `GROUP BY` expression like in SQL:
202+
203+
```
204+
Group Expression := FieldName (, FieldName)*
205+
```
206+
207+
Below is the aggregation function list:
208+
209+
| Function | Description |
210+
| -------- | ----------- |
211+
| count(value: int): int | Count value by group |
212+
| sum(value: int): int | Sum value by group |
213+
| avg(value: int): int | Calculate average value by group |
214+
| min(value: int): int | Find the minimum value by group |
215+
| max(value: int): int | Find the maximum value by group |
216+
| quantile(value: float, percent: float): float | Calculate the Quantile by group |
217+
218+
For example:
219+
220+
```
221+
select count(1), substr(key, 3, 4) as pk where key ^= "k_" group by pk
222+
223+
select count(1), sum(int(value)) as sum, substr(key, 0, 2) as kprefix where key between 'k' and 'l' group by kprefix order by sum desc
224+
```
225+
226+
### Put statement
227+
228+
If you want to insert some data into TiKV, you can use `put` statement.
229+
230+
```
231+
PutStmt := "PUT" KeyValuePair (, KeyValuePair)*
232+
233+
KeyValuePair := "(" Expression "," Expression ")"
234+
```
235+
236+
For example:
237+
238+
```
239+
put ("k1", "v1"), ("k2", "v2")
240+
241+
# Use function call to generate value
242+
put ("k3", upper("value3")), ("k4", join(",", 1, 2, 3, 4))
243+
244+
# use key keyword to generate value
245+
put ("k4", upper("val_" + key))
246+
```
247+
248+
Note: In a put statement, the `key` keyword can only be used to generate the value. If the `value` keyword appears in the statement, a syntax error is reported.
249+
250+
### Remove statement
251+
252+
If you want to delete some data from TiKV, you can use the `remove` statement.
253+
254+
```
255+
RemoveStmt := "REMOVE" Expression (, Expression)*
256+
```
257+
258+
For example:
259+
260+
```
261+
remove "k1", "k2"
262+
```
263+
264+
Note: In a remove statement you cannot use the `key` or `value` keywords.

query/expression.go

+5-3
Original file line numberDiff line numberDiff line change
@@ -140,9 +140,11 @@ var (
140140
)
141141

142142
type CheckCtx struct {
143-
Fields []Expression
144-
FieldNames []string
145-
FieldTypes []Type
143+
Fields []Expression
144+
FieldNames []string
145+
FieldTypes []Type
146+
NotAllowKey bool
147+
NotAllowValue bool
146148
}
147149

148150
func (c *CheckCtx) GetNamedExpr(name string) (Expression, bool) {

query/expression_exec.go

+3
Original file line numberDiff line numberDiff line change
@@ -544,6 +544,9 @@ func (e *FunctionCallExpr) executeFunc(kv KVPair, funcObj *Function, ctx *Execut
544544
if !funcObj.VarArgs && len(e.Args) != funcObj.NumArgs {
545545
return nil, NewExecuteError(e.GetPos(), "Function %s require %d arguments but got %d", funcObj.Name, funcObj.NumArgs, len(e.Args))
546546
}
547+
if funcObj.VarArgs && len(e.Args) < funcObj.NumArgs {
548+
return nil, NewExecuteError(e.GetPos(), "Function %s require at least %d arguments but got %d", funcObj.Name, funcObj.NumArgs, len(e.Args))
549+
}
547550
return funcObj.Body(kv, e.Args, ctx)
548551
}
549552

query/expression_exec_test.go

+8
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,14 @@ func (t *mockTxn) Get(key []byte) ([]byte, error) {
139139
return nil, nil
140140
}
141141

142+
func (t *mockTxn) Put(key []byte, value []byte) error {
143+
return nil
144+
}
145+
146+
func (t *mockTxn) Delete(key []byte) error {
147+
return nil
148+
}
149+
142150
func (t *mockTxn) Cursor() (Cursor, error) {
143151
return &mockSmokeCursor{
144152
txn: t,

query/expression_test.go

+8
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,14 @@ func (t *mockQueryTxn) Get(key []byte) ([]byte, error) {
3131
return nil, nil
3232
}
3333

34+
func (t *mockQueryTxn) Put(key []byte, value []byte) error {
35+
return nil
36+
}
37+
38+
func (t *mockQueryTxn) Delete(key []byte) error {
39+
return nil
40+
}
41+
3442
func (t *mockQueryTxn) Cursor() (Cursor, error) {
3543
return &mockCursor{
3644
data: t.data,

query/filter_optimizer_test.go

+2-1
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,11 @@ import (
88

99
func optimizeQuery(query string) (*ScanType, error) {
1010
p := NewParser(query)
11-
stmt, err := p.Parse()
11+
gstmt, err := p.Parse()
1212
if err != nil {
1313
return nil, err
1414
}
15+
stmt := gstmt.(*SelectStmt)
1516
o := NewFilterOptimizer(stmt.Where, nil, nil)
1617
ret := o.optimizeExpr(stmt.Where.Expr)
1718
return ret, nil

query/func.go

+1
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ var (
2424
"flist": &Function{"flist", 1, true, TLIST, funcFloatList, funcFloatListVec},
2525
"ilist": &Function{"ilist", 1, true, TLIST, funcIntList, funcIntListVec},
2626
"len": &Function{"len", 1, false, TNUMBER, funcLen, funcLenVec},
27+
"join": &Function{"join", 2, true, TSTR, funcJoin, funcJoinVec},
2728

2829
"cosine_distance": &Function{"cosine_distance", 2, false, TNUMBER, funcCosineDistance, funcCosineDistanceVec},
2930
"l2_distance": &Function{"l2_distance", 2, false, TNUMBER, funcL2Distance, funcL2DistanceVec},

query/kv.go

+2
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ package query
22

33
type Txn interface {
44
Get(key []byte) (value []byte, err error)
5+
Put(key []byte, value []byte) error
6+
Delete(key []byte) error
57
Cursor() (cursor Cursor, err error)
68
}
79

0 commit comments

Comments
 (0)