Skip to content

Commit

Permalink
Internal: Add SearchOperatorSet.
Browse files Browse the repository at this point in the history
  • Loading branch information
kohler committed Sep 6, 2024
1 parent c9abeda commit 676e9c6
Show file tree
Hide file tree
Showing 7 changed files with 180 additions and 86 deletions.
3 changes: 2 additions & 1 deletion batch/makedist.sh
Original file line number Diff line number Diff line change
Expand Up @@ -475,9 +475,10 @@ src/search/st_revpref.php
src/search/st_sclass.php
src/search/st_tag.php
src/search/st_topic.php
src/searchatom.php
src/searchexample.php
src/searchexpr.php
src/searchoperator.php
src/searchoperatorset.php
src/searchselection.php
src/searchsplitter.php
src/searchterm.php
Expand Down
17 changes: 9 additions & 8 deletions src/papersearch.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@ class SearchScope {
public $pos1;
/** @var int */
public $pos2;
/** @var ?SearchAtom */
/** @var ?SearchExpr */
public $defkw;
/** @var bool */
public $defkw_error = false;

/** @param int $pos1
* @param int $pos2
* @param ?SearchAtom $defkw */
* @param ?SearchExpr $defkw */
function __construct($pos1, $pos2, $defkw) {
$this->pos1 = $pos1;
$this->pos2 = $pos2;
Expand Down Expand Up @@ -709,7 +709,7 @@ static function escape_word($str) {
}
}

/** @param ?SearchAtom $sa
/** @param ?SearchExpr $sa
* @param string $str
* @param SearchScope $scope
* @param int $depth
Expand Down Expand Up @@ -763,7 +763,7 @@ static private function _canonical_qt($qt) {
}
}

/** @param ?SearchAtom $sa
/** @param ?SearchExpr $sa
* @param string $type
* @param string $qt
* @param Conf $conf
Expand Down Expand Up @@ -840,9 +840,10 @@ static private function _canonical_expression($str, $type, $qt, Conf $conf, $dep
return "";
}
$splitter = new SearchSplitter($str);
$sa = $splitter->parse_expression($type === "all" ? "SPACE" : "SPACEOR");
$sa = $splitter->parse_expression(null, $type === "all" ? "SPACE" : "SPACEOR");
if ($type === "none" && $sa) {
$sax = SearchAtom::make_op(SearchOperator::get("NOT"), 0, strlen($str), null);
$op = SearchOperatorSet::paper_search_operators()->lookup("NOT");
$sax = SearchExpr::make_op($op, 0, strlen($str), null);
$sax->child[] = $sa;
$sa = $sax;
}
Expand Down Expand Up @@ -1146,7 +1147,7 @@ function then_term() {
return $this->_then_term;
}

/** @param ?SearchAtom $a
/** @param ?SearchExpr $a
* @param bool $top
* @return array{int,int} */
private static function strip_show_atom($a, $top) {
Expand All @@ -1157,7 +1158,7 @@ private static function strip_show_atom($a, $top) {
if ($a->op && $a->op->type === "(" && $top && ($ch = $a->child[0] ?? null)) {
return self::strip_show_atom($ch, true);
}
if (!$a->kword && $a->op && !$a->op->unary) {
if (!$a->kword && $a->op && !$a->op->unary()) {
$pos1 = $pos2 = null;
foreach ($a->child as $ch) {
$span = self::strip_show_atom($ch, false);
Expand Down
38 changes: 19 additions & 19 deletions src/searchatom.php → src/searchexpr.php
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
<?php
// searchatom.php -- HotCRP class holding information about search words
// searchexpr.php -- HotCRP class holding information about search words
// Copyright (c) 2006-2024 Eddie Kohler; see LICENSE.

class SearchAtom {
class SearchExpr {
/** @var ?string */
public $kword;
/** @var string */
Expand All @@ -15,17 +15,17 @@ class SearchAtom {
public $pos2;
/** @var ?SearchOperator */
public $op;
/** @var ?list<SearchAtom> */
/** @var ?list<SearchExpr> */
public $child;
/** @var ?SearchAtom */
/** @var ?SearchExpr */
public $parent;

/** @param string $text
* @param int $pos1
* @param ?SearchAtom $parent
* @return SearchAtom */
* @param ?SearchExpr $parent
* @return SearchExpr */
static function make_simple($text, $pos1, $parent = null) {
$sa = new SearchAtom;
$sa = new SearchExpr;
$sa->text = $text;
$sa->kwpos1 = $sa->pos1 = $pos1;
$sa->pos2 = $pos1 + strlen($text);
Expand All @@ -38,10 +38,10 @@ static function make_simple($text, $pos1, $parent = null) {
* @param int $kwpos1
* @param int $pos1
* @param int $pos2
* @param ?SearchAtom $parent
* @return SearchAtom */
* @param ?SearchExpr $parent
* @return SearchExpr */
static function make_keyword($kword, $text, $kwpos1, $pos1, $pos2, $parent = null) {
$sa = new SearchAtom;
$sa = new SearchExpr;
$sa->kword = $kword === "" ? null : $kword;
$sa->text = $text;
$sa->kwpos1 = $kwpos1;
Expand All @@ -54,12 +54,12 @@ static function make_keyword($kword, $text, $kwpos1, $pos1, $pos2, $parent = nul
/** @param SearchOperator $op
* @param int $kwpos1
* @param int $kwpos2
* @param ?SearchAtom $reference
* @return SearchAtom */
* @param ?SearchExpr $reference
* @return SearchExpr */
static function make_op($op, $kwpos1, $kwpos2, $reference) {
$sa = new SearchAtom;
$sa = new SearchExpr;
$sa->op = $op;
if ($op->unary) {
if ($op->unary()) {
$sa->kwpos1 = $sa->pos1 = $kwpos1;
$sa->pos2 = $kwpos2;
$sa->child = [];
Expand All @@ -75,7 +75,7 @@ static function make_op($op, $kwpos1, $kwpos2, $reference) {

/** @return bool */
function is_complete() {
return !$this->op || count($this->child) > ($this->op->unary ? 0 : 1);
return !$this->op || count($this->child) > ($this->op->unary() ? 0 : 1);
}

/** @return bool */
Expand All @@ -84,7 +84,7 @@ function is_incomplete_paren() {
}

/** @param int $pos
* @return SearchAtom */
* @return SearchExpr */
function complete($pos) {
if (!$this->is_complete()) {
$this->pos2 = $pos;
Expand All @@ -101,7 +101,7 @@ function complete($pos) {

/** @param int $pos1
* @param int $pos2
* @return SearchAtom */
* @return SearchExpr */
function complete_paren($pos1, $pos2) {
$a = $this;
$first = $a->op && $a->op->type === "(" && !empty($a->child);
Expand All @@ -116,9 +116,9 @@ function complete_paren($pos1, $pos2) {
return $a;
}

/** @return list<SearchAtom> */
/** @return list<SearchExpr> */
function flattened_children() {
if (!$this->op || $this->op->unary) {
if (!$this->op || $this->op->unary()) {
return $this->child ?? [];
}
$a = [];
Expand Down
48 changes: 20 additions & 28 deletions src/searchoperator.php
Original file line number Diff line number Diff line change
Expand Up @@ -9,46 +9,38 @@ class SearchOperator {
/** @var ?string
* @readonly */
public $subtype;
/** @var bool
* @readonly */
public $unary;
/** @var int
* @readonly */
public $precedence;
/** @var int
* @readonly */
public $flags;

/** @var ?array<string,SearchOperator> */
static private $list = null;
const F_UNARY = 1;
const F_ALLOW_SUBTYPE = 2;
const F_SUBTYPE = 4;
const F_UNNAMED = 8;

/** @param string $type
* @param bool $unary
* @param int $precedence
* @param int $flags
* @param ?string $subtype */
function __construct($type, $unary, $precedence, $subtype = null) {
function __construct($type, $precedence, $flags, $subtype = null) {
$this->type = $type;
$this->subtype = $subtype;
$this->unary = $unary;
$this->precedence = $precedence;
$this->flags = $flags;
}

/** @return bool */
function unary() {
return ($this->flags & self::F_UNARY) !== 0;
}

/** @return ?SearchOperator */
static function get($name) {
if (!self::$list) {
self::$list["("] = new SearchOperator("(", true, 0);
self::$list[")"] = new SearchOperator(")", true, 0);
self::$list["NOT"] = self::$list["-"] = self::$list["!"] =
new SearchOperator("not", true, 8);
self::$list["+"] = new SearchOperator("+", true, 8);
self::$list["SPACE"] = new SearchOperator("space", false, 7);
self::$list["AND"] = self::$list["&&"] =
new SearchOperator("and", false, 6);
self::$list["XOR"] = self::$list["^^"] =
new SearchOperator("xor", false, 5);
self::$list["OR"] = self::$list["||"] =
new SearchOperator("or", false, 4);
self::$list["SPACEOR"] = new SearchOperator("or", false, 3);
self::$list["THEN"] = new SearchOperator("then", false, 2);
self::$list["HIGHLIGHT"] = new SearchOperator("highlight", false, 1);
}
return self::$list[$name] ?? null;
/** @param string $subtype
* @return SearchOperator */
function make_subtype($subtype) {
assert(($this->flags & self::F_ALLOW_SUBTYPE) !== 0);
return new SearchOperator($this->type, $this->precedence, ($this->flags & ~self::F_ALLOW_SUBTYPE) | self::F_SUBTYPE, $subtype);
}
}
110 changes: 110 additions & 0 deletions src/searchoperatorset.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
<?php
// searchoperatorset.php -- HotCRP helper class for search operators
// Copyright (c) 2006-2024 Eddie Kohler; see LICENSE.

/** A named collection of SearchOperator objects, plus a lazily built regular
 * expression that recognizes the names defined in the collection. */
class SearchOperatorSet {
    /** @var array<string,SearchOperator> operators keyed by the name used to look them up */
    private $a = [];
    /** @var ?string cached result of regex(); reset to null by define() */
    private $regex;

    /** @var ?SearchOperatorSet memoized result of paper_search_operators() */
    static private $psops = null;


    /** Register `$op` under `$name`, replacing any operator already stored
     * under that name, and invalidate the cached regex.
     * @param string $name
     * @param SearchOperator $op */
    function define($name, $op) {
        // XXX no backslashes allowed
        // XXX $name should be ctype_punct or contain no ctype_punct
        $this->a[$name] = $op;
        $this->regex = null;
    }

    /** Return the operator stored under `$name`, or null if there is none.
     *
     * A name of the form `BASE:SUBTYPE` that is not itself defined resolves
     * through `BASE`: if `BASE` is defined and carries F_ALLOW_SUBTYPE, a
     * subtype operator is created with make_subtype(), cached under the full
     * name, and returned. In every other case the result is null.
     * @param string $name
     * @return ?SearchOperator */
    function lookup($name) {
        $op = $this->a[$name] ?? null;
        if ($op === null
            && ($colon = strpos($name, ":")) !== false
            // `?? null` is required: the prefix may not be a defined name,
            // and a bare index would raise an "Undefined array key" warning
            && ($xop = $this->a[substr($name, 0, $colon)] ?? null) !== null
            && ($xop->flags & SearchOperator::F_ALLOW_SUBTYPE) !== 0) {
            $op = $xop->make_subtype(substr($name, $colon + 1));
            // The cached regex stays valid: regex() skips F_SUBTYPE entries
            $this->a[$name] = $op;
        }
        return $op;
    }

    /** Return a PCRE pattern, anchored with `\G`, that matches any defined
     * operator name at the current offset. The pattern is cached until the
     * next define().
     *
     * Entries flagged F_SUBTYPE or F_UNNAMED are left out of the pattern.
     * @return string */
    function regex() {
        if ($this->regex !== null) {
            return $this->regex;
        }
        $ch = "";           // single-character punctuation names, quoted
        $br = $alnum = [];  // $br: top-level alternatives; $alnum: word-like names
        foreach ($this->a as $name => $op) {
            // XXX need more careful handling of longest-match
            if (($op->flags & (SearchOperator::F_SUBTYPE | SearchOperator::F_UNNAMED)) !== 0) {
                continue;
            }
            if (ctype_punct($name)) {
                assert(($op->flags & SearchOperator::F_ALLOW_SUBTYPE) === 0);
                if (strlen($name) === 1) {
                    $ch .= preg_quote($name, "/");
                } else {
                    $br[] = preg_quote($name, "/");
                }
            } else {
                $x = preg_quote($name, "/");
                if (($op->flags & SearchOperator::F_ALLOW_SUBTYPE) !== 0) {
                    // optional `:subtype` suffix made of word characters
                    $x .= '(?::\w+)?';
                }
                $alnum[] = $x;
            }
        }
        // Alternative order: multi-character punctuation names, then the
        // single-character class, then word-like names
        if ($ch !== "") {
            $br[] = "[{$ch}]";
        }
        if (!empty($alnum)) {
            // A word-like name only matches when followed by whitespace,
            // a parenthesis, or the end of the subject
            $br[] = '(?:' . join("|", $alnum) . ')(?=[\s\(\)]|\z)';
        }
        $this->regex = '/\G(?:' . join("|", $br) . ')/s';
        return $this->regex;
    }

    /** Return the shared operator set for paper searches, building it on
     * first use. Several names (e.g. "NOT", "not", "-", "!") map to the same
     * SearchOperator instance.
     * @return SearchOperatorSet */
    static function paper_search_operators() {
        if (self::$psops !== null) {
            return self::$psops;
        }
        // SearchOperator arguments are (type, precedence, flags)
        $psops = new SearchOperatorSet;
        $psops->define("(", new SearchOperator("(", 0, SearchOperator::F_UNARY));
        $psops->define(")", new SearchOperator(")", 0, SearchOperator::F_UNARY));
        $op = new SearchOperator("not", 8, SearchOperator::F_UNARY);
        $psops->define("NOT", $op);
        $psops->define("not", $op);
        $psops->define("-", $op);
        $psops->define("!", $op);
        $psops->define("+", new SearchOperator("+", 8, SearchOperator::F_UNARY));
        $psops->define("SPACE", new SearchOperator("space", 7, SearchOperator::F_UNNAMED));
        $op = new SearchOperator("and", 6, 0);
        $psops->define("AND", $op);
        $psops->define("and", $op);
        $psops->define("&&", $op);
        $op = new SearchOperator("xor", 5, 0);
        $psops->define("XOR", $op);
        $psops->define("xor", $op);
        $psops->define("^^", $op);
        $op = new SearchOperator("or", 4, 0);
        $psops->define("OR", $op);
        $psops->define("or", $op);
        $psops->define("||", $op);
        $psops->define("SPACEOR", new SearchOperator("or", 3, SearchOperator::F_UNNAMED));
        $op = new SearchOperator("then", 2, 0);
        $psops->define("THEN", $op);
        $psops->define("then", $op);
        $psops->define("HIGHLIGHT", new SearchOperator("highlight", 1, SearchOperator::F_ALLOW_SUBTYPE));
        self::$psops = $psops;
        return $psops;
    }
}
Loading

0 comments on commit 676e9c6

Please sign in to comment.