Skip to content

Commit 12f51dc

Browse files
authored
Merge pull request #17 from KvanTTT/atn-remove-uuid-and-shifting-by-2
ATN serialized data: remove shifting by 2, remove UUID
2 parents: b56cfb9 + 0d39b81; commit: 12f51dc

File tree

1 file changed

+7
-105
lines changed

1 file changed

+7
-105
lines changed

src/Atn/ATNDeserializer.php

Lines changed: 7 additions & 105 deletions
Original file line number | Diff line number | Diff line change
@@ -46,38 +46,7 @@
4646

4747
final class ATNDeserializer
4848
{
49-
/**
50-
* This value should never change. Updates following this version are
51-
* reflected as change in the unique ID SERIALIZED_UUID.
52-
*/
53-
public const SERIALIZED_VERSION = 3;
54-
55-
/**
56-
* This is the earliest supported serialized UUID.
57-
* Stick to serialized version for now, we don't need a UUID instance.
58-
*/
59-
private const BASE_SERIALIZED_UUID = 'AADB8D7E-AEEF-4415-AD2B-8204D6CF042E';
60-
61-
/**
62-
* This UUID indicates the serialized ATN contains two sets of IntervalSets,
63-
* where the second set's values are encoded as 32-bit integers to support
64-
* the full Unicode SMP range up to U+10FFFF.
65-
*/
66-
private const ADDED_UNICODE_SMP = '59627784-3BE5-417A-B9EB-8131A7286089';
67-
68-
/**
69-
* This list contains all of the currently supported UUIDs, ordered by when
70-
* the feature first appeared in this branch.
71-
*/
72-
private const SUPPORTED_UUIDS = [
73-
self::BASE_SERIALIZED_UUID,
74-
self::ADDED_UNICODE_SMP,
75-
];
76-
77-
/**
78-
* This is the current serialized UUID.
79-
*/
80-
private const SERIALIZED_UUID = self::ADDED_UNICODE_SMP;
49+
public const SERIALIZED_VERSION = 4;
8150

8251
/** @var ATNDeserializationOptions */
8352
private $deserializationOptions;
@@ -88,9 +57,6 @@ final class ATNDeserializer
8857
/** @var int */
8958
private $pos = 0;
9059

91-
/** @var string */
92-
private $uuid = '';
93-
9460
/** @var array<int, callable|null>|null */
9561
private $stateFactories;
9662

@@ -102,38 +68,10 @@ public function __construct(?ATNDeserializationOptions $options = null)
10268
$this->deserializationOptions = $options ?? ATNDeserializationOptions::defaultOptions();
10369
}
10470

105-
/**
106-
* Determines if a particular serialized representation of an ATN supports
107-
* a particular feature, identified by the {@see UUID} used for serializing
108-
* the ATN at the time the feature was first introduced.
109-
*
110-
* @param string $feature The {@see UUID} marking the first time the
111-
* feature was supported in the serialized ATN.
112-
* @param string $actualUuid The {@see UUID} of the actual serialized ATN
113-
* which is currently being deserialized.
114-
*
115-
* @return bool `true` if the `actualUuid` value represents a serialized
116-
* ATN at or after the feature identified by `feature` was
117-
* introduced; otherwise, `false`.
118-
*/
119-
protected function isFeatureSupported(string $feature, string $actualUuid) : bool
120-
{
121-
$featureIndex = \array_search($feature, self::SUPPORTED_UUIDS, true);
122-
123-
if ($featureIndex === false) {
124-
return false;
125-
}
126-
127-
$actualUuidIndex = \array_search($actualUuid, self::SUPPORTED_UUIDS, true);
128-
129-
return $actualUuidIndex >= $featureIndex;
130-
}
131-
13271
public function deserialize(string $data) : ATN
13372
{
13473
$this->reset($data);
13574
$this->checkVersion();
136-
$this->checkUUID();
13775
$atn = $this->readATN();
13876
$this->readStates($atn);
13977
$this->readRules($atn);
@@ -145,14 +83,10 @@ public function deserialize(string $data) : ATN
14583
return $this->readInt();
14684
});
14785

148-
// Next, if the ATN was serialized with the Unicode SMP feature,
149-
// deserialize sets with 32-bit arguments <= U+10FFFF.
150-
151-
if ($this->isFeatureSupported(self::ADDED_UNICODE_SMP, $this->uuid)) {
152-
$this->readSets($sets, function () {
153-
return $this->readInt32();
154-
});
155-
}
86+
// Next, deserialize sets with 32-bit arguments <= U+10FFFF.
87+
$this->readSets($sets, function () {
88+
return $this->readInt32();
89+
});
15690

15791
$this->readEdges($atn, $sets);
15892
$this->readDecisions($atn);
@@ -178,10 +112,8 @@ private function reset(string $data) : void
178112
return;
179113
}
180114

181-
$this->data = [StringUtils::codePoint($characters[0])];
182-
for ($i = 1, $length = \count($characters); $i < $length; $i++) {
183-
$code = StringUtils::codePoint($characters[$i]);
184-
$this->data[] = $code > 1 ? $code - 2 : $code + 65533;
115+
for ($i = 0, $length = \count($characters); $i < $length; $i++) {
116+
$this->data[] = StringUtils::codePoint($characters[$i]);
185117
}
186118

187119
$this->pos = 0;
@@ -201,21 +133,6 @@ private function checkVersion() : void
201133
}
202134
}
203135

204-
private function checkUUID() : void
205-
{
206-
$uuid = $this->readUUID();
207-
208-
if (!\in_array($uuid, self::SUPPORTED_UUIDS, true)) {
209-
throw new \InvalidArgumentException(\sprintf(
210-
'Could not deserialize ATN with UUID: %s (expected %s or a legacy UUID).',
211-
$uuid,
212-
self::SERIALIZED_UUID
213-
));
214-
}
215-
216-
$this->uuid = $uuid;
217-
}
218-
219136
private function readATN() : ATN
220137
{
221138
$grammarType = $this->readInt();
@@ -739,21 +656,6 @@ private function readInt32() : int
739656
return $low | ($high << 16);
740657
}
741658

742-
private function readUUID() : string
743-
{
744-
$bb = [];
745-
for ($i=0; $i < 8; $i++) {
746-
$int = $this->readInt();
747-
$bb[] = $int & 0xFF;
748-
$bb[] = ($int >> 8) & 0xFF;
749-
}
750-
751-
$bb = \array_reverse($bb);
752-
$hex = \strtoupper(\bin2hex(\implode(\array_map('chr', $bb))));
753-
754-
return \vsprintf('%s%s-%s-%s-%s-%s%s%s', \str_split($hex, 4));
755-
}
756-
757659
/**
758660
* @param array<IntervalSet> $sets
759661
*/

0 commit comments

Comments
 (0)