This repository has been archived by the owner on Feb 26, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathRE_TreeNode.cpp
382 lines (338 loc) · 12.7 KB
/
RE_TreeNode.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
/**
* @file RE_TreeNode.cpp
* Implementation of the regular expression tree node class.
* @author Daniel Dreibrodt, Konstantin Steinmiller
*/
#include <stddef.h>
#include <cstdio>
#include <string>
#include <vector>
#include "RE_TreeNode.hpp"
#include "RE_RegularExpression.hpp"
#include "FSA_FiniteStateAutomaton.hpp"
#include "FSA_State.hpp"
using namespace std;
/**
* Creates a new node in the regular expression tree.
* @author Daniel Dreibrodt, Konstantin Steinmiller
* @param c The content of the node. This is either an operator or a literal.
*/
RETreeNode::RETreeNode(string c) {
content = c;
if(content == "<epsilon>") {
//empty literal
content = "";
}
p_left = NULL;
p_right = NULL;
}
RETreeNode::~RETreeNode() {
if(p_left != NULL) {
delete p_left;
p_left = NULL;
}
if(p_right != NULL) {
delete p_right;
p_right = NULL;
}
}
/**
* Gets the content of the node. This is either an operator or a literal.
* @author Daniel Dreibrodt, Konstantin Steinmiller
* @return Node content.
*/
string RETreeNode::getContent() {
return content;
}
/**
* Sets the content of the node. This can either be an operator or a literal.
* Note that you cannot change a node type by giving an operator node a literal value
* or vice versa.
* @author Daniel Dreibrodt, Konstantin Steinmiller
*/
void RETreeNode::setContent(string c) {
if(!isOperator() && RegularExpression::isOperator(c))
throw "Cannot change a terminal to an operator.";
if(isOperator() && !RegularExpression::isOperator(c))
throw "Cannot change an operator to a terminal.";
content = c;
}
/**
* Checks whether the given node represents an operator.
* @author Daniel Dreibrodt, Konstantin Steinmiller
* @return Returns true only if a child node is present and the content of the node is a valid operator.
*/
bool RETreeNode::isOperator() {
return (p_left != NULL || RegularExpression::isOperator(content));
}
/**
* Gets the left operand of this operator. Literals have no left operand.
* @return Left operand.
* @author Daniel Dreibrodt, Konstantin Steinmiller
*/
RETreeNode *RETreeNode::getLeft() {
return p_left;
}
/**
* Gets the right operand of this operator. Literals have no right operand.
* @return Right operand.
* @author Daniel Dreibrodt, Konstantin Steinmiller
*/
RETreeNode *RETreeNode::getRight() {
return p_right;
}
/**
* Sets the left operand of this operator. Literals can have no left operand.
* @param p_l Left operand.
* @author Daniel Dreibrodt, Konstantin Steinmiller
*/
void RETreeNode::setLeft(RETreeNode *p_l) {
if(isOperator()||p_l==NULL)
p_left = p_l;
else
throw "Node ("+content+") does not represent an operator. Cannot add left child: "+p_l->getContent();
}
/**
* Sets the right operand of this operator. Literals and Kleene-Stars can have no right operand.
* @param p_r Right operand.
* @author Daniel Dreibrodt, Konstantin Steinmiller
*/
void RETreeNode::setRight(RETreeNode *p_r) {
if((isOperator() && content!=RegularExpression::re_starOp) || p_r==NULL)
p_right = p_r;
else
throw "Node ("+content+") does not represent an operator. Cannot add right child: "+p_r->getContent();
}
/**
* Determines whether this tree node represents an empty regular expression.
* @return Whether the regular expression represented by this node is empty.
* @author Daniel Dreibrodt
*/
bool RETreeNode::isEmpty() {
if(content.length()==0) {
return true;
} else if(isOperator()) {
bool lEmpty = (p_left != NULL)?p_left->isEmpty():true;
bool rEmpty = (p_right != NULL)?p_right->isEmpty():true;
return lEmpty && rEmpty;
} else {
return false;
}
}
/**
* Creates a clone of this tree node.
* @return A new tree node that represents the same regular expression tree as this node.
* @author Daniel Dreibrodt
*/
RETreeNode *RETreeNode::clone() {
RETreeNode *node = new RETreeNode(content);
if(p_left!=NULL)
node->setLeft(p_left->clone());
if(p_right!=NULL)
node->setRight(p_right->clone());
return node;
}
/**
* Removes all redundancies from the regular expression.
* So expressions like (<epsilon>)* or (<epsilon>|<epsilon>) will be changed to <epsilon>.
* Expressions like (A.<epsilon>) will be changed to A (if A is a literal, subtree equality is not yet checked).
* Also incomplete subtrees, like operator nodes without children, will be changed to empty nodes.
* @author Daniel Dreibrodt
*/
void RETreeNode::simplify() {
if(p_left!=NULL)
p_left->simplify();
if(p_right!=NULL)
p_right->simplify();
if(isEmpty() && content.length()>0) {
if(p_left!=NULL) {
delete p_left;
p_left = NULL;
}
if(p_right!=NULL) {
delete p_right;
p_right = NULL;
}
content = "";
} else {
if(content == RegularExpression::re_andOp) {
if(p_left->isEmpty()) {
RETreeNode *p_oldRight = p_right;
content = p_oldRight->content;
if(p_oldRight->getLeft()!=NULL) {
setLeft(p_oldRight->getLeft()->clone());
} else {
setLeft(NULL);
}
if(p_oldRight->getRight()!=NULL) {
setRight(p_oldRight->getRight()->clone());
} else {
setRight(NULL);
}
//Delete old node
delete p_oldRight;
p_oldRight = NULL;
} else if(p_right->isEmpty()) {
RETreeNode *p_oldLeft = p_left;
content = p_oldLeft->content;
if(p_oldLeft->getLeft()!=NULL) {
setLeft(p_oldLeft->getLeft()->clone());
} else {
setLeft(NULL);
}
if(p_oldLeft->getRight()!=NULL) {
setRight(p_oldLeft->getRight()->clone());
} else {
setRight(NULL);
}
//Delete old node
delete p_oldLeft;
p_oldLeft = NULL;
}
} else if(content == RegularExpression::re_orOp) {
if(!p_left->isOperator() && !p_right->isOperator()) {
if(p_left->getContent() == p_right->getContent()) {
content = p_left->getContent();
delete p_left;
p_left = NULL;
delete p_right;
p_right = NULL;
}
}
}
}
}
/**
* Generates a non-deterministic finite state automaton representing
* the regular expression tree with this node as its root.
* @param labelNum Pointer to the counter variable that is used for naming the states of the FSA.
* @return A NDFSA representing the regular expression tree with this node as its root.
* @author Daniel Dreibrodt, Konstantin Steinmiller
*/
FiniteStateAutomaton *RETreeNode::toFSA(int *labelNum) {
//Self-developed algorithm
if(isOperator()) {
if(content == RegularExpression::re_andOp) {
// For a concatenation all final states of the FSA for the left subtree
// have to be connected with an empty transition to the start state
// of the FSA for the right subtree.
// All final states of the left FSA have to be turned into normal states.
// The start state of the right FSA has to be turned into a normal state.
FiniteStateAutomaton *leftFSA = p_left->toFSA(labelNum);
FiniteStateAutomaton *rightFSA = p_right->toFSA(labelNum);
vector<State*> *leftStates = leftFSA->getStateList();
vector<State*> *leftFinalStates = leftFSA->getFinalStates();
vector<Transition*> *leftTransitions = leftFSA->getTransitions();
vector<State*> *rightStates = rightFSA->getStateList();
vector<Transition*> *rightTransitions = rightFSA->getTransitions();
State *startStateRight = rightFSA->getStartState();
startStateRight->setStartState(false);
//Add all states of right automaton to left one
leftStates->reserve(leftStates->size()+rightStates->size());
leftStates->insert(leftStates->end(), rightStates->begin(), rightStates->end());
//Add all transitions of right automaton to left one
leftTransitions->reserve(leftTransitions->size()+rightTransitions->size());
leftTransitions->insert(leftTransitions->end(), rightTransitions->begin(), rightTransitions->end());
//One could create a more minimal FSA here by addin the transitions to and from right start state
// for all left final states and remove old right start state and its transitions.
//For each final state of left automaton, add transition to start state of right one
//and turn the final state into a normal one
unsigned int i;
for(i=0;i<leftFinalStates->size();i++) {
State *finalState = leftFinalStates->at(i);
leftFSA->addTransition(finalState,"",startStateRight);
finalState->setFinalState(false);
}
delete rightFSA;
return leftFSA;
} else if(content == RegularExpression::re_orOp) {
// For an boolean or operator a new start state is created
// which has empty transitions to both starting states of the left
// and right subtree's FSAs.
// Those two start states are then turned into normal states.
char *startStateName = new char[20];
sprintf(startStateName, "S%d", *labelNum);
State *startState = new State(startStateName, true, false);
(*labelNum)++;
FiniteStateAutomaton *leftFSA = p_left->toFSA(labelNum);
FiniteStateAutomaton *rightFSA = p_right->toFSA(labelNum);
vector<State*> *leftStates = leftFSA->getStateList();
vector<State*> *rightStates = rightFSA->getStateList();
vector<Transition*> *leftTransitions = leftFSA->getTransitions();
vector<Transition*> *rightTransitions = rightFSA->getTransitions();
State *startStateRight = rightFSA->getStartState();
startStateRight->setStartState(false);
State *startStateLeft = leftFSA->getStartState();
startStateLeft->setStartState(false);
//Add all states of right automaton to left one
leftStates->reserve(leftStates->size()+rightStates->size());
leftStates->insert(leftStates->end(), rightStates->begin(), rightStates->end());
//Add all transitions of right automaton to left one
leftTransitions->reserve(leftTransitions->size()+rightTransitions->size());
leftTransitions->insert(leftTransitions->end(), rightTransitions->begin(), rightTransitions->end());
leftFSA->addState(startState);
leftFSA->addTransition(startState,"",startStateLeft);
leftFSA->addTransition(startState,"",startStateRight);
delete rightFSA;
return leftFSA;
} else if(content == RegularExpression::re_starOp) {
// For a repetition all final states of the left subtree's
// FSA are connected to its starting state.
// Then the starting state is also turned into a final state.
FiniteStateAutomaton *leftFSA = p_left->toFSA(labelNum);
vector<State*> *finalStates = leftFSA->getFinalStates();
State *startState = leftFSA->getStartState();
unsigned int i=0;
for(i=0;i<finalStates->size();i++) {
State *finalState = finalStates->at(i);
leftFSA->addTransition(finalState,"",startState);
finalState->setFinalState(false);
}
startState->setFinalState(true);
return leftFSA;
}
} else {
//For literals create one start state A and one final state B
//The transition between A and B is marked by the literal itself
FiniteStateAutomaton *fsa = new FiniteStateAutomaton();
char *stateAname = new char[20];
sprintf(stateAname, "S%da", *labelNum);
State *stateA = new State(stateAname, true, false);
fsa->addState(stateA);
char *stateBname = new char[20];
sprintf(stateBname, "S%db", *labelNum);
(*labelNum)++;
State *stateB = new State(stateBname, false, true);
fsa->addState(stateB);
fsa->addTransition(stateAname, content, stateBname);
return fsa;
}
return NULL;
}
/**
* Converts a regular expression tree to a string by performing
* an inorder tree walk.
* @return The string representation of the regular expression specified by the given node.
* @author Daniel Dreibrodt, Konstantin Steinmiller
*/
string RETreeNode::toString() {
string s = "";
if(isOperator()) {
s += "(";
if(getLeft() != NULL) {
s += getLeft()->toString();
}
}
if(content.length()==0) {
s += "<epsilon>";
} else {
s += getContent();
}
if(isOperator()) {
if(getRight() != NULL) {
s += getRight()->toString();
}
s += ")";
}
return s;
}