@@ -372,26 +372,87 @@ module BindingStringParser<BindingStringReaderSig BindingStringReader> {
372
372
this = MkDoubleQuoteToken ( _, _, _, result )
373
373
}
374
374
375
- Token getNext ( ) {
376
- result .getBegin ( ) = this .getEnd ( ) + 1 and
377
- result .getReader ( ) = this .getReader ( )
375
+ /**
376
+ * Gets the next contiguous token after the end of this token.
377
+ */
378
+ Token getNextAfterEnd ( ) {
379
+ // Find the last contained token, then get the next token after that
380
+ result = getNextAfterContained ( ) and
381
+ // Ensure contiguous tokens
382
+ consecutiveToken ( this , result )
378
383
}
379
384
380
- predicate isFirst ( ) {
381
- not exists ( Token other |
382
- other .getBegin ( ) < this .getBegin ( ) and other .getReader ( ) = this .getReader ( )
385
+ bindingset [ t1, t2]
386
+ pragma [ inline_late]
387
+ private predicate consecutiveToken ( Token t1 , Token t2 ) { t1 .getEnd ( ) + 1 = t2 .getBegin ( ) }
388
+
389
+ /**
390
+ * Gets the next token that occurs after the start of this token.
391
+ */
392
+ private Token getNext ( ) {
393
+ exists ( int pos |
394
+ tokenOrdering ( this .getReader ( ) , this , pos ) and
395
+ tokenOrdering ( this .getReader ( ) , result , pos + uniqueTokensAtPosition ( this .getReader ( ) , pos ) )
383
396
)
384
397
}
385
398
399
+ /**
400
+ * Get the last token contained by this token.
401
+ */
402
+ pragma [ noinline]
403
+ private Token getNextAfterContained ( ) {
404
+ exists ( Token lastToken |
405
+ contains ( lastToken ) and
406
+ result = lastToken .getNext ( ) and
407
+ not contains ( result )
408
+ )
409
+ }
410
+
411
+ predicate isFirst ( ) { tokenOrdering ( this .getReader ( ) , this , 1 ) }
412
+
386
413
predicate contains ( Token t ) {
387
- this .getReader ( ) = t .getReader ( ) and
388
- this .getBegin ( ) < t .getBegin ( ) and
389
- this .getEnd ( ) > t .getEnd ( )
414
+ // base case, every token contains itself
415
+ t = this
416
+ or
417
+ // Recursive case, find an existing token contained by this token, and determine whether the next token is
418
+ // contained
419
+ exists ( Token prev |
420
+ this .contains ( prev ) and
421
+ prev .getNext ( ) = t and
422
+ // In the case of overlapping ranges, report tokens that begin inside this token as "contained"
423
+ this .getEnd ( ) >= t .getBegin ( )
424
+ )
425
+ }
426
+
427
+ /**
428
+ * The token `t` is completely contained within this outer token.
429
+ */
430
+ predicate strictContains ( Token t ) {
431
+ this .contains ( t ) and t .getBegin ( ) > this .getBegin ( ) and t .getEnd ( ) < this .getEnd ( )
390
432
}
391
433
392
434
stdlib:: Location getLocation ( ) { result = getReader ( ) .getLocation ( ) }
393
435
}
394
436
437
+ /**
438
+ * Holds if `t` is ordered at `position` according to the location of the beginning of the token.
439
+ *
440
+ * Note: `position` is not contiguous as certain strings may be matched by multiple tokens. In this
441
+ * case those tokens will all have the same `position`, and the subsequent token will have
442
+ * `position + count(tokens_with_current_position)`.
443
+ */
444
+ private predicate tokenOrdering ( BindingStringReader reader , Token t , int position ) {
445
+ t = rank [ position ] ( Token token | token .getReader ( ) = reader | token order by token .getBegin ( ) )
446
+ }
447
+
448
+ /**
449
+ * Identify how many tokens are at a given position in the ordering, i.e. have the same beginning and end.
450
+ */
451
+ private int uniqueTokensAtPosition ( BindingStringReader reader , int position ) {
452
+ tokenOrdering ( reader , _, position ) and
453
+ result = count ( Token t | tokenOrdering ( reader , t , position ) )
454
+ }
455
+
395
456
private class WhiteSpaceToken extends Token , MkWhiteSpaceToken { }
396
457
397
458
private class CommaToken extends Token , MkCommaToken { }
@@ -431,10 +492,10 @@ module BindingStringParser<BindingStringReaderSig BindingStringReader> {
431
492
private class DotToken extends Token , MkDot { }
432
493
433
494
private Token getNextSkippingWhitespace ( Token t ) {
434
- result = t .getNext ( ) and
495
+ result = t .getNextAfterEnd ( ) and
435
496
not result instanceof WhiteSpaceToken
436
497
or
437
- exists ( WhiteSpaceToken ws | ws = t .getNext ( ) | result = getNextSkippingWhitespace ( ws ) )
498
+ exists ( WhiteSpaceToken ws | ws = t .getNextAfterEnd ( ) | result = getNextSkippingWhitespace ( ws ) )
438
499
}
439
500
440
501
private newtype TMemberList =
@@ -625,15 +686,15 @@ module BindingStringParser<BindingStringReaderSig BindingStringReader> {
625
686
private newtype TValue =
626
687
MkNumber ( float n , Token source ) {
627
688
exists ( NumberToken t | t .getValue ( ) .toFloat ( ) = n and source = t |
628
- not any ( StringToken str ) .contains ( t ) and
629
- not any ( NameToken name ) .contains ( t )
689
+ not any ( StringToken str ) .strictContains ( t ) and
690
+ not any ( NameToken name ) .strictContains ( t )
630
691
)
631
692
} or
632
693
MkString ( string s , Token source ) {
633
694
exists ( StringToken t |
634
695
t .( Token ) .getValue ( ) = s and
635
696
t = source and
636
- not any ( NameToken nameToken ) .contains ( t )
697
+ not any ( NameToken nameToken ) .strictContains ( t )
637
698
)
638
699
} or
639
700
MkObject ( MemberList members , Token source ) {
@@ -662,27 +723,27 @@ module BindingStringParser<BindingStringReaderSig BindingStringReader> {
662
723
} or
663
724
MkTrue ( Token source ) {
664
725
exists ( TrueToken t |
665
- not any ( StringToken str ) .contains ( t ) and
666
- not any ( NameToken nameToken ) .contains ( t ) and
726
+ not any ( StringToken str ) .strictContains ( t ) and
727
+ not any ( NameToken nameToken ) .strictContains ( t ) and
667
728
source = t
668
729
)
669
730
} or
670
731
MkFalse ( Token source ) {
671
732
exists ( FalseToken t |
672
- not any ( StringToken str ) .contains ( t ) and
673
- not any ( NameToken nameToken ) .contains ( t ) and
733
+ not any ( StringToken str ) .strictContains ( t ) and
734
+ not any ( NameToken nameToken ) .strictContains ( t ) and
674
735
source = t
675
736
)
676
737
} or
677
738
MkNull ( Token source ) {
678
739
exists ( NullToken t |
679
- not any ( StringToken str ) .contains ( t ) and
680
- not any ( NameToken nameToken ) .contains ( t ) and
740
+ not any ( StringToken str ) .strictContains ( t ) and
741
+ not any ( NameToken nameToken ) .strictContains ( t ) and
681
742
source = t
682
743
)
683
744
} or
684
745
MkName ( Token source ) {
685
- exists ( NameToken t | not any ( StringToken str ) .contains ( t ) and source = t )
746
+ exists ( NameToken t | not any ( StringToken str ) .strictContains ( t ) and source = t )
686
747
} or
687
748
MkIdent ( Token source ) {
688
749
exists ( IdentToken t | source = t and getNextSkippingWhitespace ( t ) instanceof ColonToken )
0 commit comments