From 54b62b5eeae5c6e655906656d22be527d40b6f2d Mon Sep 17 00:00:00 2001
From: Andrew Ray <me@andrewray.me>
Date: Fri, 10 May 2024 21:41:56 -0700
Subject: [PATCH] Minor refactor to check for "definedX" macros, readme update,
 version bump

---
 README.md                                   | 28 +++++++++---
 package.json                                |  2 +-
 src/preprocessor/preprocessor-grammar.pegjs | 48 ++++++++-------------
 src/preprocessor/preprocessor.test.ts       | 38 ++++++++++------
 4 files changed, 66 insertions(+), 50 deletions(-)

diff --git a/README.md b/README.md
index 1c38ce1..6ff1e79 100644
--- a/README.md
+++ b/README.md
@@ -24,7 +24,7 @@ npm install --save @shaderfrog/glsl-parser
 
 ## Parsing
 
-```javascript
+```typescript
 import { parser, generate } from '@shaderfrog/glsl-parser';
 
 // To parse a GLSL program's source code into an AST:
@@ -71,7 +71,7 @@ operator, and `#if` expressions can only operate on integer constants, not other
 types of data. The Shaderfrog GLSL preprocessor can't be used as a C/C++
 preprocessor without modification.
 
-```javascript
+```typescript
 import preprocess from '@shaderfrog/glsl-parser/preprocessor';
 
 // Preprocess a program
@@ -104,7 +104,7 @@ A preprocessed program string can be handed off to the main GLSL parser.
 If you want more  control over preprocessing, the `preprocess` function above is
 a convenience method for approximately the following:
 
-```javascript
+```typescript
 import {
   preprocessAst,
   preprocessComments,
@@ -116,14 +116,14 @@ import {
 const commentsRemoved = preprocessComments(`float a = 1.0;`)
 
 // Parse the source text into an AST
-const program = parser.parse(commentsRemoved);
+const ast = parser.parse(commentsRemoved);
 
 // Then preproces it, expanding #defines, evaluating #ifs, etc
-preprocessAst(program);
+preprocessAst(ast);
 
 // Then convert it back into a program string, which can be passed to the
 // core glsl parser
-const preprocessed = preprocessorGenerate(program);
+const preprocessed = generate(ast);
 ```
 
 ## Scope
@@ -275,7 +275,7 @@ The Shaderfrog parser provides a AST visitor function for manipulating and
 searching an AST. The visitor API loosely follows the [Babel visitor API](https://github.com/jamiebuilds/babel-handbook/blob/master/translations/en/plugin-handbook.md#toc-visitors). A visitor object looks
 like:
 
-```javascript
+```typescript
 const visitors = {
   function_call: {
     enter: (path) => {},
@@ -341,6 +341,20 @@ visit(ast, {
 console.log('There are ', numberOfFunctionCalls, 'function calls');
 ```
 
+You can also visit the preprocessor's AST with `visitPreprocessedAst`. Visitors
+follow the same convention outlined above.
+
+```typescript
+import {
+  parser,
+  visitPreprocessedAst,
+} from '@shaderfrog/glsl-parser/preprocessor';
+
+// Parse the source text into an AST
+const ast = parser.parse(`float a = 1.0;`);
+visitPreprocessedAst(ast, visitors);
+```
+
 ### Utility Functions
 
 Rename all the variables in a program:
diff --git a/package.json b/package.json
index 0dc4b78..7ca204b 100644
--- a/package.json
+++ b/package.json
@@ -3,7 +3,7 @@
   "engines": {
     "node": ">=16"
   },
-  "version": "3.1.0",
+  "version": "3.2.0",
   "type": "module",
   "description": "A GLSL ES 1.0 and 3.0 parser and preprocessor that can preserve whitespace and comments",
   "scripts": {
diff --git a/src/preprocessor/preprocessor-grammar.pegjs b/src/preprocessor/preprocessor-grammar.pegjs
index fca70f3..b4681c5 100644
--- a/src/preprocessor/preprocessor-grammar.pegjs
+++ b/src/preprocessor/preprocessor-grammar.pegjs
@@ -70,22 +70,21 @@ CARET = token:"^" _:_? { return node('literal', { literal: token, wsEnd: _ }); }
 AMPERSAND = token:"&" _:_? { return node('literal', { literal: token, wsEnd: _ }); }
 COLON = token:":" _:_? { return node('literal', { literal: token, wsEnd: _ }); }
 
-DEFINE = wsStart:_? token:"#define" wsEnd:_? { return node('literal', { literal: token, wsStart, wsEnd }); }
-INCLUDE = wsStart:_? token:"#include" wsEnd:_? { return node('literal', { literal: token, wsStart, wsEnd }); }
-LINE = wsStart:_? token:"#line" wsEnd:_? { return node('literal', { literal: token, wsStart, wsEnd }); }
-UNDEF = wsStart:_? token:"#undef" wsEnd:_? { return node('literal', { literal: token, wsStart, wsEnd }); }
-ERROR = wsStart:_? token:"#error" wsEnd:_? { return node('literal', { literal: token, wsStart, wsEnd }); }
-PRAGMA = wsStart:_? token:"#pragma" wsEnd:_? { return node('literal', { literal: token, wsStart, wsEnd }); }
-DEFINED = wsStart:_? token:"defined" wsEnd:_? { return node('literal', { literal: token, wsStart, wsEnd }); }
-DEFINED_WITH_END_WS = wsStart:_? token:"defined" wsEnd:__ { return node('literal', { literal: token, wsStart, wsEnd }); }
-IF = wsStart:_? token:"#if" wsEnd:_? { return node('literal', { literal: token, wsStart, wsEnd }); }
-IFDEF = wsStart:_? token:"#ifdef" wsEnd:_? { return node('literal', { literal: token, wsStart, wsEnd }); }
-IFNDEF = wsStart:_? token:"#ifndef" wsEnd:_? { return node('literal', { literal: token, wsStart, wsEnd }); }
-ELIF = wsStart:_? token:"#elif" wsEnd:_? { return node('literal', { literal: token, wsStart, wsEnd }); }
-ELSE = wsStart:_? token:"#else" wsEnd:_? { return node('literal', { literal: token, wsStart, wsEnd }); }
-ENDIF = wsStart:_? token:"#endif" wsEnd:_? { return node('literal', { literal: token, wsStart, wsEnd }); }
-VERSION = wsStart:_? token:"#version" wsEnd:_? { return node('literal', { literal: token, wsStart, wsEnd }); }
-EXTENSION = wsStart:_? token:"#extension" wsEnd:_? { return node('literal', { literal: token, wsStart, wsEnd }); }
+DEFINE = wsStart:_? token:"#define" wsEnd:terminal { return node('literal', { literal: token, wsStart, wsEnd }); }
+INCLUDE = wsStart:_? token:"#include" wsEnd:terminal { return node('literal', { literal: token, wsStart, wsEnd }); }
+LINE = wsStart:_? token:"#line" wsEnd:terminal { return node('literal', { literal: token, wsStart, wsEnd }); }
+UNDEF = wsStart:_? token:"#undef" wsEnd:terminal { return node('literal', { literal: token, wsStart, wsEnd }); }
+ERROR = wsStart:_? token:"#error" wsEnd:terminal { return node('literal', { literal: token, wsStart, wsEnd }); }
+PRAGMA = wsStart:_? token:"#pragma" wsEnd:terminal { return node('literal', { literal: token, wsStart, wsEnd }); }
+DEFINED = wsStart:_? token:"defined" wsEnd:terminal { return node('literal', { literal: token, wsStart, wsEnd }); }
+IF = wsStart:_? token:"#if" wsEnd:terminal { return node('literal', { literal: token, wsStart, wsEnd }); }
+IFDEF = wsStart:_? token:"#ifdef" wsEnd:terminal { return node('literal', { literal: token, wsStart, wsEnd }); }
+IFNDEF = wsStart:_? token:"#ifndef" wsEnd:terminal { return node('literal', { literal: token, wsStart, wsEnd }); }
+ELIF = wsStart:_? token:"#elif" wsEnd:terminal { return node('literal', { literal: token, wsStart, wsEnd }); }
+ELSE = wsStart:_? token:"#else" wsEnd:terminal { return node('literal', { literal: token, wsStart, wsEnd }); }
+ENDIF = wsStart:_? token:"#endif" wsEnd:terminal { return node('literal', { literal: token, wsStart, wsEnd }); }
+VERSION = wsStart:_? token:"#version" wsEnd:terminal { return node('literal', { literal: token, wsStart, wsEnd }); }
+EXTENSION = wsStart:_? token:"#extension" wsEnd:terminal { return node('literal', { literal: token, wsStart, wsEnd }); }
 
 IDENTIFIER = identifier:$([A-Za-z_] [A-Za-z_0-9]*) _:_? { return node('identifier', { identifier, wsEnd: _ }); }
 IDENTIFIER_NO_WS = identifier:$([A-Za-z_] [A-Za-z_0-9]*) { return node('identifier', { identifier }); }
@@ -222,12 +221,9 @@ primary_expression "primary expression"
 unary_expression "unary expression"
   // "defined" is a unary operator, it can appear with optional parens. I'm not
   // sure if it makes sense to have it in the unary_expression section
-  = operator:DEFINED lp:LEFT_PAREN identifier:IDENTIFIER rp:RIGHT_PAREN {
+  = operator:DEFINED lp:LEFT_PAREN? identifier:IDENTIFIER rp:(&{ return !!lp; } paren:RIGHT_PAREN { return paren; })? &{ return !lp === !rp; } {
     return node('unary_defined', { operator, lp, identifier, rp, });
   }
-  / operator:DEFINED_WITH_END_WS identifier:IDENTIFIER {
-    return node('unary_defined', { operator, identifier});
-  }
   / operator:(PLUS / DASH / BANG / TILDE)
     expression:unary_expression {
       return node('unary', { operator, expression });
@@ -327,18 +323,10 @@ logical_or_expression "logical or expression"
 // I added this as a maybe entry point to expressions
 constant_expression "constant expression" = logical_or_expression
 
-// Must have a space or a comment
-__ "whitespace or comment" = w:whitespace rest:(comment whitespace?)* {
-  return collapse(w, rest);
-}
-/ c:comment rest:(whitespace comment?)* {
-  return collapse(c, rest);
-}
-
 // The whitespace is optional so that we can put comments immediately after
 // terminals, like void/* comment */
 // The ending whitespace is so that linebreaks can happen after comments
-_ "whitespace or comment or null" = w:whitespace? rest:(comment whitespace?)* {
+_ "whitespace or comment" = w:whitespace? rest:(comment whitespace?)* {
   return collapse(w, rest);
 }
 
@@ -355,3 +343,5 @@ single_comment = $('//' [^\n\r]*)
 multiline_comment = $("/*" inner:(!"*/" i:. { return i; })* "*/")
 
 whitespace "whitespace" = $[ \t]+
+
+terminal = ![A-Za-z_0-9] _:_? { return _; }
diff --git a/src/preprocessor/preprocessor.test.ts b/src/preprocessor/preprocessor.test.ts
index 910f98c..4433a90 100644
--- a/src/preprocessor/preprocessor.test.ts
+++ b/src/preprocessor/preprocessor.test.ts
@@ -81,7 +81,7 @@ before if
       #if A == 1 || B == 2
       inside if
       #define A
-          #elif A == 1 || defined(B) && C == 2
+          #elif A == 1 || defined B && C == 2
           float a;
           #elif A == 1 || defined(B) && C == 2
           float a;
@@ -159,7 +159,7 @@ before if
 #if !defined(A) && (defined(B) && C == 2)
 inside first if
 #endif
-#if ((defined(B) && C == 2) || defined(A))
+#if ((defined B && C == 2) || defined(A))
 inside second if
 #endif
 after if
@@ -477,23 +477,35 @@ test('generate #ifdef & #ifndef & #else', () => {
 `);
 });
 
-
-test('parse defined && defined() && definedXXX', () => {
+test('test macro with "defined" at start of name', () => {
   const program = `
-#if defined AAA && defined/**/BBB && defined/**/ CCC && definedXXX && defined(DDD)
+#define definedX 1
+#if defined(definedX) && defined definedX && definedX 
+true
 #endif
 `;
+  expectParsedProgram(program);
   const ast = parse(program);
-  const astStr = JSON.stringify(ast);
-  expect(astStr.includes('"identifier":"definedXXX"')).toBeTruthy();
-  expect(astStr.includes('"identifier":"AAA"')).toBeTruthy();
-  expect(astStr.includes('"identifier":"BBB"')).toBeTruthy();
-  expect(astStr.includes('"identifier":"CCC"')).toBeTruthy();
-  expect(astStr.includes('"identifier":"DDD"')).toBeTruthy();
-  expect(astStr.includes('"identifier":"XXX"')).toBeFalsy();
-  expect(astStr.match(/unary_defined/g)?.length).toBe(4);
+  preprocessAst(ast);
+  expect(generate(ast)).toBe(`
+true
+`);
+});
 
+test('inline comments in if statement expression', () => {
+  const program = `
+#define AAA
+#define BBB
+#if defined/**/AAA && defined/**/ BBB
+true
+#endif
+`;
   expectParsedProgram(program);
+  const ast = parse(program);
+  preprocessAst(ast);
+  expect(generate(ast)).toBe(`
+true
+`);
 });
 
 /*