From 54b62b5eeae5c6e655906656d22be527d40b6f2d Mon Sep 17 00:00:00 2001 From: Andrew Ray Date: Fri, 10 May 2024 21:41:56 -0700 Subject: [PATCH] Minor refactor to check for "definedX" macros, readme update, version bump --- README.md | 28 +++++++++--- package.json | 2 +- src/preprocessor/preprocessor-grammar.pegjs | 48 ++++++++------------- src/preprocessor/preprocessor.test.ts | 38 ++++++++++------ 4 files changed, 66 insertions(+), 50 deletions(-) diff --git a/README.md b/README.md index 1c38ce1..6ff1e79 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,7 @@ npm install --save @shaderfrog/glsl-parser ## Parsing -```javascript +```typescript import { parser, generate } from '@shaderfrog/glsl-parser'; // To parse a GLSL program's source code into an AST: @@ -71,7 +71,7 @@ operator, and `#if` expressions can only operate on integer constants, not other types of data. The Shaderfrog GLSL preprocessor can't be used as a C/C++ preprocessor without modification. -```javascript +```typescript import preprocess from '@shaderfrog/glsl-parser/preprocessor'; // Preprocess a program @@ -104,7 +104,7 @@ A preprocessed program string can be handed off to the main GLSL parser. 
If you want more control over preprocessing, the `preprocess` function above is a convenience method for approximately the following: -```javascript +```typescript import { preprocessAst, preprocessComments, @@ -116,14 +116,14 @@ import { const commentsRemoved = preprocessComments(`float a = 1.0;`) // Parse the source text into an AST -const program = parser.parse(commentsRemoved); +const ast = parser.parse(commentsRemoved); // Then preprocess it, expanding #defines, evaluating #ifs, etc -preprocessAst(program); +preprocessAst(ast); // Then convert it back into a program string, which can be passed to the // core glsl parser -const preprocessed = preprocessorGenerate(program); +const preprocessed = generate(ast); ``` ## Scope @@ -275,7 +275,7 @@ The Shaderfrog parser provides an AST visitor function for manipulating and searching an AST. The visitor API loosely follows the [Babel visitor API](https://github.com/jamiebuilds/babel-handbook/blob/master/translations/en/plugin-handbook.md#toc-visitors). A visitor object looks like: -```javascript +```typescript const visitors = { function_call: { enter: (path) => {}, @@ -341,6 +341,20 @@ visit(ast, { console.log('There are ', numberOfFunctionCalls, 'function calls'); ``` +You can also visit the preprocessed AST with `visitPreprocessedAst`. Visitors +follow the same convention outlined above. 
+ +```typescript +import { + parser, + visitPreprocessedAst, +} from '@shaderfrog/glsl-parser/preprocessor'; + +// Parse the source text into an AST +const ast = parser.parse(`float a = 1.0;`); +visitPreprocessedAst(ast, visitors); +``` + ### Utility Functions Rename all the variables in a program: diff --git a/package.json b/package.json index 0dc4b78..7ca204b 100644 --- a/package.json +++ b/package.json @@ -3,7 +3,7 @@ "engines": { "node": ">=16" }, - "version": "3.1.0", + "version": "3.2.0", "type": "module", "description": "A GLSL ES 1.0 and 3.0 parser and preprocessor that can preserve whitespace and comments", "scripts": { diff --git a/src/preprocessor/preprocessor-grammar.pegjs b/src/preprocessor/preprocessor-grammar.pegjs index fca70f3..b4681c5 100644 --- a/src/preprocessor/preprocessor-grammar.pegjs +++ b/src/preprocessor/preprocessor-grammar.pegjs @@ -70,22 +70,21 @@ CARET = token:"^" _:_? { return node('literal', { literal: token, wsEnd: _ }); } AMPERSAND = token:"&" _:_? { return node('literal', { literal: token, wsEnd: _ }); } COLON = token:":" _:_? { return node('literal', { literal: token, wsEnd: _ }); } -DEFINE = wsStart:_? token:"#define" wsEnd:_? { return node('literal', { literal: token, wsStart, wsEnd }); } -INCLUDE = wsStart:_? token:"#include" wsEnd:_? { return node('literal', { literal: token, wsStart, wsEnd }); } -LINE = wsStart:_? token:"#line" wsEnd:_? { return node('literal', { literal: token, wsStart, wsEnd }); } -UNDEF = wsStart:_? token:"#undef" wsEnd:_? { return node('literal', { literal: token, wsStart, wsEnd }); } -ERROR = wsStart:_? token:"#error" wsEnd:_? { return node('literal', { literal: token, wsStart, wsEnd }); } -PRAGMA = wsStart:_? token:"#pragma" wsEnd:_? { return node('literal', { literal: token, wsStart, wsEnd }); } -DEFINED = wsStart:_? token:"defined" wsEnd:_? { return node('literal', { literal: token, wsStart, wsEnd }); } -DEFINED_WITH_END_WS = wsStart:_? 
token:"defined" wsEnd:__ { return node('literal', { literal: token, wsStart, wsEnd }); } -IF = wsStart:_? token:"#if" wsEnd:_? { return node('literal', { literal: token, wsStart, wsEnd }); } -IFDEF = wsStart:_? token:"#ifdef" wsEnd:_? { return node('literal', { literal: token, wsStart, wsEnd }); } -IFNDEF = wsStart:_? token:"#ifndef" wsEnd:_? { return node('literal', { literal: token, wsStart, wsEnd }); } -ELIF = wsStart:_? token:"#elif" wsEnd:_? { return node('literal', { literal: token, wsStart, wsEnd }); } -ELSE = wsStart:_? token:"#else" wsEnd:_? { return node('literal', { literal: token, wsStart, wsEnd }); } -ENDIF = wsStart:_? token:"#endif" wsEnd:_? { return node('literal', { literal: token, wsStart, wsEnd }); } -VERSION = wsStart:_? token:"#version" wsEnd:_? { return node('literal', { literal: token, wsStart, wsEnd }); } -EXTENSION = wsStart:_? token:"#extension" wsEnd:_? { return node('literal', { literal: token, wsStart, wsEnd }); } +DEFINE = wsStart:_? token:"#define" wsEnd:terminal { return node('literal', { literal: token, wsStart, wsEnd }); } +INCLUDE = wsStart:_? token:"#include" wsEnd:terminal { return node('literal', { literal: token, wsStart, wsEnd }); } +LINE = wsStart:_? token:"#line" wsEnd:terminal { return node('literal', { literal: token, wsStart, wsEnd }); } +UNDEF = wsStart:_? token:"#undef" wsEnd:terminal { return node('literal', { literal: token, wsStart, wsEnd }); } +ERROR = wsStart:_? token:"#error" wsEnd:terminal { return node('literal', { literal: token, wsStart, wsEnd }); } +PRAGMA = wsStart:_? token:"#pragma" wsEnd:terminal { return node('literal', { literal: token, wsStart, wsEnd }); } +DEFINED = wsStart:_? token:"defined" wsEnd:terminal { return node('literal', { literal: token, wsStart, wsEnd }); } +IF = wsStart:_? token:"#if" wsEnd:terminal { return node('literal', { literal: token, wsStart, wsEnd }); } +IFDEF = wsStart:_? 
token:"#ifdef" wsEnd:terminal { return node('literal', { literal: token, wsStart, wsEnd }); } +IFNDEF = wsStart:_? token:"#ifndef" wsEnd:terminal { return node('literal', { literal: token, wsStart, wsEnd }); } +ELIF = wsStart:_? token:"#elif" wsEnd:terminal { return node('literal', { literal: token, wsStart, wsEnd }); } +ELSE = wsStart:_? token:"#else" wsEnd:terminal { return node('literal', { literal: token, wsStart, wsEnd }); } +ENDIF = wsStart:_? token:"#endif" wsEnd:terminal { return node('literal', { literal: token, wsStart, wsEnd }); } +VERSION = wsStart:_? token:"#version" wsEnd:terminal { return node('literal', { literal: token, wsStart, wsEnd }); } +EXTENSION = wsStart:_? token:"#extension" wsEnd:terminal { return node('literal', { literal: token, wsStart, wsEnd }); } IDENTIFIER = identifier:$([A-Za-z_] [A-Za-z_0-9]*) _:_? { return node('identifier', { identifier, wsEnd: _ }); } IDENTIFIER_NO_WS = identifier:$([A-Za-z_] [A-Za-z_0-9]*) { return node('identifier', { identifier }); } @@ -222,12 +221,9 @@ primary_expression "primary expression" unary_expression "unary expression" // "defined" is a unary operator, it can appear with optional parens. I'm not // sure if it makes sense to have it in the unary_expression section - = operator:DEFINED lp:LEFT_PAREN identifier:IDENTIFIER rp:RIGHT_PAREN { + = operator:DEFINED lp:LEFT_PAREN? identifier:IDENTIFIER rp:RIGHT_PAREN? 
{ return node('unary_defined', { operator, lp, identifier, rp, }); } - / operator:DEFINED_WITH_END_WS identifier:IDENTIFIER { - return node('unary_defined', { operator, identifier}); - } / operator:(PLUS / DASH / BANG / TILDE) expression:unary_expression { return node('unary', { operator, expression }); @@ -327,18 +323,10 @@ logical_or_expression "logical or expression" // I added this as a maybe entry point to expressions constant_expression "constant expression" = logical_or_expression -// Must have a space or a comment -__ "whitespace or comment" = w:whitespace rest:(comment whitespace?)* { - return collapse(w, rest); -} -/ c:comment rest:(whitespace comment?)* { - return collapse(c, rest); -} - // The whitespace is optional so that we can put comments immediately after // terminals, like void/* comment */ // The ending whitespace is so that linebreaks can happen after comments -_ "whitespace or comment or null" = w:whitespace? rest:(comment whitespace?)* { +_ "whitespace or comment" = w:whitespace? rest:(comment whitespace?)* { return collapse(w, rest); } @@ -355,3 +343,5 @@ single_comment = $('//' [^\n\r]*) multiline_comment = $("/*" inner:(!"*/" i:. { return i; })* "*/") whitespace "whitespace" = $[ \t]+ + +terminal = ![A-Za-z_0-9] _:_? 
{ return _; } diff --git a/src/preprocessor/preprocessor.test.ts b/src/preprocessor/preprocessor.test.ts index 910f98c..4433a90 100644 --- a/src/preprocessor/preprocessor.test.ts +++ b/src/preprocessor/preprocessor.test.ts @@ -81,7 +81,7 @@ before if #if A == 1 || B == 2 inside if #define A - #elif A == 1 || defined(B) && C == 2 + #elif A == 1 || defined B && C == 2 float a; #elif A == 1 || defined(B) && C == 2 float a; @@ -159,7 +159,7 @@ before if #if !defined(A) && (defined(B) && C == 2) inside first if #endif -#if ((defined(B) && C == 2) || defined(A)) +#if ((defined B && C == 2) || defined(A)) inside second if #endif after if @@ -477,23 +477,35 @@ test('generate #ifdef & #ifndef & #else', () => { `); }); - -test('parse defined && defined() && definedXXX', () => { +test('test macro with "defined" at start of name', () => { const program = ` -#if defined AAA && defined/**/BBB && defined/**/ CCC && definedXXX && defined(DDD) +#define definedX 1 +#if defined(definedX) && defined definedX && definedX +true #endif `; + expectParsedProgram(program); const ast = parse(program); - const astStr = JSON.stringify(ast); - expect(astStr.includes('"identifier":"definedXXX"')).toBeTruthy(); - expect(astStr.includes('"identifier":"AAA"')).toBeTruthy(); - expect(astStr.includes('"identifier":"BBB"')).toBeTruthy(); - expect(astStr.includes('"identifier":"CCC"')).toBeTruthy(); - expect(astStr.includes('"identifier":"DDD"')).toBeTruthy(); - expect(astStr.includes('"identifier":"XXX"')).toBeFalsy(); - expect(astStr.match(/unary_defined/g)?.length).toBe(4); + preprocessAst(ast); + expect(generate(ast)).toBe(` +true +`); +}); +test('inline comments in if statement expression', () => { + const program = ` +#define AAA +#define BBB +#if defined/**/AAA && defined/**/ BBB +true +#endif +`; expectParsedProgram(program); + const ast = parse(program); + preprocessAst(ast); + expect(generate(ast)).toBe(` +true +`); }); /*