From de7ef88d73f99bea966dcb3b5105d61950125200 Mon Sep 17 00:00:00 2001 From: Andrew Ray Date: Thu, 22 Jun 2023 01:20:57 -0700 Subject: [PATCH 1/8] Refactor to support tracking undeclared functions and types This is a significant set of changes, some breaking. The main goal of this change is to support tracking whether or not functions and types have declarations in the scope. The type and function scope entries now have a `definition` key, which points to the definition of a function or type. Features: - The parser now supports overloaded function tracking in scope. A function scope index went from `{ [fnName]: { references: AstNode[] }` to `{ [fnName]: { [overloadSignature]: { declaration?: AstNode, references: AstNode[], ... } } }`. This is a breaking change. Note that if you're using the `renameFunctions` utility function provided by the parser, this change _may_ be opaque to you. - The semantic analysis of this library is still mostly non-existent, but there are now improved warnings for missing function and type definitions - New `failOnWarn` parser option flag to raise errors on things like undefined variables. Breaking API changes: - Adds a new `TypeNameNode` AST node type, to distinguish a type name from an identifier in the AST. If you're using node visitors to visit `identifier` nodes, you'll need a new visitor for `type_name` nodes. - Removes `ParameterDeclaratorNode` and moves everything into `ParameterDeclarationNode` - In the AST node Typescript definitions, any time I didn't know what node was, I put in `any`. I replaced that with `AstNode`. I don't yet know if I want to keep this, because `AstNode` could lead to more issues than it causes. It could lead to type errors and forced casting that wouldn't come along with `any`. Like it might force you to make sure our node isn't a `LiteralNode` even though technically the grammar doesn't allow for that. 
Internal development: - All of the functions that were defined in `src/parser/glsl-grammar.pegjs` are now rewritten in typescript and extracted into an external file. - Various clean-ups of the grammar, like removing the duplicate path `function_prototype_no_new_scope` - Cleanup of tsconfig.json file - Adds the `tracer` Peggyjs option to the parser, for debugging - Removes preprocessor tests from parse.ast.ts - Breaking out of source code into more logical files --- .github/workflows/main.yml | 2 + .gitignore | 2 + README.md | 134 +++++- build.sh | 3 +- jest.config.js | 3 +- package-lock.json | 18 +- package.json | 4 +- src/ast/{node.ts => ast-types.ts} | 162 ++++--- src/ast/ast.test.ts | 45 +- src/ast/ast.ts | 175 +------ src/ast/index.ts | 3 +- src/ast/visit.ts | 138 ++++++ src/parser/generator.ts | 11 +- src/parser/glsl-grammar.pegjs | 754 +++++++++--------------------- src/parser/grammar.ts | 631 +++++++++++++++++++++++++ src/parser/parse.test.ts | 531 +++------------------ src/parser/parser.d.ts | 8 + src/parser/scope.test.ts | 550 ++++++++++++++++++++++ src/parser/scope.ts | 222 +++++++++ src/parser/test-helpers.ts | 194 ++++++++ src/parser/utils.ts | 103 ++-- src/preprocessor/preprocessor.ts | 98 ++-- tsconfig.json | 29 +- 23 files changed, 2410 insertions(+), 1410 deletions(-) rename src/ast/{node.ts => ast-types.ts} (79%) create mode 100644 src/ast/visit.ts create mode 100644 src/parser/grammar.ts create mode 100644 src/parser/scope.test.ts create mode 100644 src/parser/scope.ts create mode 100644 src/parser/test-helpers.ts diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index c2bba0b..c8d43ec 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -25,3 +25,5 @@ jobs: - run: npm ci - name: Run tests run: npm test + - name: Typecheck + run: npx tsc --noEmit diff --git a/.gitignore b/.gitignore index 177e25e..d6f31e4 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,5 @@ node_modules dist .vscode .DS_Store +tmp 
+src/parser/parser.js diff --git a/README.md b/README.md index b09b04d..7aca1e5 100644 --- a/README.md +++ b/README.md @@ -28,10 +28,10 @@ npm install --save @shaderfrog/glsl-parser import { parser, generate } from '@shaderfrog/glsl-parser'; // To parse a GLSL program's source code into an AST: -const ast = parser.parse('float a = 1.0;'); +const program = parser.parse('float a = 1.0;'); // To turn a parsed AST back into a source program -const program = generate(ast); +const transpiled = generate(program); ``` The parser accepts an optional second `options` argument: @@ -41,18 +41,24 @@ parser.parse('float a = 1.0;', options); Where `options` is: -```js -{ +```typescript +type ParserOptions = { // Hide warnings. If set to false or not set, then the parser logs warnings - // like undefined functions and variables - quiet: boolean, + // like undefined functions and variables. If `failOnWarn` is set to true, + // warnings will still cause the parser to raise an error. Defaults to false. + quiet: boolean; // The origin of the GLSL, for debugging. For example, "main.js", If the // parser raises an error (specifically a GrammarError), and you call - // error.format([]) on it, the error shows { source: 'main.js', ... } - grammarSource: string, + // error.format([]) on it, the error shows { source: 'main.js', ... }. + // Defaults to null. + grammarSource: string; // If true, sets location information on each AST node, in the form of - // { column: number, line: number, offset: number } - includeLocation: boolean + // { column: number, line: number, offset: number }. Defaults to false. + includeLocation: boolean; + // If true, causes the parser to raise an error instead of log a warning. + // The parser does limited type checking, and things like undeclared variables + // are treated as warnings. Defaults to false. 
+ failOnWarn: boolean; } ``` @@ -76,8 +82,8 @@ console.log(preprocess(` Where `options` is: -```js -{ +```typescript +type PreprocessorOptions = { // Don't strip comments before preprocessing preserveComments: boolean, // Macro definitions to use when preprocessing @@ -109,16 +115,98 @@ import { const commentsRemoved = preprocessComments(`float a = 1.0;`) // Parse the source text into an AST -const ast = parser.parse(commentsRemoved); +const program = parser.parse(commentsRemoved); // Then preproces it, expanding #defines, evaluating #ifs, etc -preprocessAst(ast); +preprocessAst(program); // Then convert it back into a program string, which can be passed to the // core glsl parser -const preprocessed = preprocessorGenerate(ast); +const preprocessed = preprocessorGenerate(program); ``` +## Scope + +`parse()` returns a [`Program`], which has a `scopes` array on it. A scope looks +like: +```typescript +type Scope = { + name: string; + parent?: Scope; + bindings: ScopeIndex; + types: TypeScopeIndex; + functions: FunctionScopeIndex; + location?: LocationObject; +} +``` + +The `name` of a scope is either `"global"`, the name of the function that +introduced the scope, or in anonymous blocks, `"{"`. In each scope, `bindings` represents variables, +`types` represents user-created types (structs in GLSL), and `functions` represents +functions. + +For `bindings` and `types`, the scope index looks like: +```typescript +type ScopeIndex = { + [name: string]: { + declaration?: AstNode; + references: AstNode[]; + } +} +``` + +Where `name` is the name of the variable or type. `declaration` is the AST node +where the variable was declared. In the case the variable is used without being +declared, `declaration` won't be present. If you set the [`failOnWarn` parser +option](#Parsing) to `true`, the parser will throw an error when encountering +an undeclared variable, rather than allow a scope entry without a declaration. 
+ +For `functions`, the scope index is slighty different: +```typescript +type FunctionScopeIndex = { + [name: string]: { + [signature: string]: { + returnType: string; + parameterTypes: string[]; + declaration?: FunctionNode; + references: AstNode[]; + } + } +}; +``` + +Where `name` is the name of the function, and `signature` is a string representing +the function's return and parameter types, in the form of `"returnType: paramType1, paramType2, ..."` +or `"returnType: void"` in the case of no arguments. Each `signature` in this +index represents an "overloaded" function in GLSL, as in: + +```glsl +void someFunction(int x) {}; +void someFunction(int x, int y) {}; +``` + +With this source code, there will be two entries under `name`, one for each +overload signature. The `references` are the uses of that specific overloaded +version of the function. `references` also contains the function prototypes +for the overloaded function, if present. + +In the case there is only one declaration for a function, there will still be +a single entry under `name` with the function's `signature`. + +⚠️ Caution! This parser does very limited type checking. This leads to a known +case where a function call can match to the wrong overload in scope: + +```glsl +void someFunction(float, float); +void someFunction(bool, bool); +someFunction(true, true); // This will be attributed to the wrong scope entry +``` + +The parser doesn't know the type of the operands in the function call, so it +matches based on the name and arity of the functions. + +See also [#Utility-Functions] for renaming scope references. + ## Manipulating and Searching ASTs ### Visitors @@ -283,7 +371,17 @@ and `#extension` have no effect, and can be fully preserved as part of parsing. # Local Development -To run the tests (and do other things), you must first build the parser files -using Peggy. Run `./build.sh` to generate these files. - To work on the tests, run `npx jest --watch`. 
+ +The GLSL grammar definition lives in `src/parser/glsl-grammar.pegjs`. Peggyjs +supports inlining Javascript code in the `.pegjs` file to define utility +functions, but that means you have to write in vanilla Javascript, which is +terrible. Instead, I've pulled out utility functions into the `grammar.ts` +entrypoint. Some functions need access to Peggy's local variables, like +`location(s)`, so the `makeLocals()` function uses a closure to provide that +access. + +To submit a change, please open a pull request. Tests are appreciated! + +See [the Github workflow](.github/workflows/main.yml) for the checks run against +each PR. diff --git a/build.sh b/build.sh index fb57d57..463a813 100755 --- a/build.sh +++ b/build.sh @@ -8,10 +8,9 @@ mkdir -p dist # Compile the typescript project npx tsc -# Build the parers with peggy. Requires tsc to run first for the subfolders npx peggy --cache -o dist/parser/parser.js src/parser/glsl-grammar.pegjs # Manualy copy in the type definitions -cp src/parser/parser.d.ts dist/parser/parser.d.ts +cp src/parser/parser.d.ts dist/parser/ npx peggy --cache -o dist/preprocessor/preprocessor-parser.js src/preprocessor/preprocessor-grammar.pegjs cp src/preprocessor/preprocessor-parser.d.ts dist/preprocessor/preprocessor-parser.d.ts diff --git a/jest.config.js b/jest.config.js index a0ba9a5..91cb476 100644 --- a/jest.config.js +++ b/jest.config.js @@ -1,4 +1,5 @@ module.exports = { - testPathIgnorePatterns: ['dist/'], moduleFileExtensions: ['ts', 'tsx', 'js', 'json', 'pegjs', 'glsl'], + modulePathIgnorePatterns: ['src/parser/parser.js'], + testPathIgnorePatterns: ['dist', 'src/parser/parser.js'], }; diff --git a/package-lock.json b/package-lock.json index 8086bdd..2733b4b 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@shaderfrog/glsl-parser", - "version": "1.3.0", + "version": "1.4.2", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "@shaderfrog/glsl-parser", - "version": "1.3.0", + 
"version": "1.4.2", "license": "ISC", "devDependencies": { "@babel/core": "^7.15.5", @@ -18,7 +18,7 @@ "jest": "^27.0.2", "peggy": "^1.2.0", "prettier": "^2.1.2", - "typescript": "^4.9.3" + "typescript": "^4.9.5" }, "engines": { "node": ">=16" @@ -5114,9 +5114,9 @@ } }, "node_modules/typescript": { - "version": "4.9.3", - "resolved": "https://registry.npmjs.org/typescript/-/typescript-4.9.3.tgz", - "integrity": "sha512-CIfGzTelbKNEnLpLdGFgdyKhG23CKdKgQPOBc+OUNrkJ2vr+KSzsSV5kq5iWhEQbok+quxgGzrAtGWCyU7tHnA==", + "version": "4.9.5", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-4.9.5.tgz", + "integrity": "sha512-1FXk9E2Hm+QzZQ7z+McJiHL4NW1F2EzMu9Nq9i3zAaGqibafqYwCVU6WyWAuyQRRzOlxou8xZSyXLEN8oKj24g==", "dev": true, "bin": { "tsc": "bin/tsc", @@ -9212,9 +9212,9 @@ } }, "typescript": { - "version": "4.9.3", - "resolved": "https://registry.npmjs.org/typescript/-/typescript-4.9.3.tgz", - "integrity": "sha512-CIfGzTelbKNEnLpLdGFgdyKhG23CKdKgQPOBc+OUNrkJ2vr+KSzsSV5kq5iWhEQbok+quxgGzrAtGWCyU7tHnA==", + "version": "4.9.5", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-4.9.5.tgz", + "integrity": "sha512-1FXk9E2Hm+QzZQ7z+McJiHL4NW1F2EzMu9Nq9i3zAaGqibafqYwCVU6WyWAuyQRRzOlxou8xZSyXLEN8oKj24g==", "dev": true }, "unicode-canonical-property-names-ecmascript": { diff --git a/package.json b/package.json index 876300b..df1de0c 100644 --- a/package.json +++ b/package.json @@ -3,7 +3,7 @@ "engines": { "node": ">=16" }, - "version": "1.4.2", + "version": "2.0.0-beta.0", "description": "A GLSL ES 1.0 and 3.0 parser and preprocessor that can preserve whitespace and comments", "scripts": { "prepare": "npm run build && ./prepublish.sh", @@ -44,6 +44,6 @@ "jest": "^27.0.2", "peggy": "^1.2.0", "prettier": "^2.1.2", - "typescript": "^4.9.3" + "typescript": "^4.9.5" } } diff --git a/src/ast/node.ts b/src/ast/ast-types.ts similarity index 79% rename from src/ast/node.ts rename to src/ast/ast-types.ts index 6d41db7..95d1d55 100644 --- a/src/ast/node.ts +++ 
b/src/ast/ast-types.ts @@ -1,10 +1,20 @@ /** * This file is written by hand, to map to the parser expression results in * parser/glsl-grammar.pegjs. It very, very likely contains errors. I put in - * *any* types where I was lazy or didn't know the core type. + * *AstNode* types where I was lazy or didn't know the core type. */ -type LocationInfo = { offset: number; line: number; column: number }; +import { Scope } from '../parser/scope'; + +export interface Program { + type: 'program'; + program: AstNode[]; + scopes: Scope[]; + wsStart?: string; + wsEnd?: string; +} + +export type LocationInfo = { offset: number; line: number; column: number }; export type LocationObject = { start: LocationInfo; @@ -35,30 +45,31 @@ export interface IdentifierNode extends BaseNode { whitespace: Whitespace; } +export interface TypeNameNode extends BaseNode { + type: 'type_name'; + identifier: string; + whitespace: Whitespace; +} + export interface ArraySpecifierNode extends BaseNode { type: 'array_specifier'; lb: LiteralNode; - expression: any; + expression: AstNode; rb: LiteralNode; } -export interface ArraySpecifiersNode extends BaseNode { - type: 'array_specifiers'; - specifiers: ArraySpecifierNode[]; -} - export interface AssignmentNode extends BaseNode { type: 'assignment'; - left: any; + left: AstNode; operator: LiteralNode; - right: any; + right: AstNode; } export interface BinaryNode extends BaseNode { type: 'binary'; - operator: any; - left: any; - right: any; + operator: LiteralNode; + left: AstNode; + right: AstNode; } export interface BoolConstantNode extends BaseNode { @@ -76,16 +87,16 @@ export interface BreakStatementNode extends BaseNode { export interface CompoundStatementNode extends BaseNode { type: 'compound_statement'; lb: LiteralNode; - statements: any[]; + statements: AstNode[]; rb: LiteralNode; } export interface ConditionExpressionNode extends BaseNode { type: 'condition_expression'; - specified_type: any; + specified_type: AstNode; identifier: 
IdentifierNode; operator: LiteralNode; - initializer: any; + initializer: AstNode; } export interface ContinueStatementNode extends BaseNode { @@ -96,29 +107,29 @@ export interface ContinueStatementNode extends BaseNode { export interface DeclarationStatementNode extends BaseNode { type: 'declaration_statement'; - declaration: any; + declaration: AstNode; semi: LiteralNode; } export interface DeclarationNode extends BaseNode { type: 'declaration'; identifier: IdentifierNode; - quantifier: any; + quantifier: ArraySpecifierNode[]; operator: LiteralNode; - initializer: any; + initializer: AstNode; } export interface DeclaratorListNode extends BaseNode { type: 'declarator_list'; - specified_type: any; - declarations: any[]; + specified_type: AstNode; + declarations: AstNode[]; commas: LiteralNode[]; } export interface DefaultCaseNode extends BaseNode { type: 'default_case'; statements: []; - default: any; + default: AstNode; colon: LiteralNode; } @@ -131,10 +142,10 @@ export interface DiscardStatementNode extends BaseNode { export interface DoStatementNode extends BaseNode { type: 'do_statement'; do: KeywordNode; - body: any; + body: AstNode; while: KeywordNode; lp: LiteralNode; - expression: any; + expression: AstNode; rp: LiteralNode; semi: LiteralNode; } @@ -147,7 +158,7 @@ export interface DoubleConstantNode extends BaseNode { export interface ExpressionStatementNode extends BaseNode { type: 'expression_statement'; - expression: any; + expression: AstNode; semi: LiteralNode; } @@ -163,22 +174,33 @@ export interface FloatConstantNode extends BaseNode { whitespace: Whitespace; } +type SimpleStatement = + | ContinueStatementNode + | BreakStatementNode + | ReturnStatementNode + | DiscardStatementNode + | DeclarationStatementNode + | ExpressionStatementNode + | IfStatementNode + | SwitchStatementNode + | WhileStatementNode; + export interface ForStatementNode extends BaseNode { type: 'for_statement'; for: KeywordNode; - body: any; + body: CompoundStatementNode | 
SimpleStatement; lp: LiteralNode; - init: any; + init: AstNode; initSemi: LiteralNode; - condition: any; + condition: ConditionExpressionNode; conditionSemi: LiteralNode; - operation: any; + operation: AstNode; rp: LiteralNode; } export interface FullySpecifiedTypeNode extends BaseNode { type: 'fully_specified_type'; - qualifiers: any[]; + qualifiers: AstNode[]; specifier: TypeSpecifierNode; } @@ -192,7 +214,7 @@ export interface FunctionCallNode extends BaseNode { type: 'function_call'; identifier: AstNode; lp: LiteralNode; - args: any[]; + args: AstNode[]; rp: LiteralNode; } @@ -206,7 +228,7 @@ export interface FunctionHeaderNode extends BaseNode { export interface FunctionPrototypeNode extends BaseNode { type: 'function_prototype'; header: FunctionHeaderNode; - parameters: any[]; + parameters: ParameterDeclarationNode[]; commas: LiteralNode[]; rp: LiteralNode; } @@ -214,24 +236,24 @@ export interface FunctionPrototypeNode extends BaseNode { export interface GroupNode extends BaseNode { type: 'group'; lp: LiteralNode; - expression: any; + expression: AstNode; rp: LiteralNode; } export interface IfStatementNode extends BaseNode { type: 'if_statement'; if: KeywordNode; - body: any; + body: AstNode; lp: LiteralNode; - condition: any; + condition: AstNode; rp: LiteralNode; - else: any[]; + else: AstNode[]; } export interface InitializerListNode extends BaseNode { type: 'initializer_list'; lb: LiteralNode; - initializers: any[]; + initializers: AstNode[]; commas: LiteralNode[]; rb: LiteralNode; } @@ -244,10 +266,10 @@ export interface IntConstantNode extends BaseNode { export interface InterfaceDeclaratorNode extends BaseNode { type: 'interface_declarator'; - qualifiers: any; - interface_type: any; + qualifiers: AstNode; + interface_type: AstNode; lp: LiteralNode; - declarations: any; + declarations: AstNode; rp: LiteralNode; identifier?: QuantifiedIdentifierNode; } @@ -256,35 +278,30 @@ export interface LayoutQualifierIdNode extends BaseNode { type: 
'layout_qualifier_id'; identifier: IdentifierNode; operator: LiteralNode; - expression: any; + expression: AstNode; } export interface LayoutQualifierNode extends BaseNode { type: 'layout_qualifier'; layout: KeywordNode; lp: LiteralNode; - qualifiers: any[]; + qualifiers: AstNode[]; commas: LiteralNode[]; rp: LiteralNode; } export interface ParameterDeclarationNode extends BaseNode { type: 'parameter_declaration'; - qualifier: any[]; - declaration: ParameterDeclaratorNode | TypeSpecifierNode; -} - -export interface ParameterDeclaratorNode extends BaseNode { - type: 'parameter_declarator'; - specifier: any; + qualifier: AstNode[]; + specifier: TypeSpecifierNode; identifier: IdentifierNode; - quantifier: any; + quantifier: ArraySpecifierNode[]; } export interface PostfixNode extends BaseNode { type: 'postfix'; - expression: any; - postfix: any; + expression: AstNode; + postfix: AstNode; } export interface PrecisionNode extends BaseNode { @@ -302,7 +319,7 @@ export interface PreprocessorNode extends BaseNode { export interface QualifierDeclaratorNode extends BaseNode { type: 'qualifier_declarator'; - qualifiers: any[]; + qualifiers: AstNode[]; declarations: IdentifierNode[]; commas: LiteralNode[]; } @@ -310,30 +327,30 @@ export interface QualifierDeclaratorNode extends BaseNode { export interface QuantifiedIdentifierNode extends BaseNode { type: 'quantified_identifier'; identifier: IdentifierNode; - quantifier: any; + quantifier: ArraySpecifierNode[]; } export interface QuantifierNode extends BaseNode { type: 'quantifier'; lb: LiteralNode; - expression: any; + expression: AstNode; rb: LiteralNode; } export interface ReturnStatementNode extends BaseNode { type: 'return_statement'; return: KeywordNode; - expression: any; + expression: AstNode; semi: LiteralNode; } export interface StructNode extends BaseNode { type: 'struct'; lb: LiteralNode; - declarations: any[]; + declarations: AstNode[]; rb: LiteralNode; struct: KeywordNode; - typeName: IdentifierNode; + typeName: 
TypeNameNode; } export interface StructDeclarationNode extends BaseNode { @@ -353,7 +370,7 @@ export interface SubroutineQualifierNode extends BaseNode { type: 'subroutine_qualifier'; subroutine: KeywordNode; lp: LiteralNode; - type_names: IdentifierNode[]; + type_names: TypeNameNode[]; commas: LiteralNode[]; rp: LiteralNode; } @@ -361,8 +378,8 @@ export interface SubroutineQualifierNode extends BaseNode { export interface SwitchCaseNode extends BaseNode { type: 'switch_case'; statements: []; - case: any; - test: any; + case: AstNode; + test: AstNode; colon: LiteralNode; } @@ -370,26 +387,26 @@ export interface SwitchStatementNode extends BaseNode { type: 'switch_statement'; switch: KeywordNode; lp: LiteralNode; - expression: any; + expression: AstNode; rp: LiteralNode; lb: LiteralNode; - cases: any[]; + cases: AstNode[]; rb: LiteralNode; } export interface TernaryNode extends BaseNode { type: 'ternary'; - expression: any; + expression: AstNode; question: LiteralNode; - left: any; - right: any; + left: AstNode; + right: AstNode; colon: LiteralNode; } export interface TypeSpecifierNode extends BaseNode { type: 'type_specifier'; - specifier: KeywordNode | IdentifierNode | StructNode; - quantifier: any; + specifier: KeywordNode | IdentifierNode | StructNode | TypeNameNode; + quantifier: ArraySpecifierNode[] | null; } export interface UintConstantNode extends BaseNode { @@ -401,24 +418,24 @@ export interface UintConstantNode extends BaseNode { export interface UnaryNode extends BaseNode { type: 'unary'; operator: LiteralNode; - expression: any; + expression: AstNode; } export interface WhileStatementNode extends BaseNode { type: 'while_statement'; while: KeywordNode; lp: LiteralNode; - condition: any; + condition: AstNode; rp: LiteralNode; - body: any; + body: AstNode; } export type AstNode = | LiteralNode | KeywordNode | IdentifierNode + | TypeNameNode | ArraySpecifierNode - | ArraySpecifiersNode | AssignmentNode | BinaryNode | BoolConstantNode @@ -450,7 +467,6 @@ 
export type AstNode = | LayoutQualifierIdNode | LayoutQualifierNode | ParameterDeclarationNode - | ParameterDeclaratorNode | PostfixNode | PrecisionNode | PreprocessorNode diff --git a/src/ast/ast.test.ts b/src/ast/ast.test.ts index 2e3a531..c73d948 100644 --- a/src/ast/ast.test.ts +++ b/src/ast/ast.test.ts @@ -1,10 +1,21 @@ -import { visit } from '.'; -import { AstNode, BinaryNode } from './node'; +import { AstNode, BinaryNode, IdentifierNode, LiteralNode } from './ast-types'; +import { visit } from './visit'; + +const literal = (literal: string): LiteralNode => ({ + type: 'literal', + literal, + whitespace: '', +}); +const identifier = (identifier: string): IdentifierNode => ({ + type: 'identifier', + identifier, + whitespace: '', +}); test('visit()', () => { const tree: BinaryNode = { type: 'binary', - operator: '-', + operator: literal('-'), // mock location data location: { start: { line: 0, column: 0, offset: 0 }, @@ -12,22 +23,15 @@ test('visit()', () => { }, left: { type: 'binary', - operator: '+', - left: { - type: 'identifier', - identifier: 'foo', - }, - right: { - type: 'identifier', - identifier: 'bar', - }, + operator: literal('+'), + left: identifier('foo'), + right: identifier('bar'), }, right: { type: 'group', - expression: { - type: 'identifier', - identifier: 'baz', - }, + lp: literal('('), + rp: literal(')'), + expression: identifier('baz'), }, }; @@ -40,10 +44,13 @@ test('visit()', () => { enter: (path) => { const { node } = path; if (node.identifier === 'foo') { - grandparent = path.findParent(({ node }) => node.operator === '-') + grandparent = path.findParent( + ({ node }) => node.operator.literal === '-' + )?.node; + parent = path.findParent(({ node }) => node.operator.literal === '+') + ?.node; + unfound = path.findParent(({ node }) => node.operator.literal === '*') ?.node; - parent = path.findParent(({ node }) => node.operator === '+')?.node; - unfound = path.findParent(({ node }) => node.operator === '*')?.node; } }, }, diff --git 
a/src/ast/ast.ts b/src/ast/ast.ts index 10a371e..868962b 100644 --- a/src/ast/ast.ts +++ b/src/ast/ast.ts @@ -1,169 +1,4 @@ -import type { AstNode, LocationObject } from './node'; - -export type ScopeIndex = { - [name: string]: { references: AstNode[] }; -}; - -export type Scope = { - name: string; - parent?: Scope; - bindings: ScopeIndex; - types: ScopeIndex; - functions: ScopeIndex; - location: LocationObject; -}; - -const isNode = (node: AstNode) => !!node?.type; -const isTraversable = (node: any) => isNode(node) || Array.isArray(node); - -/** - * Converts an AST to a singe value, visiting nodes and using visitor callbacks - * to generate the node's value. TODO: Could this be done with a reducetree - * function? Also this is different than the enter/exit visitors in the ast - * visitor function. Can these be merged into the same strategy? - */ - -export interface Program { - type: 'program'; - program: AstNode[]; - scopes: Scope[]; - wsStart?: string; - wsEnd?: string; -} - -export type Path = { - node: NodeType; - parent: Program | AstNode | undefined; - parentPath: Path | undefined; - key: string | undefined; - index: number | undefined; - skip: () => void; - remove: () => void; - replaceWith: (replacer: AstNode) => void; - findParent: (test: (p: Path) => boolean) => Path | undefined; - - skipped?: boolean; - removed?: boolean; - replaced?: any; -}; - -const makePath = ( - node: NodeType, - parent: AstNode | Program | undefined, - parentPath: Path | undefined, - key: string | undefined, - index: number | undefined -): Path => ({ - node, - parent, - parentPath, - key, - index, - skip: function () { - this.skipped = true; - }, - remove: function () { - this.removed = true; - }, - replaceWith: function (replacer) { - this.replaced = replacer; - }, - findParent: function (test) { - return !parentPath - ? parentPath - : test(parentPath) - ? 
parentPath - : parentPath.findParent(test); - }, -}); - -export type NodeVisitor = { - enter?: (p: Path) => void; - exit?: (p: Path) => void; -}; - -// This builds a type of all AST types to a visitor type. Aka it builds -// { -// function_call: NodeVisitor, -// ... -// } -// AstNode['type'] is the union of all the type properties of all AST nodes. -// Extract pulls out the type from the AstNode union where the "type" -// property matches the NodeType (like "function_call"). Pretty sweet! -export type NodeVisitors = { - [NodeType in AstNode['type']]?: NodeVisitor< - Extract - >; -} & { program?: NodeVisitor }; - -/** - * Apply the visitor pattern to an AST that conforms to this compiler's spec - */ -const visit = (ast: Program | AstNode, visitors: NodeVisitors) => { - const visitNode = ( - node: AstNode | Program, - parent?: AstNode | Program, - parentPath?: Path, - key?: string, - index?: number - ) => { - const visitor = visitors[node.type]; - const path = makePath(node, parent, parentPath, key, index); - const parentNode = parent as any; - - if (visitor?.enter) { - visitor.enter(path as any); - if (path.removed) { - if (!key || !parent) { - throw new Error( - `Asked to remove ${node} but no parent key was present in ${parent}` - ); - } - if (typeof index === 'number') { - parentNode[key].splice(index, 1); - } else { - parentNode[key] = null; - } - return path; - } - if (path.replaced) { - if (!key || !parent) { - throw new Error( - `Asked to remove ${node} but no parent key was present in ${parent}` - ); - } - if (typeof index === 'number') { - parentNode[key].splice(index, 1, path.replaced); - } else { - parentNode[key] = path.replaced; - } - } - if (path.skipped) { - return path; - } - } - - Object.entries(node) - .filter(([_, nodeValue]) => isTraversable(nodeValue)) - .forEach(([nodeKey, nodeValue]) => { - if (Array.isArray(nodeValue)) { - for (let i = 0, offset = 0; i - offset < nodeValue.length; i++) { - const child = nodeValue[i - offset]; - const res = 
visitNode(child, node, path, nodeKey, i - offset); - if (res?.removed) { - offset += 1; - } - } - } else { - visitNode(nodeValue, node, path, nodeKey); - } - }); - - visitor?.exit?.(path as any); - }; - - visitNode(ast); -}; +import type { AstNode, Program } from './ast-types'; type NodeGenerator = (node: NodeType) => string; @@ -180,7 +15,7 @@ export type Generator = ( /** * Stringify an AST */ -const makeGenerator = (generators: NodeGenerators): Generator => { +export const makeGenerator = (generators: NodeGenerators): Generator => { const gen = ( ast: Program | AstNode | AstNode[] | string | string[] | undefined | null ): string => @@ -198,7 +33,9 @@ const makeGenerator = (generators: NodeGenerators): Generator => { export type EveryOtherGenerator = (nodes: AstNode[], eo: AstNode[]) => string; -const makeEveryOtherGenerator = (generate: Generator): EveryOtherGenerator => { +export const makeEveryOtherGenerator = ( + generate: Generator +): EveryOtherGenerator => { const everyOther = (nodes: AstNode[], eo: AstNode[]) => nodes.reduce( (output, node, index) => @@ -209,5 +46,3 @@ const makeEveryOtherGenerator = (generate: Generator): EveryOtherGenerator => { ); return everyOther; }; - -export { visit, makeGenerator, makeEveryOtherGenerator }; diff --git a/src/ast/index.ts b/src/ast/index.ts index bdd58c0..94b3dd9 100644 --- a/src/ast/index.ts +++ b/src/ast/index.ts @@ -1,2 +1,3 @@ export * from './ast'; -export * from './node'; +export * from './visit'; +export * from './ast-types'; diff --git a/src/ast/visit.ts b/src/ast/visit.ts new file mode 100644 index 0000000..5f91d10 --- /dev/null +++ b/src/ast/visit.ts @@ -0,0 +1,138 @@ +import type { AstNode, Program } from './ast-types'; + +const isNode = (node: AstNode) => !!node?.type; +const isTraversable = (node: any) => isNode(node) || Array.isArray(node); + +export type Path = { + node: NodeType; + parent: Program | AstNode | undefined; + parentPath: Path | undefined; + key: string | undefined; + index: number | 
undefined; + skip: () => void; + remove: () => void; + replaceWith: (replacer: AstNode) => void; + findParent: (test: (p: Path) => boolean) => Path | undefined; + + skipped?: boolean; + removed?: boolean; + replaced?: any; +}; + +const makePath = ( + node: NodeType, + parent: AstNode | Program | undefined, + parentPath: Path | undefined, + key: string | undefined, + index: number | undefined +): Path => ({ + node, + parent, + parentPath, + key, + index, + skip: function () { + this.skipped = true; + }, + remove: function () { + this.removed = true; + }, + replaceWith: function (replacer) { + this.replaced = replacer; + }, + findParent: function (test) { + return !parentPath + ? parentPath + : test(parentPath) + ? parentPath + : parentPath.findParent(test); + }, +}); + +export type NodeVisitor = { + enter?: (p: Path) => void; + exit?: (p: Path) => void; +}; + +// This builds a type of all AST types to a visitor type. Aka it builds +// { +// function_call: NodeVisitor, +// ... +// } +// AstNode['type'] is the union of all the type properties of all AST nodes. +// Extract pulls out the type from the AstNode union where the "type" +// property matches the NodeType (like "function_call"). Pretty sweet! 
+export type NodeVisitors = { + [NodeType in AstNode['type']]?: NodeVisitor< + Extract + >; +} & { program?: NodeVisitor }; + +/** + * Apply the visitor pattern to an AST that conforms to this compiler's spec + */ +export const visit = (ast: Program | AstNode, visitors: NodeVisitors) => { + const visitNode = ( + node: AstNode | Program, + parent?: AstNode | Program, + parentPath?: Path, + key?: string, + index?: number + ) => { + const visitor = visitors[node.type]; + const path = makePath(node, parent, parentPath, key, index); + const parentNode = parent as any; + + if (visitor?.enter) { + visitor.enter(path as any); + if (path.removed) { + if (!key || !parent) { + throw new Error( + `Asked to remove ${node} but no parent key was present in ${parent}` + ); + } + if (typeof index === 'number') { + parentNode[key].splice(index, 1); + } else { + parentNode[key] = null; + } + return path; + } + if (path.replaced) { + if (!key || !parent) { + throw new Error( + `Asked to remove ${node} but no parent key was present in ${parent}` + ); + } + if (typeof index === 'number') { + parentNode[key].splice(index, 1, path.replaced); + } else { + parentNode[key] = path.replaced; + } + } + if (path.skipped) { + return path; + } + } + + Object.entries(node) + .filter(([_, nodeValue]) => isTraversable(nodeValue)) + .forEach(([nodeKey, nodeValue]) => { + if (Array.isArray(nodeValue)) { + for (let i = 0, offset = 0; i - offset < nodeValue.length; i++) { + const child = nodeValue[i - offset]; + const res = visitNode(child, node, path, nodeKey, i - offset); + if (res?.removed) { + offset += 1; + } + } + } else { + visitNode(nodeValue, node, path, nodeKey); + } + }); + + visitor?.exit?.(path as any); + }; + + visitNode(ast); +}; diff --git a/src/parser/generator.ts b/src/parser/generator.ts index 10deba5..66d2cee 100644 --- a/src/parser/generator.ts +++ b/src/parser/generator.ts @@ -92,10 +92,10 @@ const generators: NodeGenerators = { generateWithEveryOther(node.declarations, 
node.commas), type_specifier: (node) => generate(node.specifier) + generate(node.quantifier), - array_specifiers: (node) => generate(node.specifiers), array_specifier: (node) => generate(node.lb) + generate(node.expression) + generate(node.rb), identifier: (node) => node.identifier + generate(node.whitespace), + type_name: (node) => node.identifier + generate(node.whitespace), function_header: (node) => generate(node.returnType) + generate(node.name) + generate(node.lp), function_prototype: (node) => @@ -107,7 +107,10 @@ const generators: NodeGenerators = { : '') + generate(node.rp), parameter_declaration: (node) => - generate(node.qualifier) + generate(node.declaration), + generate(node.qualifier) + + generate(node.specifier) + + generate(node.identifier) + + generate(node.quantifier), compound_statement: (node) => generate(node.lb) + generate(node.statements) + generate(node.rb), function: (node) => generate(node['prototype']) + generate(node.body), @@ -116,10 +119,6 @@ const generators: NodeGenerators = { generate(node.lp) + generate(node.args) + generate(node.rp), - parameter_declarator: (node) => - generate(node.specifier) + - generate(node.identifier) + - generate(node.quantifier), postfix: (node) => generate(node.expression) + generate(node.postfix), quantifier: (node) => generate(node.lb) + generate(node.expression) + generate(node.rb), diff --git a/src/parser/glsl-grammar.pegjs b/src/parser/glsl-grammar.pegjs index 0c014a8..898704e 100644 --- a/src/parser/glsl-grammar.pegjs +++ b/src/parser/glsl-grammar.pegjs @@ -1,401 +1,73 @@ -// https://www.khronos.org/registry/OpenGL/specs/gl/GLSLangSpec.4.40.pdf -// https://www.khronos.org/registry/OpenGL/specs/gl/GLSLangSpec.4.60.pdf - +/** + * Peggyjs (formerly Peg.js) grammar for Khronos OpenGL ES 3.00. The Khronos + * grammar is not defined as a PEG grammar. This grammar makes the neccessary + * translations for PEG, like making sure productions are defined with specific + * ordering. 
+ * + * Full grammar reference: https://www.khronos.org/registry/OpenGL/specs/gl/GLSLangSpec.4.40.pdf + */ + +// Global parser definitions, shared between all parsers {{ // Apparently peggy can't handle an open curly brace in a string, see // https://github.com/pegjs/pegjs/issues/187 const OPEN_CURLY = String.fromCharCode(123); - // Types (aka struct) scope - const addTypes = (scope, ...types) => { - types.forEach(([identifier, type]) => { - scope.types[identifier] = { - references: [type] - }; - }); - }; - const addTypeReference = (scope, name, reference) => { - scope.types[name].references.push(reference); - }; - const findTypeScope = (scope, typeName) => { - if(!scope) { - return null; - } - if(typeName in scope.types) { - return scope; - } - return findTypeScope(scope.parent, typeName); - } - const isDeclaredType = (scope, typeName) => findTypeScope(scope, typeName) !== null; - - // Bindings (aka variables, parameters) scope - const createBindings = (scope, ...bindings) => { - bindings.forEach(([identifier, binding]) => { - const newBinding = scope.bindings[identifier] || { references: [] }; - newBinding.initializer = binding; - newBinding.references.unshift(binding); - scope.bindings[identifier] = newBinding - }); - }; - const addBindingReference = (scope, name, reference) => { - // In the case of "float a = 1, b = a;" we parse the final "a" before the - // parent declarator list is parsed. So we might need to add the final "a" - // to the scope first. 
- const foundScope = findBindingScope(scope, name); - if(foundScope) { - // console.log(name, 'found in scope', foundScope); - foundScope.bindings[name].references.push(reference); - } else { - // console.log(name,'not found in current scope, creating binding in', scope); - createBindings(scope, [name, reference]); - } - }; - const findBindingScope = (scope, name) => { - if(!scope) { - return null; - } - if(name in scope.bindings) { - return scope; - } - return findBindingScope(scope.parent, name); - } - - // Function scope - const createFunction = (scope, name, declaration) => { - scope.functions[name] = { references: [declaration] } - }; - const addFunctionReference = (scope, name, reference) => { - const global = findGlobalScope(scope); - if(name in global.functions) { - global.functions[name].references.push(reference); - } else { - createFunction(global, name, reference); - } - }; - const findGlobalScope = scope => scope.parent ? findGlobalScope(scope.parent) : scope; - const isDeclaredFunction = (scope, fnName) => fnName in findGlobalScope(scope).functions; - - // A "partial" is data that's computed as part of a production, but is then - // merged into some higher rule, and doesn't itself become a node. - const partial = (typeNameOrAttrs, attrs) => ({ - partial: - attrs === undefined - ? typeNameOrAttrs - : { - type: typeNameOrAttrs, - ...attrs, - }, - }); - - // Filter out "empty" elements from an array - const xnil = (...args) => args.flat().filter(e => - e !== undefined && e !== null && e !== '' && e.length !== 0 - ) - - // Given an array of nodes with potential null empty values, convert to text. - // Kind of like $(rule) but filters out empty rules - const toText = (...args) => xnil(args).join(''); - - const ifOnly = arr => arr.length > 1 ? 
arr : arr[0]; - - // Remove empty elements and return value if only 1 element remains - const collapse = (...args) => ifOnly(xnil(args)); - - // Create a left associative tree of nodes - const leftAssociate = (...nodes) => - nodes.flat().reduce((current, [operator, expr]) => ({ - type: 'binary', - operator, - left: current, - right: expr - })); - - - // From https://www.khronos.org/registry/OpenGL-Refpages/gl4/index.php - // excluding gl_ prefixed builtins, which don't appear to be functions - const builtIns = new Set([ - 'abs', - 'acos', - 'acosh', - 'all', - 'any', - 'asin', - 'asinh', - 'atan', - 'atanh', - 'atomicAdd', - 'atomicAnd', - 'atomicCompSwap', - 'atomicCounter', - 'atomicCounterDecrement', - 'atomicCounterIncrement', - 'atomicExchange', - 'atomicMax', - 'atomicMin', - 'atomicOr', - 'atomicXor', - 'barrier', - 'bitCount', - 'bitfieldExtract', - 'bitfieldInsert', - 'bitfieldReverse', - 'ceil', - 'clamp', - 'cos', - 'cosh', - 'cross', - 'degrees', - 'determinant', - 'dFdx', - 'dFdxCoarse', - 'dFdxFine', - 'dFdy', - 'dFdyCoarse', - 'dFdyFine', - 'distance', - 'dot', - 'EmitStreamVertex', - 'EmitVertex', - 'EndPrimitive', - 'EndStreamPrimitive', - 'equal', - 'exp', - 'exp2', - 'faceforward', - 'findLSB', - 'findMSB', - 'floatBitsToInt', - 'floatBitsToUint', - 'floor', - 'fma', - 'fract', - 'frexp', - 'fwidth', - 'fwidthCoarse', - 'fwidthFine', - 'greaterThan', - 'greaterThanEqual', - 'groupMemoryBarrier', - 'imageAtomicAdd', - 'imageAtomicAnd', - 'imageAtomicCompSwap', - 'imageAtomicExchange', - 'imageAtomicMax', - 'imageAtomicMin', - 'imageAtomicOr', - 'imageAtomicXor', - 'imageLoad', - 'imageSamples', - 'imageSize', - 'imageStore', - 'imulExtended', - 'intBitsToFloat', - 'interpolateAtCentroid', - 'interpolateAtOffset', - 'interpolateAtSample', - 'inverse', - 'inversesqrt', - 'isinf', - 'isnan', - 'ldexp', - 'length', - 'lessThan', - 'lessThanEqual', - 'log', - 'log2', - 'matrixCompMult', - 'max', - 'memoryBarrier', - 'memoryBarrierAtomicCounter', - 
'memoryBarrierBuffer', - 'memoryBarrierImage', - 'memoryBarrierShared', - 'min', - 'mix', - 'mod', - 'modf', - 'noise', - 'noise1', - 'noise2', - 'noise3', - 'noise4', - 'normalize', - 'not', - 'notEqual', - 'outerProduct', - 'packDouble2x32', - 'packHalf2x16', - 'packSnorm2x16', - 'packSnorm4x8', - 'packUnorm', - 'packUnorm2x16', - 'packUnorm4x8', - 'pow', - 'radians', - 'reflect', - 'refract', - 'round', - 'roundEven', - 'sign', - 'sin', - 'sinh', - 'smoothstep', - 'sqrt', - 'step', - 'tan', - 'tanh', - 'texelFetch', - 'texelFetchOffset', - 'texture', - 'textureGather', - 'textureGatherOffset', - 'textureGatherOffsets', - 'textureGrad', - 'textureGradOffset', - 'textureLod', - 'textureLodOffset', - 'textureOffset', - 'textureProj', - 'textureProjGrad', - 'textureProjGradOffset', - 'textureProjLod', - 'textureProjLodOffset', - 'textureProjOffset', - 'textureQueryLevels', - 'textureQueryLod', - 'textureSamples', - 'textureSize', - 'transpose', - 'trunc', - 'uaddCarry', - 'uintBitsToFloat', - 'umulExtended', - 'unpackDouble2x32', - 'unpackHalf2x16', - 'unpackSnorm2x16', - 'unpackSnorm4x8', - 'unpackUnorm', - 'unpackUnorm2x16', - 'unpackUnorm4x8', - 'usubBorrow', - // GLSL ES 1.00 - 'texture2D', 'textureCube' - ]); + const { + makeLocals, + collapse, + partial, + leftAssociate, + isDeclaredFunction, + findGlobalScope, + makeScopeIndex, + findTypeScope, + isDeclaredType, + findBindingScope, + extractConstant, + quantifiersSignature, + signature, + ifOnly, + xnil, + builtIns, + // This require() without a file extension is an intentional hack. For local + // development, this will find the TypeScript file grammar.ts. When publihsed + // to npm, it will find the compiled Javascript file grammar.js. 
+ } = require('./grammar'); }} -// Per-parse initializations +// Local parser code, unique to each invocation of the parser { - const getLocation = (loc) => { - // Try to avoid calling getLocation() more than neccessary - if(!options.includeLocation) { - return; - } - // Intentionally drop the "source" and "offset" keys from the location object - const { start, end } = loc || location(); - return { start, end }; - } - - // getLocation() (and etc. functions) are not available in global scope, - // so node() is moved to per-parse scope - const node = (type, attrs) => { - const n = { - type, - ...attrs, - } - if(options.includeLocation) { - n.location = getLocation(); - } - return n; - }; - - const makeScope = (name, parent, startLocation) => { - let newLocation = getLocation(startLocation); - - return { - name, - parent, - ...(newLocation ? { location: newLocation } : false), - bindings: {}, - types: {}, - functions: {}, - }; - }; - - const warn = (...args) => !options.quiet && console.warn(...args); - - let scope = makeScope('global'); - let scopes = [scope]; - - const pushScope = scope => { - // console.log('pushing scope at ',text()); - scopes.push(scope); - return scope; - }; - const popScope = scope => { - // console.log('popping scope at ',text()); - if(!scope.parent) { - throw new Error('popped bad scope', scope, 'at', text()); - } - return scope.parent; - }; - const setScopeEnd = (scope, end) => { - if(options.includeLocation) { - if(!scope.location) { - console.error('no end location at', text()); - } - scope.location.end = end; - } + const context = { + options, + location, + text, }; - - // Group the statements in a switch statement into cases / default arrays - const groupCases = (statements) => statements.reduce((cases, stmt) => { - const partial = stmt.partial || {}; - if(partial.type === 'case_label') { - return [ - ...cases, - node( - 'switch_case', - { - statements: [], - case: partial.case, - test: partial.test, - colon: partial.colon, - } - ) - ]; 
- } else if(partial.type === 'default_label') { - return [ - ...cases, - node( - 'default_case', - { - statements: [], - default: partial.default, - colon: partial.colon, - } - ) - ]; - // It would be nice to encode this in the grammar instead of a manual check - } else if(!cases.length) { - throw new Error('A switch statement body must start with a case or default label'); - } else { - const tail = cases.slice(-1)[0]; - return [...cases.slice(0, -1), { - ...tail, - statements: [ - ...tail.statements, - stmt - ] - }]; - } - }, []); + const { + getLocation, + node, + makeScope, + warn, + pushScope, + popScope, + setScopeEnd, + createFunctionPrototype, + addFunctionCallReference, + createFunctionDefinition, + addTypeReference, + addTypeIfFound, + createType, + addOrCreateBindingReference, + createBindings, + groupCases + } = makeLocals(context); } // Entrypoint to parsing! start = wsStart:_ program:translation_unit { // Set the global scope end to the end of the program - setScopeEnd(scope, getLocation()?.end); - return node('program', { wsStart, program, scopes }); + setScopeEnd(context.scope, getLocation()?.end); + return node('program', { wsStart, program, scopes: context.scopes }); } // "compatibility profile only and vertex language only; same as in when in a @@ -649,30 +321,11 @@ CARET = token:"^" _:_? { return node('literal', { literal: token, whitespace: _ AMPERSAND = token:"&" _:_? { return node('literal', { literal: token, whitespace: _ }); } QUESTION = token:"?" _:_? { return node('literal', { literal: token, whitespace: _ }); } -IDENTIFIER = !keyword identifier:$([A-Za-z_] [A-Za-z_0-9]*) _:_? { return node('identifier', { identifier, whitespace: _ }); } -TYPE_NAME = !keyword ident:IDENTIFIER { - const { identifier } = ident; - - // We do scope checking and parsing all in one pass. In the case of calling an - // undefined function, here, we don't know that we're in a function, so we - // can't warn appropriately. 
If we return false for the missing typename, the - // program won't parse, since the function call node won't match since it uses - // type_name for the function_identifier. So all we can do here is go on our - // merry way if the type isn't known. - - // This only applies to structs. I'm not sure if it's right. Because TYPE_NAME - // is used in lots of places, it's easier to put this check here. - let found; - if(found = findTypeScope(scope, identifier)) { - addTypeReference(found, identifier, ident); - // I removed this because a type name reference here can't be renamed because - // it's just a string and we don't know the parent node. This might apply - // to the type reference above as well - // } else if(found = findFunctionScope(scope, identifier)) { - // addFunctionReference(found, identifier, identifier); - } - - return ident; +IDENTIFIER = !keyword identifier:$([A-Za-z_] [A-Za-z_0-9]*) _:_? { + return node('identifier', { identifier, whitespace: _ }); +} +TYPE_NAME = !keyword identifier:$([A-Za-z_] [A-Za-z_0-9]*) _:_? 
{ + return node('type_name', { identifier, whitespace: _ }); } // Integers @@ -710,7 +363,7 @@ primary_expression "primary expression" } / ident:IDENTIFIER { const { identifier } = ident; - addBindingReference(scope, identifier, ident); + addOrCreateBindingReference(context.scope, identifier, ident); return ident; } @@ -776,20 +429,30 @@ function_call // won't, so this will be null identifier.specifier.identifier; - const n = node('function_call', { ...identifierPartial, args, rp }); + const n = node('function_call', { ...identifierPartial, args: args || [], rp }); - // struct constructors are stored in scope types, not scope functions, - // skip them (the isDeclaredType check) - const isDeclared = isDeclaredFunction(scope, fnName); + // Scope check for function call if( - fnName && !isDeclaredType(scope, fnName) && - // GLSL has built in functions that users can override - (isDeclared || !builtIns.has(fnName)) + fnName && + // You can override built-in functions like "noise", so only add "noise" + // to scope usage if it's declared by the user + (isDeclaredFunction(context.scope, fnName) || !builtIns.has(fnName)) ) { - if(!isDeclared) { - warn(`Warning: Function "${fnName}" has not been declared`); + // Structs constructors look like function calls. If this is a struct, + // track it as such. Otherwise it becomes a function reference + if(isDeclaredType(context.scope, fnName)) { + if(identifier.type === 'type_specifier') { + addTypeReference( + context.scope, + fnName, + identifier.specifier + ); + } else { + throw new Error(`Unknown function call identifier type ${identifier.type}. 
Please file a bug against @shaderfrog/glsl-parser and incldue your source grammar.`) + } + } else { + addFunctionCallReference(context.scope, fnName, n); } - addFunctionReference(scope, fnName, n); } return n; @@ -1004,75 +667,87 @@ expression "expression" constant_expression = ternary_expression -declaration_statement = declaration:declaration { - return node( - 'declaration_statement', - { - declaration: declaration[0], - semi: declaration[1], - } - ); -} - // Note the grammar allows prototypes inside function bodies, but: // "Function declarations (prototypes) cannot occur inside of functions; // they must be at global scope, or for the built-in functions, outside the // global scope, otherwise a compile-time error results." -// Don't factor out the semicolon from these lines up into -// "declaration_statement". Doing so causes some productions to consume input -// that's meant for a later production. +// Each statement below has a semicolon it. This deviates from the grammar, but +// is required at least for init_declarator_list_statement - otherwise the +// type_specifier at the start of it consumes "fn" in "fn()", adds the type +// "fn" to the type scope, then backtracks when it hits the semicolon, but has +// a pollute scope. // -// The "function_prototype SEMICOLON" was moved out of this list and into -// function_prototype_no_new_scope, so that fn prototypes go first, then +// "function_prototype" was moved out of this list and into +// "function_prototype_no_new_scope", so that fn prototypes go first, then // functions, then declarations -declaration - = function_prototype_no_new_scope SEMICOLON - // Statements starting with "precision", like "precision highp float" - / precision_declarator SEMICOLON - // Grouped in/out/uniform/buffer declarations with a { members } block after. - / interface_declarator SEMICOLON - // A statement starting with only qualifiers like "in precision a;" - / qualifier_declarator SEMICOLON - // Handles most identifiers. 
Interface declarator handles layout() {} blocks. - // init_declartor_list needs to come after it, otherwise it eats the layout - // part without handling the open brace after it - / init_declarator_list SEMICOLON - -qualifier_declarator = - qualifiers:type_qualifiers - head:IDENTIFIER? - tail:(COMMA IDENTIFIER)* { +declaration_statement + = declaration:( + // Statements starting with "precision", like "precision highp float" + precision_declarator_statement + // Grouped in/out/uniform/buffer declarations with a { members } block after. + / interface_declarator_statement + // A statement starting with only qualifiers like "in precision a;" + / qualifier_declarator_statement + // Handles most identifiers. Interface declarator handles layout() {} blocks. + // init_declartor_list needs to come after it, otherwise it eats the layout + // part without handling the open brace after it + / init_declarator_list_statement + ) { return node( - 'qualifier_declarator', + 'declaration_statement', { - qualifiers, - // Head is optional, so remove falsey - declarations: xnil([head, ...tail.map(t => t[1])]), - commas: tail.map(t => t[0]) + declaration: declaration.partial.node, + semi: declaration.partial.semi, } ); } -interface_declarator +qualifier_declarator_statement = + qualifiers:type_qualifiers + head:IDENTIFIER? + tail:(COMMA IDENTIFIER)* + semi:SEMICOLON { + return partial({ + node: node( + 'qualifier_declarator', + { + qualifiers, + // Head is optional, so remove falsey + declarations: xnil([head, ...tail.map(t => t[1])]), + commas: tail.map(t => t[0]) + } + ), + semi + }); + } + +interface_declarator_statement = qualifiers:type_qualifiers interface_type:IDENTIFIER lp:LEFT_BRACE declarations:struct_declaration_list rp:RIGHT_BRACE - identifier:quantified_identifier? { + identifier:quantified_identifier? 
+ semi:SEMICOLON { const n = node( 'interface_declarator', { qualifiers, interface_type, lp, declarations, rp, identifier } ); - createBindings(scope, [interface_type.identifier, n]); - return n; + createBindings(context.scope, [interface_type.identifier, n]); + return partial({ + node: n, + semi + }); } -precision_declarator "precision statement" +precision_declarator_statement "precision statement" // As in "precision highp float" - = prefix:PRECISION qualifier:precision_qualifier specifier:type_specifier { - return node('precision', { prefix, qualifier, specifier }); + = prefix:PRECISION qualifier:precision_qualifier specifier:type_specifier semi: SEMICOLON{ + return partial({ + node: node('precision', { prefix, qualifier, specifier }), + semi + }); } function_prototype_new_scope "function prototype" @@ -1083,9 +758,9 @@ function_prototype_new_scope "function prototype" // body. const bindings = (params?.parameters || []) // Ignore any param without an identifier, aka main(void) - .filter(p => !!p.declaration.identifier) - .map(p => [p.declaration.identifier.identifier, p]); - createBindings(scope, ...bindings) + .filter(p => !!p.identifier) + .map(p => [p.identifier.identifier, p]); + createBindings(context.scope, ...bindings) return node('function_prototype', { header, ...params, rp }); } @@ -1098,7 +773,7 @@ function_header_new_scope "function header" 'function_header', { returnType, name, lp } ); - scope = pushScope(makeScope(name.identifier, scope, lp.location)); + context.scope = pushScope(makeScope(name.identifier, context.scope, lp.location)); return n; } @@ -1132,24 +807,17 @@ function_parameters "function parameters" // Parameter note: vec4[1] param and vec4 param[1] are equivalent parameter_declaration "parameter declaration" = qualifier:parameter_qualifier* - declaration:(parameter_declarator / type_specifier) { + specifier:type_specifier + declaration:(IDENTIFIER array_specifiers?)? 
{ return node( 'parameter_declaration', - { qualifier, declaration } - ); - } - -// Note array_specifier is "[const_expr]" -parameter_declarator "parameter declarator" - = specifier:type_specifier - identifier:IDENTIFIER - quantifier:array_specifier? { - const n = node( - 'parameter_declarator', - { specifier, identifier, quantifier } + { + qualifier, + specifier, + identifier: declaration?.[0], + quantifier: declaration?.[1] + } ); - // createBindings(scope, [identifier.identifier, n]); - return n; } // I added this because on page 114, it says formal parameters can only have @@ -1159,31 +827,38 @@ parameter_declarator "parameter declarator" parameter_qualifier = CONST / IN / OUT / INOUT / memory_qualifier / precision_qualifier memory_qualifier = COHERENT / VOLATILE / RESTRICT / READONLY / WRITEONLY -init_declarator_list +init_declarator_list_statement = head:initial_declaration tail:( op:COMMA expr:subsequent_declaration - )* { + )* + semi:SEMICOLON { const declarations = [ head.declaration, ...tail.map(t => t[1]) ].filter(decl => !!decl.identifier); - createBindings(scope, ...declarations.map(decl => [decl.identifier.identifier, decl])); + addTypeIfFound(context.scope, head.specified_type); - return node( - 'declarator_list', - { - specified_type: head.specified_type, - declarations, - commas: tail.map(t => t[0]) - } - ); + // initial_declaration also adds bindings to support "int a = 1, b = a;" + createBindings(context.scope, ...tail.map(t => t[1]).map(decl => [decl.identifier.identifier, decl])); + + return partial({ + node: node( + 'declarator_list', + { + specified_type: head.specified_type, + declarations, + commas: tail.map(t => t[0]) + } + ), + semi + }); } subsequent_declaration = identifier:IDENTIFIER - quantifier:array_specifier? + quantifier:array_specifiers? suffix:( EQUAL initializer )? 
{ @@ -1194,18 +869,26 @@ subsequent_declaration ); } -// declaration > init_declarator_list > single_declaration +// declaration > init_declarator_list initial_declaration - // Apparently "float;" is a legal statement. I have no idea why. + // The grammar allows for "float;" as a legal statement, because + // fully_specified_type is what holds struct_specifier, which lets you define + // a struct without an identifier. = specified_type:fully_specified_type suffix:( - IDENTIFIER array_specifier? (EQUAL initializer)? + IDENTIFIER array_specifiers? (EQUAL initializer)? )? { // No gaurantee of a suffix because fully_specified_type contains a - // type_specifier which includes structs and type_names (IDENTIFIERs) + // type_specifier which includes structs and type_names const [identifier, quantifier, suffix_tail] = suffix || []; const [operator, initializer] = suffix_tail || []; + // This production is used as part of init_declarator_list, where we also + // add bindings, but I add bindings here to support "int a = 1, b = a;" + if(identifier) { + createBindings(context.scope, [identifier.identifier, identifier]); + } + // Break out the specified type so it can be grouped into the // declarator_list return { @@ -1295,7 +978,7 @@ storage_qualifier "storage qualifier" } type_specifier "type specifier" - = specifier:type_specifier_nonarray quantifier:array_specifier? { + = specifier:type_specifier_nonarray quantifier:array_specifiers? { return node('type_specifier', { specifier, quantifier }); } @@ -1324,13 +1007,13 @@ type_specifier_nonarray "type specifier" / IMAGE2DMS / IIMAGE2DMS / UIMAGE2DMS / IMAGE2DMSARRAY / IIMAGE2DMSARRAY / UIMAGE2DMSARRAY / struct_specifier / TYPE_NAME -array_specifier "array specifier" +array_specifiers "array specifier" = specifiers:( lb:LEFT_BRACKET expression:constant_expression? 
rb:RIGHT_BRACKET { return node('array_specifier', { lb, expression, rb }); } )+ { - return node('array_specifiers', { specifiers }); + return specifiers; } precision_qualifier "precision qualifier" @@ -1338,19 +1021,14 @@ precision_qualifier "precision qualifier" struct_specifier "struct specifier" = struct:STRUCT - typeName:IDENTIFIER? + typeName:TYPE_NAME? lb:LEFT_BRACE declarations:struct_declaration_list rb:RIGHT_BRACE { const n = node('struct', { lb, declarations, rb, struct, typeName }); // Anonymous structs don't get a type name if(typeName) { - addTypes(scope, [typeName.identifier, n]); - - // Struct names also become constructors for functions. Needing to track - // this as both a type and a function makes me think my scope data model - // is probably wrong - // addFunctionReference(scope, typeName.identifier, n); + createType(context.scope, typeName.identifier, n.typeName); } return n; } @@ -1358,6 +1036,7 @@ struct_specifier "struct specifier" struct_declaration_list = ( declaration:struct_declaration semi:SEMICOLON { + addTypeIfFound(context.scope, declaration.specified_type); return node('struct_declaration', { declaration, semi }); } )+ @@ -1366,6 +1045,7 @@ struct_declaration = specified_type:fully_specified_type head:quantified_identifier tail:(COMMA quantified_identifier)* { + if(specified_type) return node( 'struct_declarator', { @@ -1378,7 +1058,7 @@ struct_declaration // Fields inside of structs and interace blocks. They don't show up in scope quantified_identifier - = identifier:IDENTIFIER quantifier:array_specifier? { + = identifier:IDENTIFIER quantifier:array_specifiers? 
{ return node('quantified_identifier', { identifier, quantifier }); } @@ -1414,22 +1094,23 @@ simple_statement / expression_statement / if_statement / switch_statement + // TODO: This does not end in semicolon and returns a partial :O / case_label / iteration_statement // { block of statements } that introduces a new scope compound_statement = lb:(sym:LEFT_BRACE { - scope = pushScope(makeScope(OPEN_CURLY, scope)); + context.scope = pushScope(makeScope(OPEN_CURLY, context.scope)); return sym; }) statements:statement_list? rb:RIGHT_BRACE { // Use start of right bracket, so trailing whitespace is not counted towards // scope range - setScopeEnd(scope, rb.location?.start); + setScopeEnd(context.scope, rb.location?.start); - scope = popScope(scope); + context.scope = popScope(context.scope); return node( 'compound_statement', @@ -1513,7 +1194,7 @@ case_label iteration_statement "iteration statement" = whileSymbol:(sym:WHILE { - scope = pushScope(makeScope('while', scope)); + context.scope = pushScope(makeScope('while', context.scope)); return sym; }) lp:LEFT_PAREN @@ -1522,9 +1203,9 @@ iteration_statement "iteration statement" body:statement_no_new_scope { // use right bracket or fallback to location.end const end = body.rb ? body.rb.location?.start : body.location?.end; - setScopeEnd(scope, end); + setScopeEnd(context.scope, end); - scope = popScope(scope); + context.scope = popScope(context.scope); return node( 'while_statement', @@ -1560,7 +1241,7 @@ iteration_statement "iteration statement" ); } / forSymbol:(sym:FOR { - scope = pushScope(makeScope('for', scope)); + context.scope = pushScope(makeScope('for', context.scope)); return sym; }) lp:LEFT_PAREN @@ -1574,9 +1255,9 @@ iteration_statement "iteration statement" rp:RIGHT_PAREN body:statement_no_new_scope { const end = body.rb ? 
body.rb.location?.start : body.location?.end; - setScopeEnd(scope, end); + setScopeEnd(context.scope, end); - scope = popScope(scope); + context.scope = popScope(context.scope); return node( 'for_statement', @@ -1606,7 +1287,7 @@ condition 'condition_expression', { specified_type, identifier, operator, initializer } ); - createBindings(scope, [identifier.identifier, n]); + createBindings(context.scope, [identifier.identifier, n]); return n; } / expression @@ -1634,10 +1315,14 @@ preprocessor "prepocessor" = line:$('#' [^\n]*) _:_? { return node('preprocessor // Translation unit is start of grammar translation_unit = (external_declaration / preprocessor)+ -// Definitions without bodies, like "f(vec4, vec4);" +// Definitions without bodies, like "void f(vec4, vec4);" function_prototype_statement = declaration:function_prototype_no_new_scope semi:SEMICOLON { - addFunctionReference(scope, declaration.header.name.identifier, declaration); + (declaration.parameters || []).forEach(p => addTypeIfFound(context.scope, p.specifier)); + addTypeIfFound(context.scope, declaration.header.returnType); + + createFunctionPrototype(context.scope, declaration.header.name.identifier, declaration); + const n = node( 'declaration_statement', { @@ -1648,12 +1333,6 @@ function_prototype_statement = return n; } -function_prototype = - fp:function_prototype_no_new_scope semi:SEMICOLON { - addFunctionReference(scope, fp.header.name.identifier, fp); - return [fp, semi]; - } - // "function_prototype_statement" isn't in the grammar. It's removed from // declaration_statement and added here to catch function prototypes. 
The issue // is that the other productions cause barfing: @@ -1675,14 +1354,23 @@ external_declaration function_definition = prototype:function_prototype_new_scope body:compound_statement_no_new_scope { - const n = node('function', { prototype, body }); - setScopeEnd(scope, body.rb.location?.start); + setScopeEnd(context.scope, body.rb.location?.start); + + context.scope = popScope(context.scope); - scope = popScope(scope); + // Check the return type and parameters for any custom type usage. This + // has to be done in the global scope, even though function parameters are + // bound to the function scope, because the *types* come from the global + // scope. In: + // void main(MyStruct x) { struct MyStruct {...} } + // MyStruct is global, and shouldn't match the inner shadowing MyStruct, so + // the check for types has to be done after we pop the scope + (prototype.parameters || []).forEach(p => addTypeIfFound(context.scope, p.specifier)); + addTypeIfFound(context.scope, prototype.header.returnType); - addFunctionReference(scope, prototype.header.name.identifier, n); + createFunctionDefinition(context.scope, prototype.header.name.identifier, n, n); return n; } @@ -1699,7 +1387,9 @@ comment // can be followed by more multiline comments, or a single comment, and // collapse everything into one array / a:multiline_comment d:( - x:whitespace cc:comment { return xnil(x, cc); } + x:whitespace cc:comment { + return xnil(x, cc); + } )* { return xnil(a, d.flat()); } single_comment = $('//' [^\n]*) diff --git a/src/parser/grammar.ts b/src/parser/grammar.ts new file mode 100644 index 0000000..64c3a5f --- /dev/null +++ b/src/parser/grammar.ts @@ -0,0 +1,631 @@ +/** + * Helper functions used by glsl-grammar.pegjs. Also re-exports + * functions from other files used in the grammar. 
+ */ + +import { + AstNode, + CompoundStatementNode, + LocationInfo, + LocationObject, + BinaryNode, + FunctionPrototypeNode, + LiteralNode, + FunctionNode, + FunctionCallNode, + TypeNameNode, + FullySpecifiedTypeNode, + TypeSpecifierNode, +} from '../ast'; +import { ParserOptions } from './parser'; +import { + Scope, + findGlobalScope, + findOverloadDefinition, + findTypeScope, + functionDeclarationSignature, + functionUseSignature, + newOverloadIndex, + isDeclaredFunction, + isDeclaredType, + makeScopeIndex, + findBindingScope, +} from './scope'; + +export { + Scope, + findGlobalScope, + findOverloadDefinition, + findTypeScope, + functionDeclarationSignature, + functionUseSignature, + newOverloadIndex, + isDeclaredFunction, + isDeclaredType, +}; + +export const UNKNOWN_TYPE = 'UNKNOWN TYPE'; + +// Peggyjs globals +type Text = () => string; +type Location = () => LocationObject; + +// Context passed to makeLocals +type Context = { + text: Text; + location: Location; + options: ParserOptions; + scope: Scope; + scopes: Scope[]; +}; + +// A "partial" is data that's computed as part of a production, but is then +// merged into some higher rule, and doesn't itself become a node. +export type PartialNode = { partial: any }; +export const partial = (typeNameOrAttrs: string | object, attrs: object) => ({ + partial: + attrs === undefined + ? typeNameOrAttrs + : { + type: typeNameOrAttrs, + ...attrs, + }, +}); + +// Filter out "empty" elements from an array +export const xnil = (...args: any[]) => + args + .flat() + .filter((e) => e !== undefined && e !== null && e !== '' && e.length !== 0); + +// Given an array of nodes with potential null empty values, convert to text. +// Kind of like $(rule) but filters out empty rules +export const toText = (...args: any[]) => xnil(args).join(''); + +export const ifOnly = (arr: any[]) => (arr.length > 1 ? 
arr : arr[0]); + +// Remove empty elements and return value if only 1 element remains +export const collapse = (...args: any[]) => ifOnly(xnil(args)); + +// Create definition left associative tree of nodes +export const leftAssociate = ( + head: AstNode, + ...tail: [[LiteralNode, AstNode]][] +) => + tail.flat().reduce( + (left, [operator, right]) => ({ + type: 'binary', + operator, + left, + right, + }), + head + ); + +// From https://www.khronos.org/registry/OpenGL-Refpages/gl4/index.php +// excluding gl_ prefixed builtins, which don't appear to be functions +export const builtIns = new Set([ + 'abs', + 'acos', + 'acosh', + 'all', + 'any', + 'asin', + 'asinh', + 'atan', + 'atanh', + 'atomicAdd', + 'atomicAnd', + 'atomicCompSwap', + 'atomicCounter', + 'atomicCounterDecrement', + 'atomicCounterIncrement', + 'atomicExchange', + 'atomicMax', + 'atomicMin', + 'atomicOr', + 'atomicXor', + 'barrier', + 'bitCount', + 'bitfieldExtract', + 'bitfieldInsert', + 'bitfieldReverse', + 'ceil', + 'clamp', + 'cos', + 'cosh', + 'cross', + 'degrees', + 'determinant', + 'dFdx', + 'dFdxCoarse', + 'dFdxFine', + 'dFdy', + 'dFdyCoarse', + 'dFdyFine', + 'distance', + 'dot', + 'EmitStreamVertex', + 'EmitVertex', + 'EndPrimitive', + 'EndStreamPrimitive', + 'equal', + 'exp', + 'exp2', + 'faceforward', + 'findLSB', + 'findMSB', + 'floatBitsToInt', + 'floatBitsToUint', + 'floor', + 'fma', + 'fract', + 'frexp', + 'fwidth', + 'fwidthCoarse', + 'fwidthFine', + 'greaterThan', + 'greaterThanEqual', + 'groupMemoryBarrier', + 'imageAtomicAdd', + 'imageAtomicAnd', + 'imageAtomicCompSwap', + 'imageAtomicExchange', + 'imageAtomicMax', + 'imageAtomicMin', + 'imageAtomicOr', + 'imageAtomicXor', + 'imageLoad', + 'imageSamples', + 'imageSize', + 'imageStore', + 'imulExtended', + 'intBitsToFloat', + 'interpolateAtCentroid', + 'interpolateAtOffset', + 'interpolateAtSample', + 'inverse', + 'inversesqrt', + 'isinf', + 'isnan', + 'ldexp', + 'length', + 'lessThan', + 'lessThanEqual', + 'log', + 'log2', + 
'matrixCompMult', + 'max', + 'memoryBarrier', + 'memoryBarrierAtomicCounter', + 'memoryBarrierBuffer', + 'memoryBarrierImage', + 'memoryBarrierShared', + 'min', + 'mix', + 'mod', + 'modf', + 'noise', + 'noise1', + 'noise2', + 'noise3', + 'noise4', + 'normalize', + 'not', + 'notEqual', + 'outerProduct', + 'packDouble2x32', + 'packHalf2x16', + 'packSnorm2x16', + 'packSnorm4x8', + 'packUnorm', + 'packUnorm2x16', + 'packUnorm4x8', + 'pow', + 'radians', + 'reflect', + 'refract', + 'round', + 'roundEven', + 'sign', + 'sin', + 'sinh', + 'smoothstep', + 'sqrt', + 'step', + 'tan', + 'tanh', + 'texelFetch', + 'texelFetchOffset', + 'texture', + 'textureGather', + 'textureGatherOffset', + 'textureGatherOffsets', + 'textureGrad', + 'textureGradOffset', + 'textureLod', + 'textureLodOffset', + 'textureOffset', + 'textureProj', + 'textureProjGrad', + 'textureProjGradOffset', + 'textureProjLod', + 'textureProjLodOffset', + 'textureProjOffset', + 'textureQueryLevels', + 'textureQueryLod', + 'textureSamples', + 'textureSize', + 'transpose', + 'trunc', + 'uaddCarry', + 'uintBitsToFloat', + 'umulExtended', + 'unpackDouble2x32', + 'unpackHalf2x16', + 'unpackSnorm2x16', + 'unpackSnorm4x8', + 'unpackUnorm', + 'unpackUnorm2x16', + 'unpackUnorm4x8', + 'usubBorrow', + // GLSL ES 1.00 + 'texture2D', + 'textureCube', +]); + +/** + * Uses a closure to provide Peggyjs-parser-execution-aware context + */ +export const makeLocals = (context: Context) => { + const getLocation = (loc?: LocationObject) => { + // Try to avoid calling getLocation() more than neccessary + if (!context.options.includeLocation) { + return; + } + // Intentionally drop the "source" and "offset" keys from the location object + const { start, end } = loc || context.location(); + return { start, end }; + }; + + // getLocation() (and etc. 
functions) are not available in global scope, + // so node() is moved to per-parse scope + const node = (type: AstNode['type'], attrs: any): AstNode => { + const n: AstNode = { + type, + ...attrs, + }; + if (context.options.includeLocation) { + n.location = getLocation(); + } + return n; + }; + + const makeScope = ( + name: string, + parent?: Scope, + startLocation?: LocationObject + ): Scope => { + let newLocation = getLocation(startLocation); + + return { + name, + parent, + ...(newLocation ? { location: newLocation } : false), + bindings: {}, + types: {}, + functions: {}, + }; + }; + + const warn = (message: string): void => { + if (context.options.failOnWarn) { + throw new Error(message); + } + if (!context.options.quiet) { + console.warn(message); + } + }; + + const pushScope = (scope: Scope) => { + context.scopes.push(scope); + return scope; + }; + const popScope = (scope: Scope) => { + if (!scope.parent) { + throw new Error(`Popped bad scope ${scope} at ${context.text()}`); + } + return scope.parent; + }; + + const setScopeEnd = (scope: Scope, end: LocationInfo) => { + if (context.options.includeLocation) { + if (!scope.location) { + console.error(`No end location at ${context.text()}`); + } else { + scope.location.end = end; + } + } + }; + + /** + * Use this when you encounter a function call. warns() if the function is + * not defined or doesn't have a known overload. 
See the "Caution" note in the + * README for the false positive in findOverloadDefinition() + */ + const addFunctionCallReference = ( + scope: Scope, + name: string, + fnRef: FunctionCallNode + ) => { + const global = findGlobalScope(scope); + + const signature = functionUseSignature(fnRef); + if (!global.functions[name]) { + warn( + `Encountered undeclared function: "${name}" with signature "${signature[2]}"` + ); + global.functions[name] = { + [signature[2]]: newOverloadIndex(signature[0], signature[1], fnRef), + }; + } else { + const existingOverload = findOverloadDefinition( + signature, + global.functions[name] + ); + if (!existingOverload) { + warn( + `No matching overload for function: "${name}" with signature "${signature[2]}"` + ); + global.functions[name][signature[2]] = newOverloadIndex( + signature[0], + signature[1], + fnRef + ); + } else { + existingOverload.references.push(fnRef); + } + } + }; + + /** + * Create a definition for a function in the global scope. Use this when you + * encounter a function definition. + */ + const createFunctionDefinition = ( + scope: Scope, + name: string, + fnRef: FunctionNode + ) => { + const global = findGlobalScope(scope); + + const signature = functionDeclarationSignature(fnRef); + if (!global.functions[name]) { + global.functions[name] = {}; + } + const existing = global.functions[name][signature[2]]; + if (existing) { + if (existing.declaration) { + warn( + `Encountered duplicate function definition: "${name}" with signature "${signature[2]}"` + ); + } else { + existing.declaration = fnRef; + } + existing.references.push(fnRef); + } else { + global.functions[name][signature[2]] = newOverloadIndex( + signature[0], + signature[1], + fnRef + ); + global.functions[name][signature[2]].declaration = fnRef; + } + }; + + /** + * Create a definition for a function prototype. This is *not* the function + * declaration in scope. 
+ */ + const createFunctionPrototype = ( + scope: Scope, + name: string, + fnRef: FunctionPrototypeNode + ) => { + const global = findGlobalScope(scope); + + const signature = functionDeclarationSignature(fnRef); + if (!global.functions[name]) { + global.functions[name] = {}; + } + const existing = global.functions[name][signature[2]]; + if (existing) { + warn( + `Encountered duplicate function prototype: "${name}" with signature "${signature[2]}"` + ); + existing.references.push(fnRef); + } else { + global.functions[name][signature[2]] = newOverloadIndex( + signature[0], + signature[1], + fnRef + ); + } + }; + + /** + * Add the use of a struct TYPE_NAME to the scope. Use this when you know + * you've encountered a struct name. + */ + const addTypeReference = ( + scope: Scope, + name: string, + reference: TypeNameNode + ) => { + const declaredScope = findTypeScope(scope, name); + if (declaredScope) { + declaredScope.types[name].references.push(reference); + } else { + warn(`Encountered undeclared type: "${name}"`); + scope.types[name] = { + references: [reference], + }; + } + }; + + /** + * Create a new user defined type (struct) scope entry. Use this only when you + * know this is a valid struct definition. If the struct name is already + * defined, warn() + */ + const createType = ( + scope: Scope, + name: string, + declaration: TypeNameNode + ) => { + if (name in scope.types) { + if (scope.types[name].declaration) { + warn(`Encountered duplicate type declaration: "${name}"`); + } else { + warn(`Type "${name}" was used before it was declared`); + scope.types[name].declaration = declaration; + } + scope.types[name].references.push(declaration); + } else { + scope.types[name] = { + declaration, + references: [declaration], + }; + } + }; + + /** + * Given a TypeSpecifier, check if it includes a TYPE_NAME node, and if so, + * track it in scope. Use this on any TypeSpecifier. 
+ */ + const addTypeIfFound = ( + scope: Scope, + node: FullySpecifiedTypeNode | TypeSpecifierNode + ) => { + const specifier = + node.type === 'fully_specified_type' + ? node?.specifier?.specifier + : node?.specifier; + + if (specifier.type === 'type_name') { + const name = specifier.identifier; + addTypeReference(scope, name, specifier); + // If type is 'struct', then it was declared in struct_specifier. If + } else if (specifier.type !== 'struct' && specifier.type !== 'keyword') { + console.warn('Unknown specifier', specifier); + throw new Error( + `Unknown declarator specifier ${specifier?.type}. Please file a bug against @shaderfrog/glsl-parser and include your source grammar.` + ); + } + }; + + /** + * Create new variable declarations in the scope. Only use this when you know + * the variable is being defined by the AstNode in question. + */ + const createBindings = (scope: Scope, ...bindings: [string, AstNode][]) => { + bindings.forEach(([identifier, binding]) => { + const existing = scope.bindings[identifier]; + if (existing) { + warn(`Encountered duplicate variable declaration: "${identifier}"`); + existing.references.unshift(binding); + } else { + scope.bindings[identifier] = makeScopeIndex(binding, binding); + } + }); + }; + + /** + * When a variable name is encountered in the AST, either add it to the scope + * it's defined in, or if it's not defined, warn(), and add a scope entry + * without a declaration. + * Used in the parse tree when you don't know if a variable should be defined + * yet or not, like encountering an IDENTIFIER in an expression. + */ + const addOrCreateBindingReference = ( + scope: Scope, + name: string, + reference: AstNode + ) => { + // In the case of "float a = 1, b = a;" we parse the final "a" before the + // parent declarator list is parsed. So we might need to add the final "a" + // to the scope first. 
+ const foundScope = findBindingScope(scope, name); + if (foundScope) { + foundScope.bindings[name].references.push(reference); + } else { + warn(`Encountered undefined variable: "${name}"`); + // This intentionally does not provide a declaration + scope.bindings[name] = makeScopeIndex(reference); + } + }; + + // Group the statements in a switch statement into cases / default arrays + const groupCases = (statements: (AstNode | PartialNode)[]) => + statements.reduce((cases, stmt) => { + const partial = 'partial' in stmt ? stmt.partial : {}; + if (partial.type === 'case_label') { + return [ + ...cases, + node('switch_case', { + statements: [], + case: partial.case, + test: partial.test, + colon: partial.colon, + }), + ]; + } else if (partial.type === 'default_label') { + return [ + ...cases, + node('default_case', { + statements: [], + default: partial.default, + colon: partial.colon, + }), + ]; + // It would be nice to encode this in the grammar instead of a manual check + } else if (!cases.length) { + throw new Error( + 'A switch statement body must start with a case or default label' + ); + } else { + // While converting this file to Typescript, I don't remember what this + // else case is covering + const tail = cases.slice(-1)[0]; + return [ + ...cases.slice(0, -1), + { + ...tail, + statements: [...(tail as CompoundStatementNode).statements, stmt], + } as AstNode, + ]; + } + }, []); + + context.scope = makeScope('global'); + context.scopes = [context.scope]; + + return { + getLocation, + node, + makeScope, + warn, + pushScope, + popScope, + setScopeEnd, + createFunctionDefinition, + addFunctionCallReference, + createFunctionPrototype, + groupCases, + addTypeReference, + addTypeIfFound, + createType, + createBindings, + addOrCreateBindingReference, + }; +}; diff --git a/src/parser/parse.test.ts b/src/parser/parse.test.ts index f06038a..a0eab17 100644 --- a/src/parser/parse.test.ts +++ b/src/parser/parse.test.ts @@ -1,223 +1,10 @@ -import fs from 'fs'; -import 
path from 'path'; -import peggy, { GrammarError } from 'peggy'; -import util from 'util'; -import generate from './generator'; -import { AstNode, FunctionNode, ScopeIndex, Scope } from '../ast'; -import { Parser, ParserOptions } from './parser'; -import { renameBindings, renameFunctions, renameTypes } from './utils'; -import { preprocessAst } from '../preprocessor/preprocessor'; -import generatePreprocess from '../preprocessor/generator'; - -const fileContents = (filePath: string) => fs.readFileSync(filePath).toString(); -const inspect = (arg: any) => console.log(util.inspect(arg, false, null, true)); - -// Most of this ceremony around building a parser is dealing with Peggy's error -// format() function, where the grammarSource has to line up in generate() and -// format() to get nicely formatted errors if there's a syntax error in the -// grammar -const buildParser = (file: string) => { - const grammar = fileContents(file); - try { - return peggy.generate(grammar, { - grammarSource: file, - cache: true, - }); - } catch (e) { - const err = e as SyntaxError; - if ('format' in err && typeof err.format === 'function') { - console.error(err.format([{ source: file, text: grammar }])); - } - throw e; - } -}; - -const preprocessParser = buildParser( - './src/preprocessor/preprocessor-grammar.pegjs' -); - -const preprocess = (program: string) => { - const ast = preprocessParser.parse(program, { grammarSource: program }); - preprocessAst(ast); - return generatePreprocess(ast); -}; - -const debugEntry = (bindings: ScopeIndex) => { - return Object.entries(bindings).map( - ([k, v]) => - `"${k}": (${v.references.length} references): ${v.references - .map((r) => r.type) - .join(', ')}` - ); -}; - -const debugScopes = (scopes: Scope[]) => - scopes.map((s) => ({ - name: s.name, - bindings: debugEntry(s.bindings), - functions: debugEntry(s.functions), - })); - -const testFile = fileContents('./src/parser/glsltest.glsl'); - -const parser = 
buildParser('./src/parser/glsl-grammar.pegjs'); - -const middle = /\/\* start \*\/((.|[\r\n])+)(\/\* end \*\/)?/m; - -const parseSrc = (src: string, options: ParserOptions = {}) => { - const grammarSource = ''; - try { - return parser.parse(src, { - ...options, - grammarSource, - }); - } catch (e) { - const err = e as GrammarError; - if ('format' in err) { - console.error(err.format([{ source: grammarSource, text: src }])); - } - console.error(`Error parsing lexeme!\n"${src}"`); - throw err; - } -}; - -const debugSrc = (src: string) => { - inspect(parseSrc(src).program); -}; - -const debugStatement = (stmt: AstNode) => { - const program = `void main() {/* start */${stmt}/* end */}`; - const ast = parseSrc(program); - inspect((ast.program[0] as FunctionNode).body.statements[0]); -}; - -const expectParsedStatement = (src: string, options = {}) => { - const program = `void main() {/* start */${src}/* end */}`; - const ast = parseSrc(program, options); - const glsl = generate(ast); - if (glsl !== program) { - inspect(ast.program[0]); - // @ts-ignore - expect(glsl.match(middle)[1]).toBe(src); - } -}; - -const parseStatement = (src: string, options: ParserOptions = {}) => { - const program = `void main() {${src}}`; - return parseSrc(program, options); -}; - -const expectParsedProgram = (src: string, options: ParserOptions = {}) => { - const ast = parseSrc(src, options); - const glsl = generate(ast); - if (glsl !== src) { - inspect(ast); - expect(glsl).toBe(src); - } -}; - -test('scope bindings and type names', () => { - const ast = parseSrc(` -float a, b = 1.0, c = a; -vec2 texcoord1, texcoord2; -vec3 position; -vec4 myRGBA; -ivec2 textureLookup; -bvec3 less; -float arr1[5] = float[5](3.4, 4.2, 5.0, 5.2, 1.1); -vec4[2] arr2[3]; -vec4[3][2] arr3; -vec3 fnName() {} -struct light { - float intensity; - vec3 position; -}; -coherent buffer Block { - readonly vec4 member1; - vec4 member2; -};`); - // debugAst(ast); - expect(Object.keys(ast.scopes[0].bindings)).toEqual([ - 'a', 
- 'b', - 'c', - 'texcoord1', - 'texcoord2', - 'position', - 'myRGBA', - 'textureLookup', - 'less', - 'arr1', - 'arr2', - 'arr3', - 'Block', - ]); - expect(Object.keys(ast.scopes[0].functions)).toEqual(['fnName']); - expect(Object.keys(ast.scopes[0].types)).toEqual(['light']); -}); +import { buildParser } from './test-helpers'; -test('scope references', () => { - const ast = parseSrc(` -float a, b = 1.0, c = a; -mat2x2 myMat = mat2( vec2( 1.0, 0.0 ), vec2( 0.0, 1.0 ) ); -struct { - float s; - float t; -} structArr[]; -struct structType { - float s; - float t; -}; -structType z; - -float protoFn(float x); - -float shadowed; -float reused; -float unused; -void useMe() {} -vec3 fnName(float arg1, vec3 arg2) { - float shadowed = arg1; - structArr[0].x++; - - if(true) { - float x = shadowed + 1 + reused; - } - - { - float compound; - compound = shadowed + reused; - } - - { - float compound; - compound = shadowed + reused + compound; - } - - protoFn(1.0); - useMe(); -}`); - expect(ast.scopes[0].bindings.a.references).toHaveLength(2); - expect(ast.scopes[0].bindings.b.references).toHaveLength(1); - expect(ast.scopes[0].bindings.c.references).toHaveLength(1); - expect(ast.scopes[0].bindings.myMat.references).toHaveLength(1); - expect(ast.scopes[0].bindings.structArr.references).toHaveLength(2); - expect(ast.scopes[0].bindings.shadowed.references).toHaveLength(1); - expect(ast.scopes[0].types.structType.references).toHaveLength(2); - expect(ast.scopes[0].functions.useMe.references).toHaveLength(2); - expect(ast.scopes[2].bindings.arg1.references).toHaveLength(2); - expect(ast.scopes[2].bindings.arg2.references).toHaveLength(1); - expect(ast.scopes[2].bindings.shadowed.references).toHaveLength(4); - // reused - used in inner scope - expect(ast.scopes[0].bindings.reused.references).toHaveLength(4); - // compound - used in first innermost scope only - expect(ast.scopes[4].bindings.compound.references).toHaveLength(2); - // compound - used in last innermost scope only - 
expect(ast.scopes[5].bindings.compound.references).toHaveLength(3); -}); +let c!: ReturnType; +beforeAll(() => (c = buildParser())); test('declarations', () => { - expectParsedProgram(` + c.expectParsedProgram(` float a, b = 1.0, c = a; vec2 texcoord1, texcoord2; vec3 position; @@ -230,21 +17,21 @@ test('declarations', () => { test('headers', () => { // The following includes the varying/attribute case which only works in GL // ES 1.00, and will need to be updated when the switch is implemented - expectParsedProgram(` + c.expectParsedProgram(` precision mediump float; precision highp int; - in vec4 varName; - out vec4 varName; + in vec4 varName1; + out vec4 varName2; - varying vec4 varName, blarName; - uniform vec4 varName; - attribute vec4 varName; + varying vec4 varName3, blarName; + uniform vec4 varName4; + attribute vec4 varName5; `); }); test('if statement', () => { - expectParsedStatement( + c.expectParsedStatement( `if(i != 0) { aFunction(); } else if(i == 2) { bFunction(); } else { cFunction(); }`, @@ -255,7 +42,7 @@ else { cFunction(); }`, }); test('do while loop', () => { - expectParsedStatement( + c.expectParsedStatement( ` do { aFunction(); @@ -269,7 +56,7 @@ test('do while loop', () => { }); test('standard while loop', () => { - expectParsedStatement( + c.expectParsedStatement( ` while(i <= 99) { aFunction(); @@ -284,12 +71,12 @@ test('standard while loop', () => { test('for loops', () => { // Infinite for loop - expectParsedStatement(` + c.expectParsedStatement(` for(;;) { } `); // For loop with jump statements - expectParsedStatement( + c.expectParsedStatement( ` for(int a = 0; b <= 99; c++) { break; @@ -301,7 +88,7 @@ test('for loops', () => { { quiet: true } ); // Loop with condition variable declaration (GLSL ES 3.00 only) - expectParsedStatement(` + c.expectParsedStatement(` for(int i = 0; bool x = false; i++) {} `); }); @@ -309,7 +96,7 @@ test('for loops', () => { test('switch error', () => { // Test the semantic analysis case expect(() => - 
parser.parse( + c.parse( `void main() { switch (easingId) { result = cubicIn(); @@ -321,7 +108,7 @@ test('switch error', () => { }); test('switch statement', () => { - expectParsedStatement( + c.expectParsedStatement( ` switch (easingId) { case 0: @@ -341,31 +128,31 @@ test('switch statement', () => { test('qualifier declarations', () => { // The expected node here is "qualifier_declarator", which would be nice to // test for at some point, maybe when doing more AST analysis - expectParsedProgram(` + c.expectParsedProgram(` invariant precise in a, b,c; `); }); test('number notations', () => { // Integer hex notation - expectParsedStatement(`highp uint value = 0x1234u;`); - expectParsedStatement(`uint c = 0xffffffff;`); - expectParsedStatement(`uint d = 0xffffffffU;`); + c.expectParsedStatement(`highp uint value = 0x1234u;`); + c.expectParsedStatement(`uint c = 0xffffffff;`); + c.expectParsedStatement(`uint d = 0xffffffffU;`); // Octal - expectParsedStatement(`uint d = 021234;`); + c.expectParsedStatement(`uint d = 021234;`); // Double precision floating point - expectParsedStatement(`double c, d = 2.0LF;`); + c.expectParsedStatement(`double c, d = 2.0LF;`); // uint - expectParsedStatement(`uint k = 3u;`); - expectParsedStatement(`uint f = -1u;`); + c.expectParsedStatement(`uint k = 3u;`); + c.expectParsedStatement(`uint f = -1u;`); }); test('layout', () => { - expectParsedProgram(` + c.expectParsedProgram(` layout(location = 4, component = 2) in vec2 a; - layout(location = 3) in vec4 normal; + layout(location = 3) in vec4 normal1; layout(location = 9) in mat4 transforms[2]; - layout(location = 3) in vec4 normal; + layout(location = 3) in vec4 normal2; const int start = 6; layout(location = start + 2) in vec4 p; @@ -398,12 +185,12 @@ test('layout', () => { `); }); -test('comments', () => { - expectParsedProgram( +test('parses comments', () => { + c.expectParsedProgram( ` /* starting comment */ // hi - void main(x) { + void main(float x) { /* comment */// hi /* 
comment */ // hi statement(); // hi @@ -414,8 +201,8 @@ test('comments', () => { ); }); -test('functions', () => { - expectParsedProgram(` +test('parses functions', () => { + c.expectParsedProgram(` // Prototypes vec4 f(in vec4 x, out vec4 y); int newFunction(in bvec4 aBvec4, // read-only @@ -427,15 +214,15 @@ test('functions', () => { }); test('parses function_call . postfix_expression', () => { - expectParsedStatement('texture().rgb;', { quiet: true }); + c.expectParsedStatement('texture().rgb;', { quiet: true }); }); test('parses postfix_expression as function_identifier', () => { - expectParsedStatement('a().length();', { quiet: true }); + c.expectParsedStatement('a().length();', { quiet: true }); }); test('parses postfix expressions after non-function calls (aka map.length())', () => { - expectParsedProgram( + c.expectParsedProgram( ` void main() { float y = x().length(); @@ -449,33 +236,30 @@ void main() { }); test('postfix, unary, binary expressions', () => { - expectParsedStatement('x ++ + 1.0 + + 2.0;'); -}); - -test('parses a test file', () => { - expectParsedProgram(preprocess(testFile)); + c.expectParsedStatement('x ++ + 1.0 + + 2.0;', { quiet: true }); }); test('operators', () => { - expectParsedStatement('1 || 2 && 2 ^^ 3 >> 4 << 5;'); + c.expectParsedStatement('1 || 2 && 2 ^^ 3 >> 4 << 5;'); }); test('declaration', () => { - expectParsedStatement('const float x = 1.0, y = 2.0;'); + c.expectParsedStatement('const float x = 1.0, y = 2.0;'); }); test('assignment', () => { - expectParsedStatement('x |= 1.0;'); + c.expectParsedStatement('x |= 1.0;', { quiet: true }); }); test('ternary', () => { - expectParsedStatement( - 'float y = x == 1.0 ? x == 2.0 ? 1.0 : 3.0 : x == 3.0 ? 4.0 : 5.0;' + c.expectParsedStatement( + 'float y = x == 1.0 ? x == 2.0 ? 1.0 : 3.0 : x == 3.0 ? 
4.0 : 5.0;', + { quiet: true } ); }); test('structs', () => { - expectParsedProgram(` + c.expectParsedProgram(` struct light { float intensity; vec3 position, color; @@ -487,7 +271,7 @@ test('structs', () => { }); test('buffer variables', () => { - expectParsedProgram(` + c.expectParsedProgram(` buffer b { float u[]; vec4 v[]; @@ -496,13 +280,13 @@ test('buffer variables', () => { }); test('arrays', () => { - expectParsedProgram(` + c.expectParsedProgram(` float frequencies[3]; uniform vec4 lightPosition[4]; struct light { int a; }; light lights[]; const int numLights = 2; - light lights[numLights]; + light lights2[numLights]; buffer b { float u[]; @@ -511,7 +295,7 @@ test('arrays', () => { // Array initializers float array[3] = float[3](1.0, 2.0, 3.0); - float array[3] = float[](1.0, 2.0, 3.0); + float array2[3] = float[](1.0, 2.0, 3.0); // Function with array as return type float[5] foo() { } @@ -519,7 +303,7 @@ test('arrays', () => { }); test('initializer list', () => { - expectParsedProgram(` + c.expectParsedProgram(` vec4 a[3][2] = { vec4[2](vec4(0.0), vec4(1.0)), vec4[2](vec4(0.0), vec4(1.0)), @@ -529,7 +313,7 @@ test('initializer list', () => { }); test('subroutines', () => { - expectParsedProgram(` + c.expectParsedProgram(` subroutine vec4 colorRedBlue(); // option 1 @@ -544,208 +328,9 @@ test('subroutines', () => { `); }); -test('struct constructor', () => { - const ast = parseSrc(` -struct light { - float intensity; - vec3 position; -}; -light lightVar = light(3.0, vec3(1.0, 2.0, 3.0)); -`); - expect(ast.scopes[0].types.light.references).toHaveLength(3); -}); - -test('overloaded scope test', () => { - const ast = parseSrc(` -vec4 overloaded(vec4 x) { - return x; -} -float overloaded(float x) { - return x; -}`); - expect(ast.scopes[0].functions.overloaded.references).toHaveLength(2); -}); - -test('overriding glsl builtin function', () => { - // "noise" is a built-in GLSL function that should be identified and renamed - const ast = parseSrc(` -float noise() 
{} -float fn() { - uv += noise(); -} -`); - - renameFunctions(ast.scopes[0], (name) => `${name}_FUNCTION`); - expect(generate(ast)).toBe(` -float noise_FUNCTION() {} -float fn_FUNCTION() { - uv += noise_FUNCTION(); -} -`); -}); - -test('rename bindings and functions', () => { - const ast = parseSrc( - ` -float a, b = 1.0, c = a; -mat2x2 myMat = mat2( vec2( 1.0, 0.0 ), vec2( 0.0, 1.0 ) ); -struct { - float s; - float t; -} structArr[]; -struct structType { - float s; - float t; -}; -structType z; - -float shadowed; -float reused; -float unused; -void x() {} -vec3 fnName(float arg1, vec3 arg2) { - float shadowed = arg1; - float y = x().length(); - structArr[0].x++; - - if(true) { - float x = shadowed + 1 + reused; - } - - { - float compound; - compound = shadowed + reused; - } - - { - float compound; - compound = shadowed + reused + compound; - } -} -vec4 LinearToLinear( in vec4 value ) { - return value; -} -vec4 mapTexelToLinear( vec4 value ) { return LinearToLinear( value ); } -vec4 linearToOutputTexel( vec4 value ) { return LinearToLinear( value ); } -`, - { quiet: true } - ); - - renameBindings(ast.scopes[0], (name) => `${name}_VARIABLE`); - renameFunctions(ast.scopes[0], (name) => `${name}_FUNCTION`); - - // console.log('scopes:', debugScopes(ast.scopes)); - expect(generate(ast)).toBe(` -float a_VARIABLE, b_VARIABLE = 1.0, c_VARIABLE = a_VARIABLE; -mat2x2 myMat_VARIABLE = mat2( vec2( 1.0, 0.0 ), vec2( 0.0, 1.0 ) ); -struct { - float s; - float t; -} structArr_VARIABLE[]; -struct structType { - float s; - float t; -}; -structType z_VARIABLE; - -float shadowed_VARIABLE; -float reused_VARIABLE; -float unused_VARIABLE; -void x_FUNCTION() {} -vec3 fnName_FUNCTION(float arg1, vec3 arg2) { - float shadowed = arg1; - float y = x_FUNCTION().length(); - structArr_VARIABLE[0].x++; - - if(true) { - float x = shadowed + 1 + reused_VARIABLE; - } - - { - float compound; - compound = shadowed + reused_VARIABLE; - } - - { - float compound; - compound = shadowed + reused_VARIABLE 
+ compound; - } -} -vec4 LinearToLinear_FUNCTION( in vec4 value ) { - return value; -} -vec4 mapTexelToLinear_FUNCTION( vec4 value ) { return LinearToLinear_FUNCTION( value ); } -vec4 linearToOutputTexel_FUNCTION( vec4 value ) { return LinearToLinear_FUNCTION( value ); } -`); -}); - -test('detecting struct scope and usage', () => { - const ast = parseSrc(` -struct StructName { - vec3 color; -}; -StructName reflectedLight = StructName(vec3(0.0)); -void main() { - struct StructName { - vec3 color; - }; - StructName ref = StructName(); -} -`); - - renameBindings(ast.scopes[0], (name) => `${name}_y`); - renameTypes(ast.scopes[0], (name) => `${name}_x`); - - expect(Object.keys(ast.scopes[0].functions)).toEqual(['main']); - expect(Object.keys(ast.scopes[0].bindings)).toEqual(['reflectedLight']); - expect(Object.keys(ast.scopes[0].types)).toEqual(['StructName']); - expect(ast.scopes[0].types.StructName.references).toHaveLength(3); - - expect(Object.keys(ast.scopes[1].types)).toEqual(['StructName']); - - // console.log(generate(ast)); -}); - -test('fn args shadowing global scope identified as separate bindings', () => { - const ast = parseSrc(` -attribute vec3 position; -vec3 func(vec3 position) { - return position; -}`); - renameBindings(ast.scopes[0], (name) => - name === 'position' ? 
'renamed' : name - ); - // The func arg "position" shadows the global binding, it should be untouched - expect(generate(ast)).toBe(` -attribute vec3 renamed; -vec3 func(vec3 position) { - return position; -}`); -}); - -test('I do not yet know what to do with layout()', () => { - const ast = parseSrc(` -layout(std140,column_major) uniform; -float a; -uniform Material -{ -uniform vec2 vProp; -};`); - - // This shouldn't crash - see the comment block in renameBindings() - renameBindings(ast.scopes[0], (name) => `${name}_x`); - expect(generate(ast)).toBe(` -layout(std140,column_major) uniform; -float a_x; -uniform Material -{ -uniform vec2 vProp; -};`); -}); - test('Locations with location disabled', () => { const src = `void main() {}`; - const ast = parseSrc(src); // default argument is no location information + const ast = c.parseSrc(src); // default argument is no location information expect(ast.program[0].location).toBe(undefined); expect(ast.scopes[0].location).toBe(undefined); }); @@ -759,7 +344,7 @@ void main() { float x = 1.0; } }`; - const ast = parseSrc(src, { includeLocation: true }); + const ast = c.parseSrc(src, { includeLocation: true }); // The main fn location should start at "void" expect(ast.program[0].location).toStrictEqual({ start: { line: 2, column: 1, offset: 16 }, @@ -785,3 +370,13 @@ void main() { end: { line: 7, column: 3, offset: 73 }, }); }); + +test('fails on error', () => { + expect(() => + c.parse( + `float a; + float a;`, + { failOnWarn: true } + ) + ).toThrow(/duplicate variable declaration: "a"/); +}); diff --git a/src/parser/parser.d.ts b/src/parser/parser.d.ts index 123933d..4c79cae 100644 --- a/src/parser/parser.d.ts +++ b/src/parser/parser.d.ts @@ -4,6 +4,14 @@ export type ParserOptions = Partial<{ quiet: boolean; grammarSource: string; includeLocation: boolean; + failOnWarn: boolean; + tracer: { + trace: (e: { + type: 'rule.enter' | 'rule.match' | 'rule.fail'; + rule: string; + result: any; + }) => void; + }; }>; // Allow to 
fetch util functions from parser directly. I'd like to inline those diff --git a/src/parser/scope.test.ts b/src/parser/scope.test.ts new file mode 100644 index 0000000..2aa72e7 --- /dev/null +++ b/src/parser/scope.test.ts @@ -0,0 +1,550 @@ +import generate from './generator'; +import { renameBindings, renameFunctions, renameTypes } from './utils'; +import { UNKNOWN_TYPE } from './grammar'; +import { buildParser, nextWarn } from './test-helpers'; + +let c!: ReturnType; +beforeAll(() => (c = buildParser())); + +test('scope bindings and type names', () => { + const ast = c.parseSrc(` +float selfref, b = 1.0, c = selfref; +vec2 texcoord1, texcoord2; +vec3 position; +vec4 myRGBA; +ivec2 textureLookup; +bvec3 less; +float arr1[5] = float[5](3.4, 4.2, 5.0, 5.2, 1.1); +vec4[2] arr2[3]; +vec4[3][2] arr3; +vec3 fnName() {} +struct light { + float intensity; + vec3 position; +}; +coherent buffer Block { + readonly vec4 member1; + vec4 member2; +};`); + // debugAst(ast); + expect(Object.keys(ast.scopes[0].bindings)).toEqual([ + 'selfref', + 'b', + 'c', + 'texcoord1', + 'texcoord2', + 'position', + 'myRGBA', + 'textureLookup', + 'less', + 'arr1', + 'arr2', + 'arr3', + 'Block', + ]); + expect(Object.keys(ast.scopes[0].functions)).toEqual(['fnName']); + expect(Object.keys(ast.scopes[0].types)).toEqual(['light']); +}); + +test('scope references', () => { + const ast = c.parseSrc( + ` +float selfref, b = 1.0, c = selfref; +mat2x2 myMat = mat2( vec2( 1.0, 0.0 ), vec2( 0.0, 1.0 ) ); +struct { + float s; + float t; +} structArr[]; +struct structType { + float s; + float t; +}; +structType z; + +float protoFn(float x); + +float shadowed; +float reused; +float unused; +void useMe() {} +vec3 fnName(float arg1, vec3 arg2) { + float shadowed = arg1; + structArr[0].x++; + + if(true) { + float x = shadowed + 1 + reused; + } + + { + float compound; + compound = shadowed + reused; + } + + { + float compound; + compound = shadowed + reused + compound; + } + unknown(); + + MyStruct dataArray[1] 
= { + {1.0} + }; + + protoFn(1.0); + useMe(); +}`, + { quiet: true } + ); + expect(ast.scopes[0].bindings.selfref.references).toHaveLength(2); + expect(ast.scopes[0].bindings.b.references).toHaveLength(1); + expect(ast.scopes[0].bindings.c.references).toHaveLength(1); + expect(ast.scopes[0].bindings.myMat.references).toHaveLength(1); + expect(ast.scopes[0].bindings.structArr.references).toHaveLength(2); + expect(ast.scopes[0].bindings.shadowed.references).toHaveLength(1); + expect(ast.scopes[0].types.structType.references).toHaveLength(2); + expect(ast.scopes[0].functions.useMe['void: void'].references).toHaveLength( + 2 + ); + expect(ast.scopes[2].bindings.arg1.references).toHaveLength(2); + expect(ast.scopes[2].bindings.arg2.references).toHaveLength(1); + expect(ast.scopes[2].bindings.shadowed.references).toHaveLength(4); + // reused - used in inner scope + expect(ast.scopes[0].bindings.reused.references).toHaveLength(4); + // compound - used in first innermost scope only + expect(ast.scopes[4].bindings.compound.references).toHaveLength(2); + // compound - used in last innermost scope only + expect(ast.scopes[5].bindings.compound.references).toHaveLength(3); + + expect( + ast.scopes[0].functions.unknown['UNKNOWN TYPE: void'].references + ).toHaveLength(1); + expect( + ast.scopes[0].functions.unknown['UNKNOWN TYPE: void'].declaration + ).toBe(undefined); +}); + +test('scope binding declarations', () => { + const ast = c.parseSrc( + ` +float selfref, b = 1.0, c = selfref; +void main() { + selfref += d; +}`, + { quiet: true } + ); + expect(ast.scopes[0].bindings.selfref.references).toHaveLength(3); + expect(ast.scopes[0].bindings.selfref.declaration).toBeTruthy(); + expect(ast.scopes[0].bindings.b.references).toHaveLength(1); + expect(ast.scopes[0].bindings.b.declaration).toBeTruthy(); + expect(ast.scopes[0].bindings.c.references).toHaveLength(1); + expect(ast.scopes[0].bindings.c.declaration).toBeTruthy(); + + 
expect(ast.scopes[1].bindings.d.references).toHaveLength(1); + expect(ast.scopes[1].bindings.d.declaration).toBeFalsy(); +}); + +test('struct constructor identified in scope', () => { + const ast = c.parseSrc(` +struct light { + float intensity; + vec3 position; +}; +light lightVar = light(3.0, vec3(1.0, 2.0, 3.0)); +`); + expect(ast.scopes[0].types.light.references).toHaveLength(3); +}); + +test('function overloaded scope', () => { + const ast = c.parseSrc(` +vec4 overloaded(vec4 x) { + return x; +} +float overloaded(float x) { + return x; +}`); + expect(Object.entries(ast.scopes[0].functions.overloaded)).toHaveLength(2); +}); + +test('overriding glsl builtin function', () => { + // "noise" is a built-in GLSL function that should be identified and renamed + const ast = c.parseSrc(` +float noise() {} +float fn() { + vec2 uv; + uv += noise(); +} +`); + + expect(ast.scopes[0].functions.noise); + renameFunctions(ast.scopes[0], (name) => `${name}_FUNCTION`); + expect(generate(ast)).toBe(` +float noise_FUNCTION() {} +float fn_FUNCTION() { + vec2 uv; + uv += noise_FUNCTION(); +} +`); +}); + +test('rename bindings and functions', () => { + const ast = c.parseSrc( + ` +float selfref, b = 1.0, c = selfref; +mat2x2 myMat = mat2( vec2( 1.0, 0.0 ), vec2( 0.0, 1.0 ) ); +struct { + float s; + float t; +} structArr[]; +struct structType { + float s; + float t; +}; +structType z; + +float shadowed; +float reused; +float unused; +void x() {} +vec3 fnName(float arg1, vec3 arg2) { + float shadowed = arg1; + float y = x().length(); + structArr[0].x++; + + if(true) { + float x = shadowed + 1 + reused; + } + + { + float compound; + compound = shadowed + reused; + } + + { + float compound; + compound = shadowed + reused + compound; + } +} +vec4 LinearToLinear( in vec4 value ) { + return value; +} +vec4 mapTexelToLinear( vec4 value ) { return LinearToLinear( value ); } +vec4 linearToOutputTexel( vec4 value ) { return LinearToLinear( value ); } +`, + { quiet: true } + ); + + 
renameBindings(ast.scopes[0], (name) => `${name}_VARIABLE`); + renameFunctions(ast.scopes[0], (name) => `${name}_FUNCTION`); + + expect(generate(ast)).toBe(` +float selfref_VARIABLE, b_VARIABLE = 1.0, c_VARIABLE = selfref_VARIABLE; +mat2x2 myMat_VARIABLE = mat2( vec2( 1.0, 0.0 ), vec2( 0.0, 1.0 ) ); +struct { + float s; + float t; +} structArr_VARIABLE[]; +struct structType { + float s; + float t; +}; +structType z_VARIABLE; + +float shadowed_VARIABLE; +float reused_VARIABLE; +float unused_VARIABLE; +void x_FUNCTION() {} +vec3 fnName_FUNCTION(float arg1, vec3 arg2) { + float shadowed = arg1; + float y = x_FUNCTION().length(); + structArr_VARIABLE[0].x++; + + if(true) { + float x = shadowed + 1 + reused_VARIABLE; + } + + { + float compound; + compound = shadowed + reused_VARIABLE; + } + + { + float compound; + compound = shadowed + reused_VARIABLE + compound; + } +} +vec4 LinearToLinear_FUNCTION( in vec4 value ) { + return value; +} +vec4 mapTexelToLinear_FUNCTION( vec4 value ) { return LinearToLinear_FUNCTION( value ); } +vec4 linearToOutputTexel_FUNCTION( vec4 value ) { return LinearToLinear_FUNCTION( value ); } +`); +}); + +test('detecting struct scope and usage', () => { + const ast = c.parseSrc(` +struct StructName { + vec3 color; +}; +struct OtherStruct { + StructName inner; +}; +StructName proto(StructName x, StructName[3]); + +subroutine StructName colorRedBlue(); +subroutine (colorRedBlue) StructName redColor() { + return StructName(1.0, 0.0, 0.0, 1.0); +} + +StructName reflectedLight = StructName(vec3(0.0)); +StructName main(in StructName x, StructName[3] y) { + StructName ref = StructName(); + float a = 1.0 + StructName(1.0).color.x; + struct StructName { + vec3 color; + }; + StructName ref2 = StructName(); + float a2 = 1.0 + StructName(1.0).color.x; +} +`); + renameTypes(ast.scopes[0], (name) => `${name}_x`); + expect(generate(ast)).toBe(` +struct StructName_x { + vec3 color; +}; +struct OtherStruct_x { + StructName_x inner; +}; +StructName_x 
proto(StructName_x x, StructName_x[3]); + +subroutine StructName_x colorRedBlue(); +subroutine (colorRedBlue) StructName_x redColor() { + return StructName_x(1.0, 0.0, 0.0, 1.0); +} + +StructName_x reflectedLight = StructName_x(vec3(0.0)); +StructName_x main(in StructName_x x, StructName_x[3] y) { + StructName_x ref = StructName_x(); + float a = 1.0 + StructName_x(1.0).color.x; + struct StructName { + vec3 color; + }; + StructName ref2 = StructName(); + float a2 = 1.0 + StructName(1.0).color.x; +} +`); + // Ensure structs aren't added to global function scope since they should be + // identified as types + expect(Object.keys(ast.scopes[0].functions)).toEqual([ + 'proto', + 'colorRedBlue', + 'redColor', + 'main', + ]); + expect(Object.keys(ast.scopes[0].bindings)).toEqual(['reflectedLight']); + expect(Object.keys(ast.scopes[0].types)).toEqual([ + 'StructName', + 'OtherStruct', + ]); + expect(ast.scopes[0].types.StructName.references).toHaveLength(16); + + // Inner struct definition should be found in inner fn scope + expect(Object.keys(ast.scopes[2].types)).toEqual(['StructName']); +}); + +test('fn args shadowing global scope identified as separate bindings', () => { + const ast = c.parseSrc(` +attribute vec3 position; +vec3 func(vec3 position) { + return position; +}`); + renameBindings(ast.scopes[0], (name) => + name === 'position' ? 
'renamed' : name + ); + // The func arg "position" shadows the global binding, it should be untouched + expect(generate(ast)).toBe(` +attribute vec3 renamed; +vec3 func(vec3 position) { + return position; +}`); +}); + +test('I do not yet know what to do with layout()', () => { + const ast = c.parseSrc(` +layout(std140,column_major) uniform; +float a; +uniform Material +{ +uniform vec2 vProp; +};`); + + // This shouldn't crash - see the comment block in renameBindings() + renameBindings(ast.scopes[0], (name) => `${name}_x`); + expect(generate(ast)).toBe(` +layout(std140,column_major) uniform; +float a_x; +uniform Material +{ +uniform vec2 vProp; +};`); +}); + +test(`(regression) ensure self-referenced variables don't appear as types`, () => { + const ast = c.parseSrc(` +float a = 1.0, c = a; +`); + expect(Object.keys(ast.scopes[0].types)).toEqual([]); +}); + +test('identifies a declared function with references', () => { + const ast = c.parseSrc(` +vec4[3] main(float a, vec3 b) {} +void x() { + float a = 1.0; + float b = 1.0; + main(a, b); +} +`); + const signature = 'vec4[3]: float, vec3'; + // Should have found no types + expect(ast.scopes[0].types).toMatchObject({}); + // Should have found one overload signature + expect(ast.scopes[0].functions).toHaveProperty('main'); + expect(ast.scopes[0].functions.main).toHaveProperty(signature); + expect(Object.keys(ast.scopes[0].functions.main)).toHaveLength(1); + // Should be declared with references + expect(ast.scopes[0].functions.main[signature].declaration).toBeTruthy(); + expect(ast.scopes[0].functions.main[signature].references).toHaveLength(2); +}); + +test('does not match function overload with different argument length', () => { + const ast = c.parseSrc( + ` +float main(float a, float b) {} +void x() { + main(a, b, c); +} +`, + { quiet: true } + ); + + const unknownSig = `${UNKNOWN_TYPE}: ${UNKNOWN_TYPE}, ${UNKNOWN_TYPE}, ${UNKNOWN_TYPE}`; + const knownSig = `float: float, float`; + // Should have found no types + 
expect(ast.scopes[0].types).toMatchObject({}); + // Should have found one overload signature + expect(ast.scopes[0].functions).toHaveProperty('main'); + expect(ast.scopes[0].functions.main).toHaveProperty(knownSig); + expect(ast.scopes[0].functions.main).toHaveProperty(unknownSig); + expect(Object.keys(ast.scopes[0].functions.main)).toHaveLength(2); + // Declaration should not match bad overload + expect(ast.scopes[0].functions.main[knownSig].declaration).toBeTruthy(); + expect(ast.scopes[0].functions.main[knownSig].references).toHaveLength(1); + // Bad call should not match definition + expect(ast.scopes[0].functions.main[unknownSig].declaration).toBeFalsy(); + expect(ast.scopes[0].functions.main[unknownSig].references).toHaveLength(1); +}); + +test('handles declared, undeclared, and unknown function cases', () => { + const ast = c.parseSrc( + ` +// Prototype for undeclared function +float main(float, float, float[3]); + +// Prototype and definition for declared function +float main(float a, float b); +float main(float a, float b) {} + +void x() { + main(a, b); + main(a, b, c); + main(a, b, c, d); +} +`, + { quiet: true } + ); + + const defSig = `float: float, float`; + const undefSig = `float: float, float, float[3]`; + const unknownSig = `${UNKNOWN_TYPE}: ${UNKNOWN_TYPE}, ${UNKNOWN_TYPE}, ${UNKNOWN_TYPE}, ${UNKNOWN_TYPE}`; + + // Should have found no types + expect(ast.scopes[0].types).toMatchObject({}); + + // Should have found 3 overload signatures. 
One overload for defined, one for + // undefined, and one for the unknown call + expect(ast.scopes[0].functions).toHaveProperty('main'); + expect(Object.keys(ast.scopes[0].functions.main)).toHaveLength(3); + expect(ast.scopes[0].functions.main).toHaveProperty(defSig); + expect(ast.scopes[0].functions.main).toHaveProperty(undefSig); + expect(ast.scopes[0].functions.main).toHaveProperty(unknownSig); + + // Defined function has prototype, definition + expect(ast.scopes[0].functions.main[defSig].declaration).toBeTruthy(); + expect(ast.scopes[0].functions.main[defSig].references).toHaveLength(3); + + // Undeclared call has prototype and call, but no declaration + expect(ast.scopes[0].functions.main[undefSig].declaration).toBeFalsy(); + expect(ast.scopes[0].functions.main[undefSig].references).toHaveLength(2); + + // Unknown function is hanging out by itself + expect(ast.scopes[0].functions.main[unknownSig].declaration).toBeFalsy(); + expect(ast.scopes[0].functions.main[unknownSig].references).toHaveLength(1); +}); + +test('warns on undeclared functions and structs', () => { + const next = nextWarn(); + + c.parseSrc(` +MyStruct x = MyStruct(); +void main() { + a(); + a(1); + z += 1; +} +struct MyStruct { float y; }; +`); + + expect(next()).toContain('undeclared function: "MyStruct"'); + expect(next()).toContain('undeclared type: "MyStruct"'); + expect(next()).toContain('undeclared function: "a"'); + expect(next()).toContain('No matching overload for function: "a"'); + expect(next()).toContain('Encountered undefined variable: "z"'); + expect(next()).toContain('Type "MyStruct" was used before it was declared'); +}); + +test('warns on duplicate declarations', () => { + const next = nextWarn(); + + c.parseSrc(` +struct MyStruct { float y; }; +struct MyStruct { float y; }; +float dupefloat = 1.0; +float dupefloat = 1.0; +float dupefn(float b); +float dupefn(float); +void dupefn() {} +void dupefn() {} +`); + + expect(next()).toContain('duplicate type declaration: "MyStruct"'); 
+ expect(next()).toContain('duplicate variable declaration: "dupefloat"'); + expect(next()).toContain('duplicate function prototype: "dupefn"'); + expect(next()).toContain('duplicate function definition: "dupefn"'); +}); + +test('undeclared variables are added to the expected scope', () => { + const ast = c.parseSrc( + ` +void a() { + MyStruct x; + a(); +} +`, + { quiet: true } + ); + // Function should get added to global scope + expect(ast.scopes[0].types).toMatchObject({}); + expect(ast.scopes[0].functions).toHaveProperty('a'); + // Struct should get added to inner scope + expect(ast.scopes[1].types).toHaveProperty('MyStruct'); +}); diff --git a/src/parser/scope.ts b/src/parser/scope.ts new file mode 100644 index 0000000..a4feaef --- /dev/null +++ b/src/parser/scope.ts @@ -0,0 +1,222 @@ +// This file is compiled and inlined in /glsl-grammar.pegjs. See build-parser.sh +// and note that file is called in parse.test.ts +import { + AstNode, + LocationObject, + ArraySpecifierNode, + FunctionPrototypeNode, + KeywordNode, + FunctionNode, + FunctionCallNode, + TypeNameNode, +} from '../ast'; +import { xor } from './utils'; + +export type TypeScopeEntry = { + declaration?: TypeNameNode; + references: TypeNameNode[]; +}; +export type TypeScopeIndex = { + [name: string]: TypeScopeEntry; +}; +export type ScopeEntry = { declaration?: AstNode; references: AstNode[] }; +export type ScopeIndex = { + [name: string]: ScopeEntry; +}; +export type FunctionOverloadDefinition = { + returnType: string; + parameterTypes: string[]; + declaration?: FunctionNode; + references: AstNode[]; +}; +export type FunctionOverloadIndex = { + [signature: string]: FunctionOverloadDefinition; +}; +export type FunctionScopeIndex = { + [name: string]: FunctionOverloadIndex; +}; + +export type Scope = { + name: string; + parent?: Scope; + bindings: ScopeIndex; + types: TypeScopeIndex; + functions: FunctionScopeIndex; + location?: LocationObject; +}; + +export const UNKNOWN_TYPE = 'UNKNOWN TYPE'; + 
+export type FunctionSignature = [ + returnType: string, + parameterTypes: string[], + signature: string +]; + +export const makeScopeIndex = ( + firstReference: AstNode, + declaration?: AstNode +): ScopeEntry => ({ + declaration, + references: [firstReference], +}); + +export const findTypeScope = ( + scope: Scope | undefined, + typeName: string +): Scope | null => { + if (!scope) { + return null; + } + if (typeName in scope.types) { + return scope; + } + return findTypeScope(scope.parent, typeName); +}; + +export const isDeclaredType = (scope: Scope, typeName: string) => + findTypeScope(scope, typeName) !== null; + +export const findBindingScope = ( + scope: Scope | undefined, + name: string +): Scope | null => { + if (!scope) { + return null; + } + if (name in scope.bindings) { + return scope; + } + return findBindingScope(scope.parent, name); +}; + +export const extractConstant = (expression: AstNode): string => { + let result = UNKNOWN_TYPE; + // Keyword case, like float + if ('token' in expression) { + result = expression.token; + // User defined type + } else if ( + 'identifier' in expression && + typeof expression.identifier === 'string' + ) { + result = expression.identifier; + } else { + console.warn(result, expression); + } + return result; +}; + +export const quantifiersSignature = (quantifier: ArraySpecifierNode[]) => + quantifier.map((q) => `[${extractConstant(q.expression)}]`).join(''); + +export const functionDeclarationSignature = ( + node: FunctionNode | FunctionPrototypeNode +): FunctionSignature => { + const proto = node.type === 'function' ? 
node.prototype : node; + const { specifier } = proto.header.returnType; + const quantifiers = specifier.quantifier || []; + + const parameterTypes = proto?.parameters?.map(({ specifier, quantifier }) => { + // todo: saving place on putting quantifiers here + const quantifiers = + // vec4[1][2] param + specifier.quantifier || + // vec4 param[1][3] + quantifier || + []; + return `${extractConstant(specifier.specifier)}${quantifiersSignature( + quantifiers + )}`; + }) || ['void']; + + const returnType = `${ + (specifier.specifier as KeywordNode).token + }${quantifiersSignature(quantifiers)}`; + + return [ + returnType, + parameterTypes, + `${returnType}: ${parameterTypes.join(', ')}`, + ]; +}; + +export const doSignaturesMatch = ( + definitionSignature: string, + definition: FunctionOverloadDefinition, + callSignature: FunctionSignature +) => { + if (definitionSignature === callSignature[0]) { + return true; + } + const left = [definition.returnType, ...definition.parameterTypes]; + const right = [callSignature[0], ...callSignature[1]]; + + // Special case. When comparing "a()" to "a(1)", a() has paramater VOID, and + // a(1) has type UNKNOWN. This will pass as true in the final check of this + // function, even though it's not. + if (left.length === 2 && xor(left[1] === 'void', right[1] === 'void')) { + return false; + } + + return ( + left.length === right.length && + left.every( + (type, index) => + type === right[index] || + type === UNKNOWN_TYPE || + right[index] === UNKNOWN_TYPE + ) + ); +}; + +export const findOverloadDefinition = ( + signature: FunctionSignature, + index: FunctionOverloadIndex +): FunctionOverloadDefinition | undefined => { + return Object.entries(index).reduce< + ReturnType + >((found, [overloadSignature, overloadDefinition]) => { + return ( + found || + (doSignaturesMatch(overloadSignature, overloadDefinition, signature) + ? 
overloadDefinition + : undefined) + ); + }, undefined); +}; + +export const functionUseSignature = ( + node: FunctionCallNode +): FunctionSignature => { + const parameterTypes = + node.args.length === 0 + ? ['void'] + : node.args + .filter((arg) => (arg as any).literal !== ',') + .map(() => UNKNOWN_TYPE); + const returnType = UNKNOWN_TYPE; + return [ + returnType, + parameterTypes, + `${returnType}: ${parameterTypes.join(', ')}`, + ]; +}; + +export const newOverloadIndex = ( + returnType: string, + parameterTypes: string[], + firstReference: AstNode, + declaration?: FunctionNode +): FunctionOverloadDefinition => ({ + returnType, + parameterTypes, + declaration, + references: [firstReference], +}); + +export const findGlobalScope = (scope: Scope): Scope => + scope.parent ? findGlobalScope(scope.parent) : scope; + +export const isDeclaredFunction = (scope: Scope, fnName: string) => + fnName in findGlobalScope(scope).functions; diff --git a/src/parser/test-helpers.ts b/src/parser/test-helpers.ts new file mode 100644 index 0000000..3a0396e --- /dev/null +++ b/src/parser/test-helpers.ts @@ -0,0 +1,194 @@ +import { execSync } from 'child_process'; +import { GrammarError } from 'peggy'; +import util from 'util'; +import generate from './generator'; +import { AstNode, FunctionNode, Program } from '../ast'; +import { Parse, ParserOptions } from './parser'; +import { FunctionScopeIndex, Scope, ScopeIndex } from './scope'; + +export const inspect = (arg: any) => + console.log(util.inspect(arg, false, null, true)); + +export const nextWarn = () => { + console.warn = jest.fn(); + let i = 0; + // @ts-ignore + const mock = console.warn.mock; + return () => mock.calls[i++][0]; +}; + +type Context = { + parse: Parse; + parseSrc: ParseSrc; +}; + +export const buildParser = () => { + execSync( + 'npx peggy --cache -o src/parser/parser.js src/parser/glsl-grammar.pegjs' + ); + const parse = require('./parser').parse as Parse; + const ps = parseSrc(parse); + const ctx: Context = { + 
parse, + parseSrc: ps, + }; + return { + parse, + parseSrc: ps, + debugSrc: debugSrc(ctx), + debugStatement: debugStatement(ctx), + expectParsedStatement: expectParsedStatement(ctx), + parseStatement: parseStatement(ctx), + expectParsedProgram: expectParsedProgram(ctx), + }; +}; + +// Keeping this around in case I need to figure out how to do tracing again +// Most of this ceremony around building a parser is dealing with Peggy's error +// format() function, where the grammarSource has to line up in generate() and +// format() to get nicely formatted errors if there's a syntax error in the +// grammar +// const buildParser = (file: string) => { +// const grammar = fileContents(file); +// try { +// return peggy.generate(grammar, { +// grammarSource: file, +// cache: true, +// trace: false, +// }); +// } catch (e) { +// const err = e as SyntaxError; +// if ('format' in err && typeof err.format === 'function') { +// console.error(err.format([{ source: file, text: grammar }])); +// } +// throw e; +// } +// }; + +export const debugEntry = (bindings: ScopeIndex) => { + return Object.entries(bindings).map( + ([k, v]) => + `${k}: (${v.references.length} references, ${ + v.declaration ? '' : 'un' + }declared): ${v.references.map((r) => r.type).join(', ')}` + ); +}; +export const debugFunctionEntry = (bindings: FunctionScopeIndex) => + Object.entries(bindings).flatMap(([name, overloads]) => + Object.entries(overloads).map( + ([signature, overload]) => + `${name} (${signature}): (${overload.references.length} references, ${ + overload.declaration ? '' : 'un' + }declared): ${overload.references.map((r) => r.type).join(', ')}` + ) + ); + +export const debugScopes = (astOrScopes: Program | Scope[]) => + console.log( + 'Scopes:', + 'scopes' in astOrScopes + ? 
astOrScopes.scopes + : astOrScopes.map((s) => ({ + name: s.name, + types: debugEntry(s.types), + bindings: debugEntry(s.bindings), + functions: debugFunctionEntry(s.functions), + })) + ); + +const middle = /\/\* start \*\/((.|[\r\n])+)(\/\* end \*\/)?/m; + +type ParseSrc = (src: string, options?: ParserOptions) => Program; +const parseSrc = (parse: Parse): ParseSrc => (src, options = {}) => { + const grammarSource = ''; + try { + return parse(src, { + ...options, + grammarSource, + tracer: { + trace: (type) => { + if ( + type.type === 'rule.match' && + type.rule !== 'whitespace' && + type.rule !== 'single_comment' && + type.rule !== 'comment' && + type.rule !== 'digit_sequence' && + type.rule !== 'digit' && + type.rule !== 'fractional_constant' && + type.rule !== 'floating_constant' && + type.rule !== 'translation_unit' && + type.rule !== 'start' && + type.rule !== 'external_declaration' && + type.rule !== 'SEMICOLON' && + type.rule !== 'terminal' && + type.rule !== '_' + ) { + if (type.rule === 'IDENTIFIER' || type.rule === 'TYPE_NAME') { + console.log( + '\x1b[35mMatch literal\x1b[0m', + type.rule, + type.result + ); + } else { + console.log('\x1b[35mMatch\x1b[0m', type.rule); + } + } + // if (type.type === 'rule.fail') { + // console.log('fail', type.rule); + // } + }, + }, + }); + } catch (e) { + const err = e as GrammarError; + if ('format' in err) { + console.error(err.format([{ source: grammarSource, text: src }])); + } + console.error(`Error parsing lexeme!\n"${src}"`); + throw err; + } +}; + +const debugSrc = ({ parseSrc }: Context) => (src: string) => { + inspect(parseSrc(src).program); +}; + +const debugStatement = ({ parseSrc }: Context) => (stmt: AstNode) => { + const program = `void main() {/* start */${stmt}/* end */}`; + const ast = parseSrc(program); + inspect((ast.program[0] as FunctionNode).body.statements[0]); +}; + +const expectParsedStatement = ({ parseSrc }: Context) => ( + src: string, + options = {} +) => { + const program = `void main() 
{/* start */${src}/* end */}`; + const ast = parseSrc(program, options); + const glsl = generate(ast); + if (glsl !== program) { + inspect(ast.program[0]); + // @ts-ignore + expect(glsl.match(middle)[1]).toBe(src); + } +}; + +const parseStatement = ({ parseSrc }: Context) => ( + src: string, + options: ParserOptions = {} +) => { + const program = `void main() {${src}}`; + return parseSrc(program, options); +}; + +const expectParsedProgram = ({ parseSrc }: Context) => ( + src: string, + options?: ParserOptions +) => { + const ast = parseSrc(src, options); + const glsl = generate(ast); + if (glsl !== src) { + inspect(ast); + expect(glsl).toBe(src); + } +}; diff --git a/src/parser/utils.ts b/src/parser/utils.ts index f7b0035..6a5f896 100644 --- a/src/parser/utils.ts +++ b/src/parser/utils.ts @@ -1,4 +1,5 @@ -import type { AstNode, Scope } from '../ast'; +import type { AstNode } from '../ast'; +import { Scope } from './scope'; export const renameBindings = ( scope: Scope, @@ -10,24 +11,17 @@ export const renameBindings = ( node.identifier.identifier = mangle(node.identifier.identifier, node); } else if (node.type === 'identifier') { node.identifier = mangle(node.identifier, node); - } else if ( - node.type === 'parameter_declaration' && - 'identifier' in node.declaration - ) { - node.declaration.identifier.identifier = mangle( - node.declaration.identifier.identifier, - node - ); - } else if (node.type === 'interface_declarator') { - /* intentionally empty, for + } else if (node.type === 'parameter_declaration' && node.identifier) { + node.identifier.identifier = mangle(node.identifier.identifier, node); + /* Ignore case of: layout(std140,column_major) uniform; uniform Material { uniform vec2 prop; } */ - } else { - console.log(node); + } else if (node.type !== 'interface_declarator') { + console.warn('Unknown binding node', node); throw new Error(`Binding for type ${node.type} not recognized`); } }); @@ -40,21 +34,11 @@ export const renameTypes = ( ) => { 
Object.entries(scope.types).forEach(([name, type]) => { type.references.forEach((node) => { - if (node.type === 'struct') { - node.typeName.identifier = mangle(node.typeName.identifier, node); - } else if (node.type === 'identifier') { + if (node.type === 'type_name') { node.identifier = mangle(node.identifier, node); - } else if ( - node.type === 'function_call' && - 'specifier' in node.identifier - ) { - node.identifier.specifier.identifier = mangle( - node.identifier.specifier.identifier, - node - ); } else { - console.log(node); - throw new Error(`Binding for type ${node.type} not recognized`); + console.warn('Unknown type node', node); + throw new Error(`Type ${node.type} not recognized`); } }); }); @@ -64,35 +48,44 @@ export const renameFunctions = ( scope: Scope, mangle: (name: string, node: AstNode) => string ) => { - Object.entries(scope.functions).forEach(([name, binding]) => { - binding.references.forEach((node) => { - if (node.type === 'function') { - node['prototype'].header.name.identifier = mangle( - node['prototype'].header.name.identifier, - node - ); - } else if ( - node.type === 'function_call' && - node.identifier.type === 'postfix' - ) { - node.identifier.expression.identifier.specifier.identifier = mangle( - node.identifier.expression.identifier.specifier.identifier, - node - ); - } else if ( - node.type === 'function_call' && - 'specifier' in node.identifier - ) { - node.identifier.specifier.identifier = mangle( - node.identifier.specifier.identifier, - node - ); - // Structs type names also become constructors. 
However, their renaming is - // handled by bindings - } else if (node.type !== 'struct') { - console.log(node); - throw new Error(`Function for type ${node.type} not recognized`); - } + Object.entries(scope.functions).forEach(([fnName, overloads]) => { + Object.entries(overloads).forEach(([signature, overload]) => { + overload.references.forEach((node) => { + if (node.type === 'function') { + node['prototype'].header.name.identifier = mangle( + node['prototype'].header.name.identifier, + node + ); + } else if ( + node.type === 'function_call' && + node.identifier.type === 'postfix' + ) { + // @ts-ignore + const specifier = node.identifier.expression.identifier.specifier; + if (specifier) { + specifier.identifier = mangle(specifier.identifier, node); + } else { + console.warn('Unknown function node to rename', node); + throw new Error( + `Function specifier type ${node.type} not recognized` + ); + } + } else if ( + node.type === 'function_call' && + 'specifier' in node.identifier && + 'identifier' in node.identifier.specifier + ) { + node.identifier.specifier.identifier = mangle( + node.identifier.specifier.identifier, + node + ); + } else { + console.warn('Unknown function node to rename', node); + throw new Error(`Function for type ${node.type} not recognized`); + } + }); }); }); }; + +export const xor = (a: any, b: any): boolean => (a || b) && !(a && b); diff --git a/src/preprocessor/preprocessor.ts b/src/preprocessor/preprocessor.ts index c84709d..7321fa9 100644 --- a/src/preprocessor/preprocessor.ts +++ b/src/preprocessor/preprocessor.ts @@ -1,4 +1,4 @@ -import { visit, Path, NodeVisitor, AstNode } from '../ast'; +import { NodeVisitor, Path, visit } from '../ast/visit'; import { PreprocessorAstNode, PreprocessorConditionalNode, @@ -129,11 +129,13 @@ type NodeEvaluator = ( visit: (node: PreprocessorAstNode) => any ) => any; -export type NodeEvaluators = { - [NodeType in PreprocessorAstNode['type']]: NodeEvaluator< - Extract - >; -}; +export type NodeEvaluators = 
Partial< + { + [NodeType in PreprocessorAstNode['type']]: NodeEvaluator< + Extract + >; + } +>; const evaluate = (ast: PreprocessorAstNode, evaluators: NodeEvaluators) => { const visit = (node: PreprocessorAstNode) => { @@ -141,6 +143,8 @@ const evaluate = (ast: PreprocessorAstNode, evaluators: NodeEvaluators) => { if (!evaluator) { throw new Error(`No evaluate() evaluator for ${node.type}`); } + + // I can't figure out why evaluator has node type never here // @ts-ignore return evaluator(node, visit); }; @@ -275,10 +279,8 @@ const expandInExpressions = ( ...expressions: PreprocessorAstNode[] ) => { expressions.filter(identity).forEach((expression) => { - // @ts-ignore - visit(expression, { + visitPreprocessedAst(expression, { unary_defined: { - // @ts-ignore enter: (path) => { path.skip(); }, @@ -304,7 +306,6 @@ const evaluateIfPart = (macros: Macros, ifPart: PreprocessorAstNode) => { // TODO: Are all of these operators equivalent between javascript and GLSL? const evaluteExpression = (node: PreprocessorAstNode, macros: Macros) => - // @ts-ignore evaluate(node, { // TODO: Handle non-base-10 numbers. Should these be parsed in the peg grammar? int_constant: (node) => parseInt(node.token, 10), @@ -407,7 +408,9 @@ const evaluteExpression = (node: PreprocessorAstNode, macros: Macros) => }, }); -const shouldPreserve = (preserve: NodePreservers = {}) => (path: Path) => { +const shouldPreserve = (preserve: NodePreservers = {}) => ( + path: PathOverride +) => { const test = preserve?.[path.node.type]; return typeof test === 'function' ? test(path) : test; }; @@ -418,8 +421,7 @@ const shouldPreserve = (preserve: NodePreservers = {}) => (path: Path) => { // visitor/evaluator/path pattern. I took a stab at it but it become tricky to // track all the nested generics. Instead, I hack re-cast the visit function // here, which at least gives some minor type safety. 
-// @ts-ignore -const visitPreprocessedAst = visit as ( +type VisitorOverride = ( ast: PreprocessorAstNode | PreprocessorProgram, visitors: { [NodeType in PreprocessorAstNode['type']]?: NodeVisitor< @@ -428,6 +430,27 @@ const visitPreprocessedAst = visit as ( } ) => void; +// @ts-ignore +const visitPreprocessedAst = visit as VisitorOverride; + +type PathOverride = { + node: NodeType; + parent: PreprocessorAstNode | undefined; + parentPath: Path | undefined; + key: string | undefined; + index: number | undefined; + skip: () => void; + remove: () => void; + replaceWith: (replacer: PreprocessorAstNode) => void; + findParent: (test: (p: Path) => boolean) => Path | undefined; + + skipped?: boolean; + removed?: boolean; + replaced?: any; +}; +const convertPath = (p: Path) => + (p as unknown) as PathOverride; + /** * Perform the preprocessing logic, aka the "preprocessing" phase of the compiler. * Expand macros, evaluate conditionals, etc @@ -437,13 +460,16 @@ const visitPreprocessedAst = visit as ( * TODO: Handle __LINE__ and other constants. 
*/ -export type NodePreservers = { [nodeType: string]: (path: any) => boolean }; +export type NodePreservers = { + [nodeType: string]: (path: PathOverride) => boolean; +}; export type PreprocessorOptions = { defines?: { [definitionName: string]: object }; preserve?: NodePreservers; preserveComments?: boolean; stopOnError?: boolean; + // ignoreMacro?: boolean; }; const preprocessAst = ( @@ -455,14 +481,15 @@ const preprocessAst = ( {} ); // const defineValues = { ...options.defines }; - // @ts-ignore - const { preserve, ignoreMacro } = options; + + // const { preserve, ignoreMacro } = options; + const { preserve } = options; const preserveNode = shouldPreserve(preserve); visitPreprocessedAst(program, { conditional: { - // @ts-ignore - enter: (path) => { + enter: (initialPath) => { + const path = convertPath(initialPath); const { node } = path; // TODO: Determining if we need to handle edge case conditionals here if (preserveNode(path)) { @@ -483,9 +510,7 @@ const preprocessAst = ( if (evaluateIfPart(macros, node.ifPart)) { // Yuck! 
So much type casting in this file - path.replaceWith( - (node as PreprocessorConditionalNode).ifPart.body as AstNode - ); + path.replaceWith(node.ifPart.body); // Keeping this commented out block in case I can find a way to // conditionally evaluate shaders // path.replaceWith({ @@ -501,12 +526,12 @@ const preprocessAst = ( res || (evaluteExpression(elif.expression, macros) && // path/visit hack to remove type error - (path.replaceWith(elif.body as AstNode), true)), + (path.replaceWith(elif.body as PreprocessorAstNode), true)), false ); if (!elseBranchHit) { if (node.elsePart) { - path.replaceWith(node.elsePart.body as AstNode); + path.replaceWith(node.elsePart.body as PreprocessorAstNode); } else { path.remove(); } @@ -515,12 +540,14 @@ const preprocessAst = ( }, }, text: { - enter: (path) => { + enter: (initialPath) => { + const path = convertPath(initialPath); path.node.text = expandMacros(path.node.text, macros); }, }, define_arguments: { - enter: (path) => { + enter: (initialPath) => { + const path = convertPath(initialPath); const { identifier: { identifier }, body, @@ -532,7 +559,8 @@ const preprocessAst = ( }, }, define: { - enter: (path) => { + enter: (initialPath) => { + const path = convertPath(initialPath); const { identifier: { identifier }, body, @@ -549,13 +577,15 @@ const preprocessAst = ( }, }, undef: { - enter: (path) => { + enter: (initialPath) => { + const path = convertPath(initialPath); delete macros[path.node.identifier.identifier]; !preserveNode(path) && path.remove(); }, }, error: { - enter: (path) => { + enter: (initialPath) => { + const path = convertPath(initialPath); if (options.stopOnError) { throw new Error(path.node.message); } @@ -563,23 +593,27 @@ const preprocessAst = ( }, }, pragma: { - enter: (path) => { + enter: (initialPath) => { + const path = convertPath(initialPath); !preserveNode(path) && path.remove(); }, }, version: { - enter: (path) => { + enter: (initialPath) => { + const path = convertPath(initialPath); 
!preserveNode(path) && path.remove(); }, }, extension: { - enter: (path) => { + enter: (initialPath) => { + const path = convertPath(initialPath); !preserveNode(path) && path.remove(); }, }, // TODO: Causes a failure line: { - enter: (path) => { + enter: (initialPath) => { + const path = convertPath(initialPath); !preserveNode(path) && path.remove(); }, }, diff --git a/tsconfig.json b/tsconfig.json index ce2b873..fc25992 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -1,27 +1,12 @@ { "compilerOptions": { - /* Visit https://aka.ms/tsconfig.json to read more about this file */ - - /* Projects */ - // "incremental": true, /* Enable incremental compilation */ - // "composite": true, /* Enable constraints that allow a TypeScript project to be used with project references. */ - // "tsBuildInfoFile": "./", /* Specify the folder for .tsbuildinfo incremental compilation files. */ - // "disableSourceOfProjectReferenceRedirect": true, /* Disable preferring source files instead of declaration files when referencing composite projects */ - // "disableSolutionSearching": true, /* Opt a project out of multi-project reference checking when editing. */ - // "disableReferencedProjectLoad": true, /* Reduce the number of projects loaded automatically by TypeScript. */ - - /* Language and Environment */ - "target": "es5", /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */ - // "lib": [], /* Specify a set of bundled library declaration files that describe the target runtime environment. */ - // "jsx": "preserve", /* Specify what JSX code is generated. */ - // "experimentalDecorators": true, /* Enable experimental support for TC39 stage 2 draft decorators. */ - // "emitDecoratorMetadata": true, /* Emit design-type metadata for decorated declarations in source files. */ - // "jsxFactory": "", /* Specify the JSX factory function used when targeting React JSX emit, e.g. 
'React.createElement' or 'h' */ - // "jsxFragmentFactory": "", /* Specify the JSX Fragment reference used for fragments when targeting React JSX emit e.g. 'React.Fragment' or 'Fragment'. */ - // "jsxImportSource": "", /* Specify module specifier used to import the JSX factory functions when using `jsx: react-jsx*`.` */ - // "reactNamespace": "", /* Specify the object invoked for `createElement`. This only applies when targeting `react` JSX emit. */ - // "noLib": true, /* Disable including any library files, including the default lib.d.ts. */ - // "useDefineForClassFields": true, /* Emit ECMAScript-standard-compliant class fields. */ + "incremental": true, + "target": "es5", + + // This is for VSCode. Without this line, VSCode's Typescript server + // includes *DOM* types in typechecks, and complains that location() is + // window.location, when in fact it's peggy's location() function. + "lib": ["ESNext"], /* Modules */ "module": "commonjs", /* Specify what module code is generated. */ From cd5257c58b15d7bb7ff77f4fd6efc8c164f4c50d Mon Sep 17 00:00:00 2001 From: Andrew Ray Date: Sat, 8 Jul 2023 12:01:17 -0700 Subject: [PATCH 2/8] More type narrowing --- .gitignore | 1 + package.json | 2 +- src/ast/ast-types.ts | 72 +++++++++++++++++++++++++++----------------- 3 files changed, 46 insertions(+), 29 deletions(-) diff --git a/.gitignore b/.gitignore index d6f31e4..2da1884 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ dist .DS_Store tmp src/parser/parser.js +tsconfig.tsbuildinfo \ No newline at end of file diff --git a/package.json b/package.json index df1de0c..78f1d91 100644 --- a/package.json +++ b/package.json @@ -3,7 +3,7 @@ "engines": { "node": ">=16" }, - "version": "2.0.0-beta.0", + "version": "2.0.0-beta.1", "description": "A GLSL ES 1.0 and 3.0 parser and preprocessor that can preserve whitespace and comments", "scripts": { "prepare": "npm run build && ./prepublish.sh", diff --git a/src/ast/ast-types.ts b/src/ast/ast-types.ts index 95d1d55..adbcf6e 
100644 --- a/src/ast/ast-types.ts +++ b/src/ast/ast-types.ts @@ -6,6 +6,7 @@ import { Scope } from '../parser/scope'; +// The overall result of parsing, which incldues the AST and scopes export interface Program { type: 'program'; program: AstNode[]; @@ -14,6 +15,7 @@ export interface Program { wsEnd?: string; } +// Optional source code location info, set by { includeLocation: true } export type LocationInfo = { offset: number; line: number; column: number }; export type LocationObject = { @@ -27,9 +29,18 @@ export interface BaseNode { type Whitespace = string | string[]; -export interface LiteralNode extends BaseNode { +// Types reused across nodes +export type TypeQualifiers = ( + | KeywordNode + | SubroutineQualifierNode + | LayoutQualifierNode +)[]; +export type Semicolon = LiteralNode<';'>; +export type Comma = LiteralNode<','>; + +export interface LiteralNode extends BaseNode { type: 'literal'; - literal: string; + literal: Literal; whitespace: Whitespace; } @@ -81,7 +92,7 @@ export interface BoolConstantNode extends BaseNode { export interface BreakStatementNode extends BaseNode { type: 'break_statement'; break: KeywordNode; - semi: LiteralNode; + semi: Semicolon; } export interface CompoundStatementNode extends BaseNode { @@ -93,22 +104,27 @@ export interface CompoundStatementNode extends BaseNode { export interface ConditionExpressionNode extends BaseNode { type: 'condition_expression'; - specified_type: AstNode; + specified_type: FullySpecifiedTypeNode; identifier: IdentifierNode; operator: LiteralNode; - initializer: AstNode; + initializer: InitializerListNode; } export interface ContinueStatementNode extends BaseNode { type: 'continue_statement'; continue: KeywordNode; - semi: LiteralNode; + semi: LiteralNode<';'>; } export interface DeclarationStatementNode extends BaseNode { type: 'declaration_statement'; - declaration: AstNode; - semi: LiteralNode; + declaration: + | PrecisionNode + | InterfaceDeclaratorNode + | QualifierDeclaratorNode + | 
DeclaratorListNode + | FunctionHeaderNode; + semi: LiteralNode<';'>; } export interface DeclarationNode extends BaseNode { @@ -121,9 +137,9 @@ export interface DeclarationNode extends BaseNode { export interface DeclaratorListNode extends BaseNode { type: 'declarator_list'; - specified_type: AstNode; + specified_type: FullySpecifiedTypeNode; declarations: AstNode[]; - commas: LiteralNode[]; + commas: Comma[]; } export interface DefaultCaseNode extends BaseNode { @@ -136,7 +152,7 @@ export interface DefaultCaseNode extends BaseNode { export interface DiscardStatementNode extends BaseNode { type: 'discard_statement'; discard: KeywordNode; - semi: LiteralNode; + semi: Semicolon; } export interface DoStatementNode extends BaseNode { @@ -147,7 +163,7 @@ export interface DoStatementNode extends BaseNode { lp: LiteralNode; expression: AstNode; rp: LiteralNode; - semi: LiteralNode; + semi: Semicolon; } export interface DoubleConstantNode extends BaseNode { @@ -159,7 +175,7 @@ export interface DoubleConstantNode extends BaseNode { export interface ExpressionStatementNode extends BaseNode { type: 'expression_statement'; expression: AstNode; - semi: LiteralNode; + semi: Semicolon; } export interface FieldSelectionNode extends BaseNode { @@ -191,16 +207,16 @@ export interface ForStatementNode extends BaseNode { body: CompoundStatementNode | SimpleStatement; lp: LiteralNode; init: AstNode; - initSemi: LiteralNode; + initSemi: Semicolon; condition: ConditionExpressionNode; - conditionSemi: LiteralNode; + conditionSemi: Semicolon; operation: AstNode; rp: LiteralNode; } export interface FullySpecifiedTypeNode extends BaseNode { type: 'fully_specified_type'; - qualifiers: AstNode[]; + qualifiers?: TypeQualifiers; specifier: TypeSpecifierNode; } @@ -229,7 +245,7 @@ export interface FunctionPrototypeNode extends BaseNode { type: 'function_prototype'; header: FunctionHeaderNode; parameters: ParameterDeclarationNode[]; - commas: LiteralNode[]; + commas: Comma[]; rp: LiteralNode; } @@ 
-254,7 +270,7 @@ export interface InitializerListNode extends BaseNode { type: 'initializer_list'; lb: LiteralNode; initializers: AstNode[]; - commas: LiteralNode[]; + commas: Comma[]; rb: LiteralNode; } @@ -266,8 +282,8 @@ export interface IntConstantNode extends BaseNode { export interface InterfaceDeclaratorNode extends BaseNode { type: 'interface_declarator'; - qualifiers: AstNode; - interface_type: AstNode; + qualifiers: TypeQualifiers; + interface_type: IdentifierNode; lp: LiteralNode; declarations: AstNode; rp: LiteralNode; @@ -285,8 +301,8 @@ export interface LayoutQualifierNode extends BaseNode { type: 'layout_qualifier'; layout: KeywordNode; lp: LiteralNode; - qualifiers: AstNode[]; - commas: LiteralNode[]; + qualifiers: LayoutQualifierIdNode[]; + commas: Comma[]; rp: LiteralNode; } @@ -319,9 +335,9 @@ export interface PreprocessorNode extends BaseNode { export interface QualifierDeclaratorNode extends BaseNode { type: 'qualifier_declarator'; - qualifiers: AstNode[]; + qualifiers: TypeQualifiers; declarations: IdentifierNode[]; - commas: LiteralNode[]; + commas: Comma[]; } export interface QuantifiedIdentifierNode extends BaseNode { @@ -341,7 +357,7 @@ export interface ReturnStatementNode extends BaseNode { type: 'return_statement'; return: KeywordNode; expression: AstNode; - semi: LiteralNode; + semi: Semicolon; } export interface StructNode extends BaseNode { @@ -356,14 +372,14 @@ export interface StructNode extends BaseNode { export interface StructDeclarationNode extends BaseNode { type: 'struct_declaration'; declaration: StructDeclaratorNode; - semi: LiteralNode; + semi: Semicolon; } export interface StructDeclaratorNode extends BaseNode { type: 'struct_declarator'; specified_type: FullySpecifiedTypeNode; declarations: QuantifiedIdentifierNode[]; - commas: LiteralNode[]; + commas: Comma[]; } export interface SubroutineQualifierNode extends BaseNode { @@ -371,7 +387,7 @@ export interface SubroutineQualifierNode extends BaseNode { subroutine: 
KeywordNode; lp: LiteralNode; type_names: TypeNameNode[]; - commas: LiteralNode[]; + commas: Comma[]; rp: LiteralNode; } From 06200f3cd586a8b7f56e5caf76d153f2ac117b0f Mon Sep 17 00:00:00 2001 From: Andrew Ray Date: Sat, 8 Jul 2023 15:03:16 -0700 Subject: [PATCH 3/8] Some additional type narrowing --- package.json | 2 +- src/ast/ast-types.ts | 186 +++++++++++++++++++++++++------------------ src/ast/ast.test.ts | 2 +- 3 files changed, 111 insertions(+), 79 deletions(-) diff --git a/package.json b/package.json index 78f1d91..e5d3196 100644 --- a/package.json +++ b/package.json @@ -3,7 +3,7 @@ "engines": { "node": ">=16" }, - "version": "2.0.0-beta.1", + "version": "2.0.0-beta.2", "description": "A GLSL ES 1.0 and 3.0 parser and preprocessor that can preserve whitespace and comments", "scripts": { "prepare": "npm run build && ./prepublish.sh", diff --git a/src/ast/ast-types.ts b/src/ast/ast-types.ts index adbcf6e..b1f5bb1 100644 --- a/src/ast/ast-types.ts +++ b/src/ast/ast-types.ts @@ -9,7 +9,7 @@ import { Scope } from '../parser/scope'; // The overall result of parsing, which incldues the AST and scopes export interface Program { type: 'program'; - program: AstNode[]; + program: (PreprocessorNode | DeclarationStatementNode | FunctionNode)[]; scopes: Scope[]; wsStart?: string; wsEnd?: string; @@ -38,15 +38,45 @@ export type TypeQualifiers = ( export type Semicolon = LiteralNode<';'>; export type Comma = LiteralNode<','>; +// This is my best guess at what can be in an expression. It's probably wrong! 
+export type Expression = + | LiteralNode + | KeywordNode + | IdentifierNode + | TypeNameNode + | ArraySpecifierNode + | AssignmentNode + | BinaryNode + | BoolConstantNode + | ConditionExpressionNode + | DefaultCaseNode + | DoubleConstantNode + | FieldSelectionNode + | FloatConstantNode + | FullySpecifiedTypeNode + | FunctionCallNode + | GroupNode + | InitializerListNode + | IntConstantNode + | PostfixNode + | PreprocessorNode + | QuantifiedIdentifierNode + | QuantifierNode + | SwitchCaseNode + | TernaryNode + | TypeSpecifierNode + | UintConstantNode + | UnaryNode; + export interface LiteralNode extends BaseNode { type: 'literal'; literal: Literal; whitespace: Whitespace; } -export interface KeywordNode extends BaseNode { +export interface KeywordNode extends BaseNode { type: 'keyword'; - token: string; + token: Token; whitespace: Whitespace; } @@ -64,15 +94,17 @@ export interface TypeNameNode extends BaseNode { export interface ArraySpecifierNode extends BaseNode { type: 'array_specifier'; - lb: LiteralNode; - expression: AstNode; - rb: LiteralNode; + lb: LiteralNode<'['>; + expression: Expression; + rb: LiteralNode<']'>; } export interface AssignmentNode extends BaseNode { type: 'assignment'; left: AstNode; - operator: LiteralNode; + operator: LiteralNode< + '=' | '*=' | '/=' | '%=' | '+=' | '-=' | '<<=' | '>>=' | '&=' | '^=' | '|=' + >; right: AstNode; } @@ -91,15 +123,15 @@ export interface BoolConstantNode extends BaseNode { export interface BreakStatementNode extends BaseNode { type: 'break_statement'; - break: KeywordNode; + break: KeywordNode<'break'>; semi: Semicolon; } export interface CompoundStatementNode extends BaseNode { type: 'compound_statement'; - lb: LiteralNode; + lb: LiteralNode<'['>; statements: AstNode[]; - rb: LiteralNode; + rb: LiteralNode<']'>; } export interface ConditionExpressionNode extends BaseNode { @@ -112,8 +144,8 @@ export interface ContinueStatementNode extends BaseNode 
{ type: 'continue_statement'; - continue: KeywordNode; - semi: LiteralNode<';'>; + continue: KeywordNode<'continue'>; + semi: Semicolon; } export interface DeclarationStatementNode extends BaseNode { @@ -124,7 +156,7 @@ export interface DeclarationStatementNode extends BaseNode { | QualifierDeclaratorNode | DeclaratorListNode | FunctionHeaderNode; - semi: LiteralNode<';'>; + semi: Semicolon; } export interface DeclarationNode extends BaseNode { @@ -138,31 +170,31 @@ export interface DeclarationNode extends BaseNode { export interface DeclaratorListNode extends BaseNode { type: 'declarator_list'; specified_type: FullySpecifiedTypeNode; - declarations: AstNode[]; + declarations: DeclarationNode[]; commas: Comma[]; } export interface DefaultCaseNode extends BaseNode { type: 'default_case'; statements: []; - default: AstNode; - colon: LiteralNode; + default: KeywordNode<'default'>; + colon: LiteralNode<':'>; } export interface DiscardStatementNode extends BaseNode { type: 'discard_statement'; - discard: KeywordNode; + discard: KeywordNode<'discard'>; semi: Semicolon; } export interface DoStatementNode extends BaseNode { type: 'do_statement'; - do: KeywordNode; + do: KeywordNode<'do'>; body: AstNode; - while: KeywordNode; - lp: LiteralNode; - expression: AstNode; - rp: LiteralNode; + while: KeywordNode<'while'>; + lp: LiteralNode<'('>; + expression: Expression; + rp: LiteralNode<')'>; semi: Semicolon; } @@ -174,7 +206,7 @@ export interface DoubleConstantNode extends BaseNode { export interface ExpressionStatementNode extends BaseNode { type: 'expression_statement'; - expression: AstNode; + expression: Expression; semi: Semicolon; } @@ -203,15 +235,15 @@ type SimpleStatement = export interface ForStatementNode extends BaseNode { type: 'for_statement'; - for: KeywordNode; + for: KeywordNode<'for'>; body: CompoundStatementNode | SimpleStatement; - lp: LiteralNode; + lp: LiteralNode<'('>; init: AstNode; initSemi: Semicolon; condition: ConditionExpressionNode; conditionSemi: 
Semicolon; operation: AstNode; - rp: LiteralNode; + rp: LiteralNode<')'>; } export interface FullySpecifiedTypeNode extends BaseNode { @@ -228,17 +260,17 @@ export interface FunctionNode extends BaseNode { export interface FunctionCallNode extends BaseNode { type: 'function_call'; - identifier: AstNode; - lp: LiteralNode; + identifier: TypeSpecifierNode | PostfixNode; + lp: LiteralNode<'('>; args: AstNode[]; - rp: LiteralNode; + rp: LiteralNode<')'>; } export interface FunctionHeaderNode extends BaseNode { type: 'function_header'; returnType: FullySpecifiedTypeNode; name: IdentifierNode; - lp: LiteralNode; + lp: LiteralNode<'('>; } export interface FunctionPrototypeNode extends BaseNode { @@ -246,32 +278,32 @@ export interface FunctionPrototypeNode extends BaseNode { header: FunctionHeaderNode; parameters: ParameterDeclarationNode[]; commas: Comma[]; - rp: LiteralNode; + rp: LiteralNode<')'>; } export interface GroupNode extends BaseNode { type: 'group'; - lp: LiteralNode; - expression: AstNode; - rp: LiteralNode; + lp: LiteralNode<'('>; + expression: Expression; + rp: LiteralNode<')'>; } export interface IfStatementNode extends BaseNode { type: 'if_statement'; - if: KeywordNode; + if: KeywordNode<'if'>; body: AstNode; - lp: LiteralNode; + lp: LiteralNode<'('>; condition: AstNode; - rp: LiteralNode; + rp: LiteralNode<')'>; else: AstNode[]; } export interface InitializerListNode extends BaseNode { type: 'initializer_list'; - lb: LiteralNode; + lb: LiteralNode<'['>; initializers: AstNode[]; commas: Comma[]; - rb: LiteralNode; + rb: LiteralNode<']'>; } export interface IntConstantNode extends BaseNode { @@ -284,9 +316,9 @@ export interface InterfaceDeclaratorNode extends BaseNode { type: 'interface_declarator'; qualifiers: TypeQualifiers; interface_type: IdentifierNode; - lp: LiteralNode; + lp: LiteralNode<'('>; declarations: AstNode; - rp: LiteralNode; + rp: LiteralNode<')'>; identifier?: QuantifiedIdentifierNode; } @@ -294,21 +326,21 @@ export interface 
LayoutQualifierIdNode extends BaseNode { type: 'layout_qualifier_id'; identifier: IdentifierNode; operator: LiteralNode; - expression: AstNode; + expression: Expression; } export interface LayoutQualifierNode extends BaseNode { type: 'layout_qualifier'; - layout: KeywordNode; - lp: LiteralNode; + layout: KeywordNode<'layout'>; + lp: LiteralNode<'('>; qualifiers: LayoutQualifierIdNode[]; commas: Comma[]; - rp: LiteralNode; + rp: LiteralNode<')'>; } export interface ParameterDeclarationNode extends BaseNode { type: 'parameter_declaration'; - qualifier: AstNode[]; + qualifier: KeywordNode[]; specifier: TypeSpecifierNode; identifier: IdentifierNode; quantifier: ArraySpecifierNode[]; @@ -316,14 +348,14 @@ export interface ParameterDeclarationNode extends BaseNode { export interface PostfixNode extends BaseNode { type: 'postfix'; - expression: AstNode; + expression: Expression; postfix: AstNode; } export interface PrecisionNode extends BaseNode { type: 'precision'; - prefix: KeywordNode; - qualifier: KeywordNode; + prefix: KeywordNode<'prefix'>; + qualifier: KeywordNode<'highp' | 'mediump' | 'lowp'>; specifier: TypeSpecifierNode; } @@ -348,24 +380,24 @@ export interface QuantifiedIdentifierNode extends BaseNode { export interface QuantifierNode extends BaseNode { type: 'quantifier'; - lb: LiteralNode; - expression: AstNode; - rb: LiteralNode; + lb: LiteralNode<'['>; + expression: Expression; + rb: LiteralNode<']'>; } export interface ReturnStatementNode extends BaseNode { type: 'return_statement'; - return: KeywordNode; - expression: AstNode; + return: KeywordNode<'return'>; + expression: Expression; semi: Semicolon; } export interface StructNode extends BaseNode { type: 'struct'; - lb: LiteralNode; - declarations: AstNode[]; - rb: LiteralNode; - struct: KeywordNode; + lb: LiteralNode<'['>; + declarations: StructDeclarationNode[]; + rb: LiteralNode<']'>; + struct: KeywordNode<'struct'>; typeName: TypeNameNode; } @@ -384,39 +416,39 @@ export interface StructDeclaratorNode 
extends BaseNode { export interface SubroutineQualifierNode extends BaseNode { type: 'subroutine_qualifier'; - subroutine: KeywordNode; - lp: LiteralNode; + subroutine: KeywordNode<'subroutine'>; + lp: LiteralNode<'('>; type_names: TypeNameNode[]; commas: Comma[]; - rp: LiteralNode; + rp: LiteralNode<')'>; } export interface SwitchCaseNode extends BaseNode { type: 'switch_case'; statements: []; - case: AstNode; + case: KeywordNode<'case'>; test: AstNode; - colon: LiteralNode; + colon: LiteralNode<':'>; } export interface SwitchStatementNode extends BaseNode { type: 'switch_statement'; - switch: KeywordNode; - lp: LiteralNode; - expression: AstNode; - rp: LiteralNode; - lb: LiteralNode; + switch: KeywordNode<'switch'>; + lp: LiteralNode<'('>; + expression: Expression; + rp: LiteralNode<')'>; + lb: LiteralNode<'['>; cases: AstNode[]; - rb: LiteralNode; + rb: LiteralNode<']'>; } export interface TernaryNode extends BaseNode { type: 'ternary'; - expression: AstNode; - question: LiteralNode; + expression: Expression; + question: LiteralNode<'?'>; left: AstNode; right: AstNode; - colon: LiteralNode; + colon: LiteralNode<':'>; } export interface TypeSpecifierNode extends BaseNode { @@ -433,16 +465,16 @@ export interface UintConstantNode extends BaseNode { export interface UnaryNode extends BaseNode { type: 'unary'; - operator: LiteralNode; - expression: AstNode; + operator: LiteralNode<'++' | '--' | '+' | '-' | '!' 
| '~'>; + expression: Expression; } export interface WhileStatementNode extends BaseNode { type: 'while_statement'; - while: KeywordNode; - lp: LiteralNode; + while: KeywordNode<'while'>; + lp: LiteralNode<'('>; condition: AstNode; - rp: LiteralNode; + rp: LiteralNode<')'>; body: AstNode; } diff --git a/src/ast/ast.test.ts b/src/ast/ast.test.ts index c73d948..593f694 100644 --- a/src/ast/ast.test.ts +++ b/src/ast/ast.test.ts @@ -1,7 +1,7 @@ import { AstNode, BinaryNode, IdentifierNode, LiteralNode } from './ast-types'; import { visit } from './visit'; -const literal = (literal: string): LiteralNode => ({ +const literal = (literal: T): LiteralNode => ({ type: 'literal', literal, whitespace: '', From 33f5070aa547e2da71f9cd2781668aa1ea4cc888 Mon Sep 17 00:00:00 2001 From: Andrew Ray Date: Sat, 8 Jul 2023 16:20:31 -0700 Subject: [PATCH 4/8] Changing operator -> equal as that is only valid case, and making initializer optional --- package.json | 2 +- src/ast/ast-types.ts | 4 ++-- src/parser/generator.ts | 2 +- src/parser/glsl-grammar.pegjs | 18 +++++++++--------- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/package.json b/package.json index e5d3196..ae9c97d 100644 --- a/package.json +++ b/package.json @@ -3,7 +3,7 @@ "engines": { "node": ">=16" }, - "version": "2.0.0-beta.2", + "version": "2.0.0-beta.3", "description": "A GLSL ES 1.0 and 3.0 parser and preprocessor that can preserve whitespace and comments", "scripts": { "prepare": "npm run build && ./prepublish.sh", diff --git a/src/ast/ast-types.ts b/src/ast/ast-types.ts index b1f5bb1..93d8247 100644 --- a/src/ast/ast-types.ts +++ b/src/ast/ast-types.ts @@ -163,8 +163,8 @@ export interface DeclarationNode extends BaseNode { type: 'declaration'; identifier: IdentifierNode; quantifier: ArraySpecifierNode[]; - operator: LiteralNode; - initializer: AstNode; + equal?: LiteralNode<'='>; + initializer?: AstNode; } export interface DeclaratorListNode extends BaseNode { diff --git a/src/parser/generator.ts 
b/src/parser/generator.ts index 66d2cee..8b0622c 100644 --- a/src/parser/generator.ts +++ b/src/parser/generator.ts @@ -85,7 +85,7 @@ const generators: NodeGenerators = { declaration: (node) => generate(node.identifier) + generate(node.quantifier) + - generate(node.operator) + + generate(node.equal) + generate(node.initializer), declarator_list: (node) => generate(node.specified_type) + diff --git a/src/parser/glsl-grammar.pegjs b/src/parser/glsl-grammar.pegjs index 898704e..3a9bd0d 100644 --- a/src/parser/glsl-grammar.pegjs +++ b/src/parser/glsl-grammar.pegjs @@ -835,10 +835,10 @@ init_declarator_list_statement )* semi:SEMICOLON { const declarations = [ - head.declaration, ...tail.map(t => t[1]) + head.partial.declaration, ...tail.map(t => t[1]) ].filter(decl => !!decl.identifier); - addTypeIfFound(context.scope, head.specified_type); + addTypeIfFound(context.scope, head.partial.specified_type); // initial_declaration also adds bindings to support "int a = 1, b = a;" createBindings(context.scope, ...tail.map(t => t[1]).map(decl => [decl.identifier.identifier, decl])); @@ -847,7 +847,7 @@ init_declarator_list_statement node: node( 'declarator_list', { - specified_type: head.specified_type, + specified_type: head.partial.specified_type, declarations, commas: tail.map(t => t[0]) } @@ -862,10 +862,10 @@ subsequent_declaration suffix:( EQUAL initializer )? 
{ - const [operator, initializer] = suffix || []; + const [equal, initializer] = suffix || []; return node( 'declaration', - { identifier, quantifier, operator, initializer } + { identifier, quantifier, equal, initializer } ); } @@ -881,7 +881,7 @@ initial_declaration // No gaurantee of a suffix because fully_specified_type contains a // type_specifier which includes structs and type_names const [identifier, quantifier, suffix_tail] = suffix || []; - const [operator, initializer] = suffix_tail || []; + const [equal, initializer] = suffix_tail || []; // This production is used as part of init_declarator_list, where we also // add bindings, but I add bindings here to support "int a = 1, b = a;" @@ -891,13 +891,13 @@ initial_declaration // Break out the specified type so it can be grouped into the // declarator_list - return { + return partial({ declaration: node( 'declaration', - { identifier, quantifier, operator, initializer } + { identifier, quantifier, equal, initializer } ), specified_type - }; + }); } fully_specified_type From b33af1f55611ca96942559f9ebfc0bfe3e796958 Mon Sep 17 00:00:00 2001 From: Andrew Ray Date: Sat, 8 Jul 2023 17:31:59 -0700 Subject: [PATCH 5/8] Type narrowing on function scope index --- package.json | 2 +- src/parser/scope.ts | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/package.json b/package.json index ae9c97d..49c328e 100644 --- a/package.json +++ b/package.json @@ -3,7 +3,7 @@ "engines": { "node": ">=16" }, - "version": "2.0.0-beta.3", + "version": "2.0.0-beta.4", "description": "A GLSL ES 1.0 and 3.0 parser and preprocessor that can preserve whitespace and comments", "scripts": { "prepare": "npm run build && ./prepublish.sh", diff --git a/src/parser/scope.ts b/src/parser/scope.ts index a4feaef..9955f83 100644 --- a/src/parser/scope.ts +++ b/src/parser/scope.ts @@ -27,7 +27,7 @@ export type FunctionOverloadDefinition = { returnType: string; parameterTypes: string[]; declaration?: FunctionNode; - references: 
AstNode[]; + references: (FunctionNode | FunctionCallNode | FunctionPrototypeNode)[]; }; export type FunctionOverloadIndex = { [signature: string]: FunctionOverloadDefinition; @@ -206,7 +206,7 @@ export const functionUseSignature = ( export const newOverloadIndex = ( returnType: string, parameterTypes: string[], - firstReference: AstNode, + firstReference: FunctionNode | FunctionCallNode | FunctionPrototypeNode, declaration?: FunctionNode ): FunctionOverloadDefinition => ({ returnType, From f53e96e6d622ecb9efd75c1b6b9b63da3fe388ad Mon Sep 17 00:00:00 2001 From: Andrew Ray Date: Sat, 8 Jul 2023 21:25:43 -0700 Subject: [PATCH 6/8] Adding extra test --- src/parser/glsl-grammar.pegjs | 5 +++++ src/parser/parse.test.ts | 36 ++++++++++++++++++++++++++++++++++- 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/src/parser/glsl-grammar.pegjs b/src/parser/glsl-grammar.pegjs index 3a9bd0d..3b1626b 100644 --- a/src/parser/glsl-grammar.pegjs +++ b/src/parser/glsl-grammar.pegjs @@ -324,6 +324,11 @@ QUESTION = token:"?" _:_? { return node('literal', { literal: token, whitespace: IDENTIFIER = !keyword identifier:$([A-Za-z_] [A-Za-z_0-9]*) _:_? { return node('identifier', { identifier, whitespace: _ }); } + +// TODO: type_name is used at the end of "type_specifier_nonarray" which already +// excludes many keywords. Is there an opportunity for speed-up here by inlining +// a negative keyword predicate? And moving keywords into a Set and using & {} +// predicate? TYPE_NAME = !keyword identifier:$([A-Za-z_] [A-Za-z_0-9]*) _:_? 
{ return node('type_name', { identifier, whitespace: _ }); } diff --git a/src/parser/parse.test.ts b/src/parser/parse.test.ts index a0eab17..6c08748 100644 --- a/src/parser/parse.test.ts +++ b/src/parser/parse.test.ts @@ -1,4 +1,5 @@ -import { buildParser } from './test-helpers'; +import { AstNode, TypeSpecifierNode, visit } from '../ast'; +import { buildParser, inspect } from './test-helpers'; let c!: ReturnType; beforeAll(() => (c = buildParser())); @@ -335,6 +336,39 @@ test('Locations with location disabled', () => { expect(ast.scopes[0].location).toBe(undefined); }); +test('built-in function names should be identified as keywords', () => { + console.warn = jest.fn(); + + const src = ` +void main() { + void x = texture2D(); +}`; + const ast = c.parseSrc(src); + + // Built-ins should not appear in scope + expect(ast.scopes[0].functions).not.toHaveProperty('texture2D'); + expect(ast.scopes[1].functions).not.toHaveProperty('texture2D'); + + let specifier: TypeSpecifierNode; + visit(ast, { + function_call: { + enter: (path) => { + inspect(path.node); + if (path.node.identifier.type === 'type_specifier') { + specifier = path.node.identifier; + } + }, + }, + }); + + // Builtins like texture2D should be recognized as a type_name since that's + // how user defined functions are treated + expect(specifier!.specifier.type).toBe('type_name'); + + // Should not warn about built in function call being undefined + expect(console.warn).not.toHaveBeenCalled(); +}); + test('Parser locations', () => { const src = `// Some comment void main() { From 2a707023bab936f6823696d834ff1a6b5d4b1034 Mon Sep 17 00:00:00 2001 From: Andrew Ray Date: Sun, 9 Jul 2023 00:53:28 -0700 Subject: [PATCH 7/8] Fixing bug where function headers should be identifiers if they are not type names --- package.json | 2 +- src/ast/ast-types.ts | 2 +- src/parser/glsl-grammar.pegjs | 105 ++++++++++++++++++++++++---------- src/parser/parse.test.ts | 13 ++--- src/parser/scope.test.ts | 11 ++++ src/parser/utils.ts | 
5 ++ 6 files changed, 98 insertions(+), 40 deletions(-) diff --git a/package.json b/package.json index 49c328e..045fb22 100644 --- a/package.json +++ b/package.json @@ -3,7 +3,7 @@ "engines": { "node": ">=16" }, - "version": "2.0.0-beta.4", + "version": "2.0.0-beta.5", "description": "A GLSL ES 1.0 and 3.0 parser and preprocessor that can preserve whitespace and comments", "scripts": { "prepare": "npm run build && ./prepublish.sh", diff --git a/src/ast/ast-types.ts b/src/ast/ast-types.ts index 93d8247..850dec1 100644 --- a/src/ast/ast-types.ts +++ b/src/ast/ast-types.ts @@ -260,7 +260,7 @@ export interface FunctionNode extends BaseNode { export interface FunctionCallNode extends BaseNode { type: 'function_call'; - identifier: TypeSpecifierNode | PostfixNode; + identifier: IdentifierNode | TypeSpecifierNode | PostfixNode; lp: LiteralNode<'('>; args: AstNode[]; rp: LiteralNode<')'>; diff --git a/src/parser/glsl-grammar.pegjs b/src/parser/glsl-grammar.pegjs index 3b1626b..5594657 100644 --- a/src/parser/glsl-grammar.pegjs +++ b/src/parser/glsl-grammar.pegjs @@ -5,6 +5,8 @@ * ordering. * * Full grammar reference: https://www.khronos.org/registry/OpenGL/specs/gl/GLSLangSpec.4.40.pdf + * Other helpful resources: + * - The ANGLE GLSL Yacc grammar https://github.com/google/angle/blob/main/src/compiler/translator/glslang.y */ // Global parser definitions, shared between all parsers @@ -420,43 +422,83 @@ function_call // Identify the function name, if present. Note: The inner postfix branch // below probably means there's a discrepancy in how the postfix fn is // identified, depending on the prefix. - const fnName = + let fnIdentifier = identifier.type === 'postfix' ? identifier.expression.identifier - ? // Handles the case where the postfix is x().length() - identifier.expression.identifier.specifier.identifier - : // Handles the case where the postfix is x.length() - identifier.expression.specifier.identifier - : // Not a postfix, a normal function call. 
A function_call name is a - // "type_specifier" which can be "float[3](...)" or a TYPE_NAME. If - // it's a TYPE_NAME, it will have an identifier, so add it to the - // referenced scope. If it's a constructor (the "float" case) it - // won't, so this will be null - identifier.specifier.identifier; + // Handles the case where the postfix is x().length() + ? identifier.expression.identifier.specifier + // Handles the case where the postfix is x.length() + : identifier.expression.specifier + // Non-built-in-type (like "vec4") function call + : identifier.specifier; + + let fnName = fnIdentifier.identifier; const n = node('function_call', { ...identifierPartial, args: args || [], rp }); - // Scope check for function call - if( - fnName && - // You can override built-in functions like "noise", so only add "noise" - // to scope usage if it's declared by the user - (isDeclaredFunction(context.scope, fnName) || !builtIns.has(fnName)) - ) { - // Structs constructors look like function calls. If this is a struct, - // track it as such. Otherwise it becomes a function reference - if(isDeclaredType(context.scope, fnName)) { - if(identifier.type === 'type_specifier') { - addTypeReference( - context.scope, - fnName, - identifier.specifier - ); + const isDeclaredFn = isDeclaredFunction(context.scope, fnName); + const isBuiltIn = builtIns.has(fnName); + const isType = isDeclaredType(context.scope, fnName); + + // fnName will be undefined here if the identifier is a keyword + // constructor (like "vec4()"). We don't care about scope/renaming in + // these cases + if(fnName) { + /* + * This complexity is from the intentional choice of the parser to allow + * for undeclared structs and functions, combined with the fact that + * struct names can be used as function constructors. There are two + * cases where this matters: + * 1. "MyStruct()" when MyStruct isn't defined + * 2. 
"texture2D()" which is a built-in function call + * In the Khronos grammar, the first case is supposed to fail, because + * when it checks TYPE_NAME, it doesn't find it declared, and then it + * moves on to the second case, which is what texture2D does. In the + * Khronos grammar, POSTFIX then catches the IDENTIFIER in both cases. In + * this parser, TYPE_NAME catches it, because it's ambiguous if this is + * a type or an identifier, since we alllow undefined types. Fortunately + * this is the only place in the grammar where a TYPE_NAME and IDENTIFIER + * could be used in the same place, so we only have to handle this here. + * + * So once we define the function_call, we need to check if we really did + * hit a type name, or not, or a built in (like "texture2D()"), here + * we mutate the function header to be an identifier rather than a type. + */ + if(!isType && fnIdentifier.type === 'type_name' && (!isDeclaredFn || isBuiltIn)) { + fnIdentifier = node('identifier', { + identifier: fnIdentifier.identifier, + whitespace: fnIdentifier.whitespace + }); + if(n.identifier.type === 'postfix') { + n.identifier.expression.identifier = fnIdentifier; + } else { + n.identifier = fnIdentifier; + } + } + + // Now do the scope check + if( + // You can override built-in functions like "noise", so only add + // "noise" to scope usage if it's declared by the user + (isDeclaredFn || !isBuiltIn) + ) { + // Struct constructors look like function calls. If this is a struct, + // treat it as a type. + if(isType) { + if(identifier.type === 'type_specifier') { + addTypeReference( + context.scope, + fnName, + identifier.specifier + ); + } else { + throw new Error(`Unknown function call identifier type ${ + identifier.type + }. Please file a bug against @shaderfrog/glsl-parser and incldue your source grammar.`) + } } else { - throw new Error(`Unknown function call identifier type ${identifier.type}. 
Please file a bug against @shaderfrog/glsl-parser and incldue your source grammar.`) + addFunctionCallReference(context.scope, fnName, n); } - } else { - addFunctionCallReference(context.scope, fnName, n); } } @@ -493,6 +535,9 @@ function_identifier / head:type_specifier suffix:function_suffix? lp:LEFT_PAREN { return partial({ head: [head, suffix], lp }); } + // / head:IDENTIFIER lp:LEFT_PAREN { + // return partial({ head: [head], lp }); + // } ) { return partial({ lp: identifier.partial.lp, diff --git a/src/parser/parse.test.ts b/src/parser/parse.test.ts index 6c08748..ca65be4 100644 --- a/src/parser/parse.test.ts +++ b/src/parser/parse.test.ts @@ -1,4 +1,4 @@ -import { AstNode, TypeSpecifierNode, visit } from '../ast'; +import { AstNode, FunctionCallNode, TypeSpecifierNode, visit } from '../ast'; import { buildParser, inspect } from './test-helpers'; let c!: ReturnType; @@ -349,21 +349,18 @@ void main() { expect(ast.scopes[0].functions).not.toHaveProperty('texture2D'); expect(ast.scopes[1].functions).not.toHaveProperty('texture2D'); - let specifier: TypeSpecifierNode; + let call: FunctionCallNode; visit(ast, { function_call: { enter: (path) => { - inspect(path.node); - if (path.node.identifier.type === 'type_specifier') { - specifier = path.node.identifier; - } + call = path.node; }, }, }); - // Builtins like texture2D should be recognized as a type_name since that's + // Builtins like texture2D should be recognized as a identifier since that's // how user defined functions are treated - expect(specifier!.specifier.type).toBe('type_name'); + expect(call!.identifier.type).toBe('identifier'); // Should not warn about built in function call being undefined expect(console.warn).not.toHaveBeenCalled(); diff --git a/src/parser/scope.test.ts b/src/parser/scope.test.ts index 2aa72e7..81cf626 100644 --- a/src/parser/scope.test.ts +++ b/src/parser/scope.test.ts @@ -548,3 +548,14 @@ void a() { // Struct should get added to inner scope 
expect(ast.scopes[1].types).toHaveProperty('MyStruct'); }); + +test('postfix is added to scope', () => { + const ast = c.parseSrc(` +void a() {} +void main() { + float y = a().xyz; + float z = a().length(); +}`); + const a = Object.values(ast.scopes[0].functions.a)[0]; + expect(a.references).toHaveLength(3); +}); diff --git a/src/parser/utils.ts b/src/parser/utils.ts index 6a5f896..8ae9eed 100644 --- a/src/parser/utils.ts +++ b/src/parser/utils.ts @@ -79,6 +79,11 @@ export const renameFunctions = ( node.identifier.specifier.identifier, node ); + } else if ( + node.type === 'function_call' && + node.identifier.type === 'identifier' + ) { + node.identifier.identifier = mangle(node.identifier.identifier, node); } else { console.warn('Unknown function node to rename', node); throw new Error(`Function for type ${node.type} not recognized`); From d1c2510ac8f9a6330bffc4a047abcd8d3014680d Mon Sep 17 00:00:00 2001 From: Andrew Ray Date: Sat, 22 Jul 2023 13:47:34 -0700 Subject: [PATCH 8/8] 2.0 release --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 045fb22..2828b15 100644 --- a/package.json +++ b/package.json @@ -3,7 +3,7 @@ "engines": { "node": ">=16" }, - "version": "2.0.0-beta.5", + "version": "2.0.0", "description": "A GLSL ES 1.0 and 3.0 parser and preprocessor that can preserve whitespace and comments", "scripts": { "prepare": "npm run build && ./prepublish.sh",