Skip to content

Commit

Permalink
Merge pull request yoctoproject#8 from savoirfairelinux/Feature-10094…
Browse files Browse the repository at this point in the history
…-Improve-syntax-parser

Feat: Implement basic semantic highlighting for functions and variables
  • Loading branch information
WilsonZiweiWang authored Sep 29, 2023
2 parents 1bde4b6 + 449a2ef commit ed193fe
Show file tree
Hide file tree
Showing 3 changed files with 187 additions and 3 deletions.
169 changes: 169 additions & 0 deletions client/src/BitBakeDocumentSemanticTokensProvider.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
import { SemanticTokensLegend, SemanticTokensBuilder } from 'vscode'
import type { TextDocument, CancellationToken, SemanticTokens, DocumentSemanticTokensProvider } from 'vscode'

const tokenTypes = new Map<string, number>()
const tokenModifiers = new Map<string, number>()

interface IParsedToken {
line: number
startCharacter: number
length: number
tokenType: string
tokenModifiers: string[]
}

const generateSemanticTokensLegend = (): SemanticTokensLegend => {
const tokenTypesLegend = [
'function', 'variable'
]
tokenTypesLegend.forEach((tokenType, index) => tokenTypes.set(tokenType, index))

const tokenModifiersLegend = [
'declaration'
]
tokenModifiersLegend.forEach((tokenModifier, index) => tokenModifiers.set(tokenModifier, index))

return new SemanticTokensLegend(tokenTypesLegend, tokenModifiersLegend)
}

export const legend = generateSemanticTokensLegend()

export class BitBakeDocumentSemanticTokensProvider implements DocumentSemanticTokensProvider {
async provideDocumentSemanticTokens (document: TextDocument, token: CancellationToken): Promise<SemanticTokens> {
const allTokens = this._parseText(document.getText())
const builder = new SemanticTokensBuilder(legend)
allTokens.forEach((token) => {
builder.push(token.line, token.startCharacter, token.length, this._encodeTokenType(token.tokenType), this._encodeTokenModifiers(token.tokenModifiers))
})
return builder.build()
}

// Check node_modules/@types/vscode/index.d.ts for more encoding details
private _encodeTokenType (tokenType: string): number {
if (tokenTypes.has(tokenType)) {
return Number(tokenTypes.get(tokenType))
} else if (tokenType === 'notInLegend') {
return tokenTypes.size + 2
}
return 0
}

private _encodeTokenModifiers (strTokenModifiers: string[]): number {
let result = 0
for (let i = 0; i < strTokenModifiers.length; i++) {
const tokenModifier = strTokenModifiers[i]
if (tokenModifiers.has(tokenModifier)) {
result = result | (1 << Number(tokenModifiers.get(tokenModifier)))
} else if (tokenModifier === 'notInLegend') {
result = result | (1 << tokenModifiers.size + 2)
}
}
return result
}

private _parseText (text: string): IParsedToken[] {
/**
* This function traverse the document twice. In the first traverse, the function and variable declarations will be matched and stored in an array which will be used to construct another array of regex for the second traverse. In the second traverse, the array of regex is used to match the function and variable references.
*/

const resultTokens: IParsedToken[] = []
const lines = text.split(/\r\n|\r|\n/)
let declaredFunctions: string[] = []
let declaredVariables: string[] = []

const functionStartsWithDefRegex = /(?<!(#.*)|'.*|".*)(?<=def\s+)(?<name>[a-zA-Z_][\w_]*)(?<operator>:(append|prepend|remove))?(?=\s*\()/
const functionStartsWithPythonRegex = /(?<!(#.*)|'.*|".*)(?<=python\s+)(?<name>[a-zA-Z_][\w_]*)(?<operator>:(append|prepend|remove))?(?=\s*\()/
const shellFunctionsRegex = /(?<!(#.*)|'.*|".*)(?<name>[a-zA-Z_][\w_]*)(?<operator>:(append|prepend|remove))?(?=\s*\(.*\)\s*\{)/
const anonymousFunctionsRegex = /(\bpython)(?=\s*\(.*\)\s*\{)/
const variableDeclarationRegex = /(?<name>[a-zA-Z_][\w_]*)(?<operator>:(append|prepend|remove))?(?=\s*=\s*)(?!\s*\()/

// First traverse: Match functions and variables declarations
for (let i = 0; i < lines.length; i++) {
const line = lines[i]

const matchFunctionStartsWithDef = functionStartsWithDefRegex.exec(line)
const matchFunctionStartsWithPython = functionStartsWithPythonRegex.exec(line)
const matchShellFunction = shellFunctionsRegex.exec(line)
const matchVariableDeclaration = variableDeclarationRegex.exec(line)
const matchAnonymousFunction = anonymousFunctionsRegex.exec(line)

if (matchAnonymousFunction !== null) {
// Avoid mistakening anonymous functions as shell function declaration
continue
}
if (matchFunctionStartsWithDef?.groups?.name.length !== undefined) {
resultTokens.push({
line: i,
startCharacter: matchFunctionStartsWithDef.index,
length: matchFunctionStartsWithDef?.groups?.name.length,
tokenType: 'function',
tokenModifiers: ['declaration']
})
declaredFunctions.push(matchFunctionStartsWithDef?.groups?.name)
} else if (matchFunctionStartsWithPython?.groups?.name.length !== undefined) {
resultTokens.push({
line: i,
startCharacter: matchFunctionStartsWithPython.index,
length: matchFunctionStartsWithPython?.groups?.name.length,
tokenType: 'function',
tokenModifiers: ['declaration']
})

declaredFunctions.push(matchFunctionStartsWithPython?.groups?.name)
} else if (matchShellFunction?.groups?.name.length !== undefined) {
resultTokens.push({
line: i,
startCharacter: matchShellFunction.index,
length: matchShellFunction.groups?.name.length,
tokenType: 'function',
tokenModifiers: ['declaration']
})

declaredFunctions.push(matchShellFunction.groups?.name)
} else if (matchVariableDeclaration?.groups?.name.length !== undefined) {
resultTokens.push({
line: i,
startCharacter: matchVariableDeclaration.index,
length: matchVariableDeclaration.groups?.name.length,
tokenType: 'variable',
tokenModifiers: ['declaration']
})

declaredVariables.push(matchVariableDeclaration.groups?.name)
}
}
// Remove duplicates
declaredFunctions = [...new Set(declaredFunctions)]
declaredVariables = [...new Set(declaredVariables)]
// Sort the regex from longest to shortest for precise matching
declaredFunctions.sort((prev, next) => next.length - prev.length)
declaredVariables.sort((prev, next) => next.length - prev.length)

const declaredObjectsRegex: Array<{ regex: RegExp, type: string }> = []

declaredFunctions.forEach(declaredFunction => {
declaredObjectsRegex.push({ regex: new RegExp(`(?<!(#.*)|'.*|".*)(?<!python|def\\s+)${declaredFunction}\\b`, 'g'), type: 'function' })
})
declaredVariables.forEach(declaredVariable => {
declaredObjectsRegex.push({ regex: new RegExp(`(?<!(#.*)|'.*|".*)\\b${declaredVariable}\\b(?!\\s*=\\s*)`, 'g'), type: 'variable' })
})
// Second traverse: Match functions and variables references
for (let i = 0; i < lines.length; i++) {
const line = lines[i]

for (let j = 0; j < declaredObjectsRegex.length; ++j) {
const match = declaredObjectsRegex[j].regex.exec(line)
if (match !== null) {
resultTokens.push({
line: i,
startCharacter: match.index,
length: match[0].length,
tokenType: declaredObjectsRegex[j].type,
tokenModifiers: []
})
}
}
}
return resultTokens
}
}
6 changes: 5 additions & 1 deletion client/src/extension.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,14 @@

import * as path from 'path'

import { workspace } from 'vscode'
import { workspace, languages } from 'vscode'
import { LanguageClient, TransportKind } from 'vscode-languageclient/node'

import type { ExtensionContext } from 'vscode'
import type { LanguageClientOptions, ServerOptions } from 'vscode-languageclient/node'

import { legend, BitBakeDocumentSemanticTokensProvider } from './BitBakeDocumentSemanticTokensProvider'

let client: LanguageClient
export async function activate (context: ExtensionContext): Promise<void> {
console.log('Congratulations, your extension "BitBake" is now active!')
Expand Down Expand Up @@ -48,6 +50,8 @@ export async function activate (context: ExtensionContext): Promise<void> {

// Start the client and launch the server
await client.start()

context.subscriptions.push(languages.registerDocumentSemanticTokensProvider({ language: 'bitbake', scheme: 'file' }, new BitBakeDocumentSemanticTokensProvider(), legend))
}

export function deactivate (): Thenable<void> | undefined {
Expand Down
15 changes: 13 additions & 2 deletions client/syntaxes/bitbake.tmLanguage.json
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@
}
},
"functions": {
"begin": "\\s*([a-zA-Z0-9_-][\\w-]*)(:(append|prepend|remove))?(?=\\s*\\()",
"begin": "\\s*([a-zA-Z_][\\w_]*)(:(append|prepend|remove))?(?=\\s*\\()",
"beginCaptures": {
"1": {
"name": "entity.name.function.python.bb"
Expand Down Expand Up @@ -189,6 +189,17 @@
{
"include": "#keywords"
},
{
"match": "(\\[)",
"name": "support.function.bracket-open.bb"
},
{
"match": "(\\])",
"name": "support.function.bracket-close.bb"
},
{
"include": "#numeric"
},
{
"include": "#functions"
},
Expand All @@ -207,7 +218,7 @@
]
},
"variable-name": {
"match": "([a-zA-Z0-9_-][\\w-]*)(?!\\s*\\()",
"match": "([a-zA-Z_][\\w_]*)",
"captures": {
"1": {
"name": "variable.other.names.bb"
Expand Down

0 comments on commit ed193fe

Please sign in to comment.