Skip to content

Commit

Permalink
Merge pull request #41 from code-hike/comment-detection
Browse files Browse the repository at this point in the history
Better comment detection
  • Loading branch information
pomber authored Jul 13, 2024
2 parents 6b596bc + e2c59bd commit 18098e5
Show file tree
Hide file tree
Showing 9 changed files with 243 additions and 9 deletions.
5 changes: 5 additions & 0 deletions .changeset/early-peas-protect.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@code-hike/lighter": patch
---

Better comment detection
2 changes: 1 addition & 1 deletion lib/dist/browser.esm.mjs

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion lib/dist/index.cjs.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion lib/dist/index.esm.mjs

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion lib/dist/worker.esm.mjs

Large diffs are not rendered by default.

59 changes: 57 additions & 2 deletions lib/src/comments.ts
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@ export function extractCommentsFromCode(
const { annotations, lineWithoutComments } = getAnnotationsFromLine(
line,
annotationExtractor,
lineNumber
lineNumber,
lang
);

allAnnotations.push(...annotations);
Expand Down Expand Up @@ -99,14 +100,68 @@ export function extractCommentsFromCode(
return { newCode, annotations };
}

/**
 * Line-comment prefixes for the languages that don't have a PUNCTUATION token.
 *
 * Keys are language ids; each value is the literal text that starts a
 * single-line comment in that language. `getAnnotationsFromLine` uses this
 * table to split the prefix off a COMMENT token and re-emit it as a
 * PUNCTUATION token.
 *
 * Typed as a string-keyed record because it is looked up with an arbitrary
 * `lang: string`, which an inferred object-literal type does not allow under
 * `strict`.
 */
const prefixes: Record<string, string> = {
  "actionscript-3": "//",
  ada: "--",
  asm: "#",
  dart: "//",
  fsharp: "//",
  graphql: "#",
  http: "#",
  rust: "//",
  sparql: "#",
  wgsl: "//",
  jsonnet: "//",
  kql: "//",
  zenscript: "//",
  kusto: "//",
  turtle: "#",
  abap: "*",
  beancount: ";",
  kotlin: "//",
  hlsl: "//",
  berry: "#",
  cypher: "//",
  elm: "--",
  nix: "#",
  viml: '"',
  solidity: "//",
  bat: "REM",
  shaderlab: "//",
  sas: "*",
};

function getAnnotationsFromLine(
tokens: Token[],
annotationExtractor: AnnotationExtractor,
lineNumber: number
lineNumber: number,
lang: string
): {
annotations: RawAnnotation[];
lineWithoutComments: Token[] | null;
} {
// convert prefix to PUNCTUATION
if (
lang in prefixes &&
tokens.some((token) => token.style.color === COMMENT)
) {
const prefix = prefixes[lang];
tokens = tokens.flatMap((token) => {
if (token.style.color === COMMENT && token.content.startsWith(prefix)) {
const content = token.content.slice(prefix.length);
const t = [
{ content: prefix, style: { color: PUNCTUATION } },
] as Token[];
if (content.length) {
t.push({ content, style: token.style });
}
return t;
}
return [token];
});
}

// if no punctuation return empty
if (!tokens.some((token) => token.style.color === PUNCTUATION)) {
return { annotations: [], lineWithoutComments: tokens };
Expand Down
4 changes: 2 additions & 2 deletions lib/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -185,12 +185,12 @@ export async function extractAnnotations(
}

await preloadGrammars([lang]);
const { grammar } = getGrammar(lang);
const { grammar, langId } = getGrammar(lang);

const { newCode, annotations } = extractCommentsFromCode(
code,
grammar,
lang,
langId,
annotationExtractor
);

Expand Down
5 changes: 4 additions & 1 deletion lib/test/annotations.ts
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,10 @@ function C() {
}

const extractor = (comment: string) => {
const regex = /\s*(!?[\w-]+)?(\([^\)]*\)|\[[^\]]*\])?(.*)$/;
const annotationPrefix = "!";
const regex = new RegExp(
`\\s*(${annotationPrefix}?[\\w-]+)?(\\([^\\)]*\\)|\\[[^\\]]*\\])?(.*)$`
);
const match = comment.match(regex);
const name = match[1];
const rangeString = match[2];
Expand Down
171 changes: 171 additions & 0 deletions lib/test/comments.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
import { describe, expect, test } from "vitest";
import { extractAnnotations, highlight } from "..";
/**
 * Test fixtures: `[code, lang]` pairs where `code` is a one-line comment
 * written in `lang` whose body is exactly " foo".
 *
 * Each language appears once. Declared with `let` so the list can be
 * temporarily replaced by a smaller one while debugging a single language.
 */
let codes: [string, string][] = [
  // Single-line comment using //
  ["// foo", "actionscript-3"],
  ["// foo", "apex"],
  ["// foo", "c#"],
  ["// foo", "cpp"],
  ["// foo", "cs"],
  ["// foo", "csharp"],
  ["// foo", "dart"],
  ["// foo", "f#"],
  ["// foo", "go"],
  ["// foo", "groovy"],
  ["// foo", "java"],
  ["// foo", "javascript"],
  ["// foo", "js"],
  ["// foo", "jsx"],
  ["// foo", "less"],
  ["// foo", "objective-c"],
  ["// foo", "objective-cpp"],
  ["// foo", "rust"],
  ["// foo", "scala"],
  ["// foo", "swift"],
  ["// foo", "typescript"],
  ["// foo", "ts"],
  ["// foo", "tsx"],
  ["// foo", "verilog"],
  ["// foo", "wgsl"],
  ["// foo", "jison"],
  ["// foo", "jsonnet"],
  ["// foo", "kql"],
  ["// foo", "zenscript"],
  ["// foo", "kusto"],

  // Single-line comment using #
  ["# foo", "asm"],
  ["# foo", "bash"],
  ["# foo", "coffee"],
  ["# foo", "crystal"],
  ["# foo", "docker"],
  ["# foo", "dockerfile"],
  ["# foo", "elixir"],
  ["# foo", "fish"],
  ["# foo", "gdscript"],
  ["# foo", "graphql"],
  ["# foo", "http"],
  ["# foo", "ini"],
  ["# foo", "julia"],
  ["# foo", "make"],
  ["# foo", "makefile"],
  ["# foo", "perl"],
  ["# foo", "perl6"],
  ["# foo", "python"],
  ["# foo", "py"],
  ["# foo", "r"],
  ["# foo", "raku"],
  ["# foo", "shell"],
  ["# foo", "shellscript"],
  ["# foo", "tcl"],
  ["# foo", "toml"],
  ["# foo", "txt"],
  ["# foo", "yaml"],
  ["# foo", "yml"],
  ["# foo", "zsh"],
  ["# foo", "turtle"],

  // Single-line comment using ;
  ["; foo", "lisp"],
  ["; foo", "clj"],
  ["; foo", "clojure"],
  ["; foo", "scheme"],

  // Comment delimited by double quotes
  [`" foo"`, "smalltalk"],

  // Single-line comment using --
  ["-- foo", "ada"],
  ["-- foo", "haskell"],
  ["-- foo", "sql"],
  ["-- foo", "lua"],

  // Single-line comment using %
  ["% foo", "matlab"],
  ["% foo", "tex"],

  // Special single-line comment formats
  ["{ foo}", "pascal"],
  ["# foo", "sh"],
  ["# foo", "sparql"],
  ["<!-- foo-->", "vue-html"],
  ["* foo", "abap"],
  ["; foo", "beancount"],
  ["' foo", "vb"],
  ["<!-- foo-->", "html"],

  // more
  ["# foo", "imba"],
  ["// foo", "kotlin"],
  ["% foo", "erl"],
  ["<%# foo%>", "erb"],
  ["% foo", "erlang"],
  ["// foo", "glimmer-js"],
  ["// foo", "glimmer-ts"],
  ["; foo", "reg"],
  ["* foo", "stata"],
  [`" foo`, "vim"],
  [`" foo`, "viml"],
  [`" foo`, "vimscript"],
  ["// foo", "hlsl"],
  ["# foo", "berry"],
  ["// foo", "cypher"],
  ["-- foo", "elm"],
  ["# foo", "nix"],
  ["// foo", "solidity"],
  ["REM foo", "bat"],
  ["REM foo", "batch"],
  ["// foo", "shader"],
  ["// foo", "shaderlab"],
  ["* foo", "sas"],

  // fail
  // ["// foo", "apl"],
  // ["# foo", "shellsession"],
  // ["(* foo *)", "ocaml"],
];

// codes = [
// // test
// ["// foo", "actionscript-3"],
// ];

// One test per fixture: the extractor callback must be invoked exactly once,
// and the text it receives must be the comment body without its delimiters.
describe.each(codes)("extract annotations", (code, lang) => {
  test(lang, async () => {
    // Every comment string passed to the extractor, in call order.
    const seen: string[] = [];

    await extractAnnotations(code, lang, (comment) => {
      seen.push(comment);
      // Returning null: this test only records what the extractor is given.
      return null;
    });

    // Debugging aid when a language fails: highlight the snippet with scopes
    // enabled and log the first line to inspect its tokens.
    //
    //   const h = await highlight(code, lang, "dark-plus", { scopes: true });
    //   console.log(lang, h.lines[0]);

    expect(seen).toHaveLength(1);
    expect(seen[0]).toBe(" foo");
  });
});

0 comments on commit 18098e5

Please sign in to comment.