Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a custom syntax highlight file to support new R 4.0 pipe #2290

Open
wants to merge 26 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
776c9f6
Revert hack for supporting new syntax highlighter
cderv Jan 19, 2022
45b191b
Add support for pipebind operator too
cderv Jan 19, 2022
28ca32a
remove hack in `pdf_document()` too
cderv Jan 19, 2022
80cf60b
Add syntax-definition argument for all formats
cderv Jan 19, 2022
04f3284
Add an arg-building function like the others
cderv Jan 19, 2022
b62fe2d
Add test
cderv Jan 19, 2022
d56e562
`--syntax-definition` is supported since Pandoc 2.0
cderv Jan 19, 2022
a5addb2
Adapt test to pandoc requirement
cderv Jan 19, 2022
8a515d9
put comment below and not at the top of the document
cderv Jan 19, 2022
26038ee
Try removing the problematic definition in DOCTYPE
cderv Jan 19, 2022
50ecbc3
Revert "Try removing the problematic definition in DOCTYPE"
cderv Jan 20, 2022
07a7309
Only add the syntax file for Pandoc > 2.15
cderv Jan 20, 2022
5f6e8e6
Merge commit '6fc53d89826491100c77f7790169f8afd629907c'
cderv Mar 9, 2022
5514393
Update to r.xml
cderv Mar 9, 2022
3bbce2b
Adapt comment
cderv Mar 9, 2022
7f8ad89
Add also modified markdown.xml version
cderv Mar 9, 2022
a3664a6
Add generic tests for bundled language files correct addition
cderv Mar 9, 2022
303b504
Update logic about setting default file
cderv Mar 10, 2022
0cfdc1c
Merge branch 'main' into new-syntax-highlight
cderv Mar 10, 2022
fd6b58b
probably requires pandoc escaping of path
cderv Mar 10, 2022
e475b7b
Enter debugging mode in GHA
cderv Mar 10, 2022
5fa9866
not all pandoc formats support highlighting
cderv Mar 10, 2022
2d02368
Revert "probably requires pandoc escaping of path"
cderv Mar 10, 2022
e481f4e
Add a test for last change
cderv Mar 10, 2022
d5b3495
Revert "Enter debugging mode in GHA"
cderv Mar 10, 2022
6ad1799
try to use the ... argument instead
yihui Mar 10, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ export(pandoc_metadata_arg)
export(pandoc_options)
export(pandoc_path_arg)
export(pandoc_self_contained_html)
export(pandoc_syntax_definition_args)
export(pandoc_template)
export(pandoc_toc_args)
export(pandoc_variable_arg)
Expand Down
18 changes: 4 additions & 14 deletions R/html_document_base.R
Original file line number Diff line number Diff line change
Expand Up @@ -158,20 +158,14 @@ html_document_base <- function(theme = NULL,
}

post_processor <- function(metadata, input_file, output_file, clean, verbose) {
# read the output file
output_str <- read_utf8(output_file)

# TODO: remove this temporary fix after the syntax highlighting problem is
# fixed in Pandoc https://github.com/rstudio/bookdown/issues/1157
s1 <- '<span class="sc">|</span><span class="er">&gt;</span>'
s2 <- '<span class="ot">=</span><span class="er">&gt;</span>'

# if there are no preserved chunks to restore and no resource to copy then no
# post-processing is necessary
if ((length(preserved_chunks) == 0 && !isTRUE(copy_resources) && self_contained) &&
!length(c(grep(s1, output_str, fixed = TRUE), grep(s2, output_str, fixed = TRUE))))
if (length(preserved_chunks) == 0 && !isTRUE(copy_resources) && self_contained)
return(output_file)

# read the output file
output_str <- read_utf8(output_file)

# if we preserved chunks, restore them
if (length(preserved_chunks) > 0) {
# Pandoc adds an empty <p></p> around the IDs of preserved chunks, and we
Expand Down Expand Up @@ -209,10 +203,6 @@ html_document_base <- function(theme = NULL,
output_str <- process_images(output_str, image_relative)
}

# fix the issue mentioned in TODO above
output_str <- gsub(s1, '<span class="sc">|&gt;</span>', output_str, fixed = TRUE)
output_str <- gsub(s2, '<span class="ot">=&gt;</span>', output_str, fixed = TRUE)

write_utf8(output_str, output_file)
output_file
}
Expand Down
7 changes: 7 additions & 0 deletions R/pandoc.R
Original file line number Diff line number Diff line change
Expand Up @@ -808,6 +808,13 @@ pandoc_lua_filter_args <- function(lua_files) {
if (pandoc2.0()) c(rbind("--lua-filter", pandoc_path_arg(lua_files)))
}

#' @rdname pandoc_args
#' @param xml_files Character vector of file paths to KDE syntax files. Paths
#'   will be transformed by \code{\link{pandoc_path_arg}}. When empty or
#'   \code{NULL}, no argument is generated.
#' @export
pandoc_syntax_definition_args <- function(xml_files) {
  # Nothing to pass: without this guard rbind() ignores the zero-length paths
  # and the result would be a dangling "--syntax-definition" flag with no value.
  if (length(xml_files) == 0) return(invisible(NULL))
  # The flag is only emitted when pandoc2.0() is TRUE; the `if` has no else
  # branch, so the function returns NULL (invisibly) otherwise.
  # rbind() + c() interleaves the flag with every path:
  #   c("--syntax-definition", path1, "--syntax-definition", path2, ...)
  if (pandoc2.0()) c(rbind("--syntax-definition", pandoc_path_arg(xml_files)))
}

# quote args if they need it
quoted <- function(args) {
Expand Down
12 changes: 0 additions & 12 deletions R/pdf_document.R
Original file line number Diff line number Diff line change
Expand Up @@ -196,17 +196,6 @@ pdf_document <- function(toc = FALSE,
output_dir)
}

post_processor <- function(metadata, input_file, output_file, clean, verbose) {
  # TODO: remove this temporary fix after the syntax highlighting problem is
  # fixed in Pandoc https://github.com/rstudio/bookdown/issues/1157
  # the pipe operator split over two tokens, the second one flagged as an error
  broken_pipe <- '\\SpecialCharTok{|}\\ErrorTok{\\textgreater{}}'
  # the same operator merged into a single special-character token
  merged_pipe <- '\\SpecialCharTok{|\\textgreater{}}'
  tex_lines <- read_utf8(output_file)
  # leave the file untouched when the broken sequence does not occur
  if (!any(grepl(broken_pipe, tex_lines, fixed = TRUE))) return(output_file)
  write_utf8(gsub(broken_pipe, merged_pipe, tex_lines, fixed = TRUE), output_file)
  output_file
}

# Forwards every argument it receives to general_intermediates_generator(),
# with saved_files_dir prepended as the first argument.
# NOTE(review): saved_files_dir comes from the enclosing scope, which is not
# visible in this hunk - confirm where it is set.
intermediates_generator <- function(...) {
general_intermediates_generator(saved_files_dir, ...)
}
Expand All @@ -226,7 +215,6 @@ pdf_document <- function(toc = FALSE,
keep_md = keep_md,
df_print = df_print,
pre_processor = pre_processor,
post_processor = post_processor,
intermediates_generator = intermediates_generator
)
}
Expand Down
18 changes: 18 additions & 0 deletions R/render.R
Original file line number Diff line number Diff line change
Expand Up @@ -885,6 +885,7 @@ render <- function(input,
input <- path.expand(input)
output <- path.expand(output)

# Tweak Pandoc arguments for all formats
pandoc_args <- output_format$pandoc$args

# if Lua filters are provided, add the command line switch
Expand All @@ -895,6 +896,11 @@ render <- function(input,
}
pandoc_args <- c(lua_filters, pandoc_args)

# if pandoc highlighting is used, add the syntax definition file
# supporting new pipe operator
# TODO: remove when updated upstream
pandoc_args <- add_syntax_definition(pandoc_args)

# in case the output format turns on the --file-scope flag, run its
# file_scope function to split the input into multiple files
input_files <- input
Expand Down Expand Up @@ -1210,3 +1216,15 @@ file_scope_split <- function(input, fun) {

unlist(input_files)
}

# Append the bundled `r.xml` syntax definition to the Pandoc arguments.
#
# @param args Character vector of Pandoc command line arguments.
# @return `args` unchanged when highlighting is disabled or when a file named
#   `r.xml` is already passed through `--syntax-definition`; otherwise `args`
#   followed by the arguments pointing to the bundled file.
add_syntax_definition <- function(args) {
  # do not add if not Pandoc highlighting
  if (detect_pattern("--no-highlight", args)) return(args)

  # Collect the files handed to --syntax-definition, in both forms:
  #   --syntax-definition FILE   (flag and value are separate elements)
  #   --syntax-definition=FILE   (flag and value in one element)
  # Only these values are inspected, so an unrelated argument that happens to
  # contain "r.xml" can no longer suppress the bundled definition.
  flag <- "--syntax-definition"
  separate <- args[which(args == flag) + 1L]
  inline_re <- paste0("^", flag, "=")
  inline <- sub(inline_re, "", grep(inline_re, args, value = TRUE))
  user_files <- c(separate, inline)
  # a trailing flag without a value yields NA when indexing past the end
  user_files <- user_files[!is.na(user_files)]

  # do not add if user provided another r.xml file: the file name itself must
  # be exactly "r.xml" (optionally quoted), so "other.xml" does not count
  if (detect_pattern("(^|[/\\\\\"'])r\\.xml[\"']?$", user_files)) {
    return(args)
  }

  # otherwise add our file
  c(args, pandoc_syntax_definition_args(pkg_file_highlight("r.xml")))
}
4 changes: 4 additions & 0 deletions R/util.R
Original file line number Diff line number Diff line change
Expand Up @@ -502,6 +502,10 @@ join <- function(..., sep = "", collapse = "") {
paste(..., sep = sep, collapse = collapse)
}

# Does `pattern` occur in at least one element of `vec`?
#
# `...` is passed on to grepl() (e.g. `fixed = TRUE`). Returns a single
# logical value.
detect_pattern <- function(pattern, vec, ...) {
  matched <- grepl(pattern, vec, ...)
  any(matched)
}

shell_exec <- function(cmd, intern = FALSE, wait = TRUE, ...) {
if (Sys.info()[["sysname"]] == "Windows")
shell(cmd, intern = intern, wait = wait, ...)
Expand Down
175 changes: 175 additions & 0 deletions inst/rmarkdown/highlight/r.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE language>
<!-- Kate 2.5 (KDE 3.5) highlighting module for R
based on an earlier version by E.L. Willighagen. Code folding code by Ben Goodrich
version 2.0: (c) 2006 Thomas Friedrichsmeier, Arne Henningsen, and the RKWard Team
license: GPL v2
Kate : http://kate.kde.org/
R : http://www.r-project.org/
RKWard : http://rkward.kde.org/
-->
<!-- Manually edited by C.DERVIEUX, RStudio:
From https://invent.kde.org/frameworks/syntax-highlighting/-/blob/master/data/syntax/r.xml
with adjustments to support the new pipe syntax introduced in R 4.1: |> and =>
-->
<language version="12" kateversion="5.0" name="R Script" section="Scientific" extensions="*.R;*.r;*.S;*.s;*.q" mimetype="" license="GPLv2">
<highlighting>

<list name="controls">
<item>for</item>
<item>in</item>
<item>next</item>
<item>break</item>
<item>while</item>
<item>repeat</item>
<item>if</item>
<item>else</item>
<item>switch</item>
<item>function</item>
</list>
<list name="words">
<item>TRUE</item>
<item>FALSE</item>
<item>NULL</item>
<item>NA</item>
<item>NA_integer_</item>
<item>NA_real_</item>
<item>NA_complex_</item>
<item>NA_character_</item>
<item>Inf</item>
<item>NaN</item>
</list>

<contexts>
<!-- This context is really only good for detecting unexpected closing braces '}'. Since opening braces go to ctx0 (and nesting in there), this context is only active on the base level -->
<context attribute="Normal Text" lineEndContext="#stay" name="level0">
<IncludeRules context="CommonRules"/>

<!-- CommonRules sends '{' to ctx0 and '(' to parenthesis, so a '}' or ')' that is still seen here has no opening counterpart and is marked as Error -->
<AnyChar attribute="Error" context="#stay" String="})"/>
</context>

<!-- Inside a '{ ... }' block: entered from CommonRules on '{' (which opens the folding region Brace1) -->
<context attribute="Normal Text" lineEndContext="#stay" name="ctx0">
<IncludeRules context="CommonRules"/>

<!-- the matching '}' ends the folding region and pops back to the context that was active before the '{' -->
<DetectChar attribute="Symbol" context="#pop" char="}" endRegion="Brace1" />
<!-- a ')' seen directly in a brace block was not opened by a '(' (that would have switched to the parenthesis context), so it is marked as Error -->
<DetectChar attribute="Error" context="#stay" char=")"/>
</context>

<!-- Inside '( ... )': entered from CommonRules on '(' and left again on the matching ')' -->
<context attribute="Normal Text" lineEndContext="#stay" name="parenthesis">
  <LineContinue attribute="Operator" context="#stay"/>
  <DetectChar attribute="Symbol" context="#pop" char=")"/>

  <!-- Named argument: an identifier followed by '=' (or ':=') is highlighted as
       Identifier. The lookahead rejects '==' (comparison) and also '=>' (pipebind),
       so that in 'f(x |> d => g(d))' the '=>' is left to the operator rules of
       CommonRules instead of 'd =' being shown as an argument name followed by a
       stray '>'. -->
  <RegExpr attribute="Identifier" context="#stay" String="[a-zA-Z_\.][0-9a-zA-Z_\.]*[\s]*[:]?=(?=[^=&gt;]|$)"/>

  <IncludeRules context="CommonRules"/>
  <!-- a '}' cannot close anything while a parenthesis is still open -->
  <DetectChar attribute="Error" context="#stay" char="}" />
</context>

<!-- Body of a double-quoted string, entered from CommonRules on '"'; lineEndContext="#stay" keeps the string open across line breaks -->
<context attribute="String" lineEndContext="#stay" name="string">
<!-- the closing double quote returns to the context that opened the string -->
<DetectChar attribute="String" context="#pop" char="&quot;"/>
<!-- backslash escape sequences get the separate "String Char" style -->
<HlCStringChar attribute="String Char" context="#stay"/>
</context>

<!-- Body of a single-quoted string, entered from CommonRules on "'"; same rules as the "string" context with the other quote character -->
<context attribute="String" lineEndContext="#stay" name="string2">
<!-- the closing single quote returns to the context that opened the string -->
<DetectChar attribute="String" context="#pop" char="'"/>
<!-- backslash escape sequences get the separate "String Char" style -->
<HlCStringChar attribute="String Char" context="#stay"/>
</context>

<!-- Body of a backquoted name, entered from CommonRules on '`'; the content is styled as Identifier while both backticks use the String style -->
<context attribute="Identifier" lineEndContext="#stay" name="backquotedsymbol">
<!-- the closing backtick returns to the context that opened the name -->
<DetectChar attribute="String" context="#pop" char="`"/>
<!-- backslash escape sequences get the separate "String Char" style -->
<HlCStringChar attribute="String Char" context="#stay"/>
</context>

<!-- Entered right after an operator (see CommonRules). fallthrough="true" with fallthroughContext="#pop" means the context is left as soon as none of the rules below matches, i.e. at the first ordinary token of the right-hand side -->
<context attribute="Normal Text" lineEndContext="#stay" name="operator_rhs" fallthrough="true" fallthroughContext="#pop">
<!-- While there is nothing of interest, stay in the context -->
<DetectSpaces />
<IncludeRules context="FindComments"/>
<!-- Operators other than +, -, and ! directly after another operator are an error. -->
<!-- '!=' is tested explicitly because a lone '!' is not part of the Error character list below -->
<Detect2Chars attribute="Error" context="#stay" char="!" char1="="/>
<AnyChar attribute="Error" context="#stay" String="*/&lt;&gt;=|&amp;:^@$~"/>
</context>

<!-- Rules that detect the start of a comment; other contexts pull them in through IncludeRules -->
<context attribute="Normal Text" lineEndContext="#stay" name="FindComments">
<!-- '##' has to be tested before the single '#', otherwise the plain Comment rule would match first -->
<Detect2Chars attribute="Headline" context="Headline" char="#" char1="#"/>
<DetectChar attribute="Comment" context="Comment" char="#"/>
</context>
<!-- Rest of a line started with '##'; lineEndContext="#pop" ends the context at the end of the line -->
<context attribute="Headline" lineEndContext="#pop" name="Headline">
<DetectSpaces />
<!-- the '##' prefix refers to a separate syntax definition named "Comments", which is not part of this file -->
<IncludeRules context="##Comments" />
</context>
<!-- Rest of a line started with a single '#'; lineEndContext="#pop" ends the context at the end of the line -->
<context attribute="Comment" lineEndContext="#pop" name="Comment">
<DetectSpaces />
<!-- the '##' prefix refers to a separate syntax definition named "Comments", which is not part of this file -->
<IncludeRules context="##Comments" />
</context>

<!-- This context is not really used, but contains the common rules -->
<!-- It is included by level0, ctx0 and parenthesis. Rules are tried from top to bottom, so multi-character operators must stay above the single-character AnyChar rule -->
<context name="CommonRules" lineEndContext="#stay" attribute="Normal Text" >
<DetectSpaces />
<IncludeRules context="FindComments"/>
<!-- opening quote characters switch to the matching string / backquoted-name context -->
<DetectChar attribute="String" context="string" char="&quot;"/>
<DetectChar attribute="String" context="string2" char="'"/>
<DetectChar attribute="String" context="backquotedsymbol" char="`"/>
<!-- entries of the "controls" and "words" lists defined at the top of this file -->
<keyword attribute="Control Structure" context="#stay" String="controls"/>
<keyword attribute="Reserved Words" context="#stay" String="words"/>
<Float attribute="Float" context="#stay"/>
<Int attribute="Int" context="#stay"/>
<!-- a name directly followed by '(' (optional whitespace in between) is styled as a function call; the second alternative covers names starting with a dot -->
<RegExpr attribute="Keyword" context="#stay" String="[a-zA-Z_]+[a-zA-Z_\.0-9]*(?=[\s]*[(])|\.[a-zA-Z_\.]+[a-zA-Z_\.0-9]*(?=[\s]*[(])"/>
<DetectChar attribute="Symbol" context="parenthesis" char="("/>

<!-- For (assignment) operators, enter a new context operator_rhs to check what follows (generally, that should not be another op) -->
<!-- the three-character assignment forms are listed before their two-character prefixes -->
<StringDetect attribute="Assign" context="operator_rhs" String="&lt;&lt;-"/>
<Detect2Chars attribute="Assign" context="operator_rhs" char="&lt;" char1="-"/>
<StringDetect attribute="Assign" context="operator_rhs" String="-&gt;&gt;"/>
<Detect2Chars attribute="Assign" context="operator_rhs" char="-" char1="&gt;"/>
<!-- A lone '=' is an assignment. The negative lookahead excludes '==' and the new pipebind operator '=>', which are matched as Operator by the rules further down -->
<RegExpr attribute="Assign" context="operator_rhs" String="=(?!(=|&gt;))"/>
<Detect2Chars attribute="Operator" context="operator_rhs" char="*" char1="*"/>
<Detect2Chars attribute="Operator" context="operator_rhs" char="&lt;" char1="="/>
<Detect2Chars attribute="Operator" context="operator_rhs" char="&gt;" char1="="/>
<Detect2Chars attribute="Operator" context="operator_rhs" char="=" char1="="/>
<Detect2Chars attribute="Operator" context="operator_rhs" char="!" char1="="/>
<!-- New pipe operator: '|>' is matched as one token, before '||' and before the single '|' of the AnyChar rule -->
<Detect2Chars attribute="Operator" context="operator_rhs" char="|" char1="&gt;"/>
<!-- New pipebind operator: '=>' matched as one token -->
<Detect2Chars attribute="Operator" context="operator_rhs" char="=" char1="&gt;"/>
<Detect2Chars attribute="Operator" context="operator_rhs" char="|" char1="|"/>
<Detect2Chars attribute="Operator" context="operator_rhs" char="&amp;" char1="&amp;"/>
<StringDetect attribute="Operator" context="operator_rhs" String=":::"/>
<Detect2Chars attribute="Operator" context="operator_rhs" char=":" char1=":"/>
<!-- every remaining single-character operator -->
<AnyChar attribute="Operator" context="operator_rhs" String="+-*/&lt;&gt;=!|&amp;:^@$~"/>
<!-- anything between two '%' characters, e.g. '%in%', is one operator -->
<RangeDetect attribute="Operator" context="operator_rhs" char="%" char1="%"/>

<!-- '{' opens the folding region Brace1 and switches to ctx0, which closes it again on '}' -->
<DetectChar attribute="Symbol" context="ctx0" char="{" beginRegion="Brace1" />

<!-- This is needed only to assist variable based indentation -->
<AnyChar attribute="Symbol" context="#stay" String="[]" />
</context>
</contexts>

<itemDatas>
<itemData name="Normal Text" defStyleNum="dsNormal" spellChecking="false"/>
<itemData name="Symbol" defStyleNum="dsNormal" spellChecking="false"/>
<itemData name="Keyword" defStyleNum="dsFunction" spellChecking="false"/>
<itemData name="Identifier" defStyleNum="dsAttribute" spellChecking="false"/>
<itemData name="String" defStyleNum="dsString"/>
<itemData name="Headline" defStyleNum="dsDocumentation" bold="1"/>
<itemData name="Comment" defStyleNum="dsComment"/>
<itemData name="Assign" defStyleNum="dsOthers" bold="1" italic="0" spellChecking="false"/>
<itemData name="Control Structure" defStyleNum="dsControlFlow" spellChecking="false"/>
<itemData name="Reserved Words" defStyleNum="dsConstant" spellChecking="false"/>
<itemData name="Error" defStyleNum="dsError" spellChecking="false"/>
<itemData name="Operator" defStyleNum="dsSpecialChar" spellChecking="false"/>
<itemData name="String Char" defStyleNum="dsSpecialChar" spellChecking="false"/>
<itemData name="Float" defStyleNum="dsFloat" spellChecking="false"/>
<itemData name="Int" defStyleNum="dsDecVal" spellChecking="false"/>
</itemDatas>
</highlighting>

<general>
<comments>
<comment name="singleLine" start="#"/>
</comments>
<keywords casesensitive="true" weakDeliminator="." additionalDeliminator="$"/>
</general>
</language>
<!-- kate: replace-tabs off; -->
6 changes: 6 additions & 0 deletions man/pandoc_args.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 11 additions & 0 deletions tests/testthat/test-render.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,14 @@ test_that("file_scope split correctly input file", {
expect_snapshot_file(splitted[1])
expect_snapshot_file(splitted[2])
})

test_that("syntax definition file is correctly added", {
  # highlighting switched off: the arguments must come back untouched
  no_highlight <- "--no-highlight"
  expect_identical(add_syntax_definition(no_highlight), no_highlight)

  # arguments for a user-supplied r.xml (NULL when Pandoc is older than 2.0)
  user_def <- pandoc_syntax_definition_args("dummy/r.xml")

  # old Pandoc: nothing can be appended
  if (!pandoc_available(2.0)) {
    expect_identical(add_syntax_definition("arg1"), "arg1")
  }

  skip_if_not_pandoc("2.0")

  # a user-provided r.xml wins over the bundled one
  with_user_def <- c("arg1", user_def)
  expect_identical(add_syntax_definition(with_user_def), with_user_def)

  # otherwise the bundled r.xml is appended
  expect_match(add_syntax_definition(c("arg1")), "r.xml", fixed = TRUE, all = FALSE)
})