Skip to content

Commit

Permalink
Added unescaping on the CSSCharStream level
Browse files Browse the repository at this point in the history
  • Loading branch information
phax committed Jun 12, 2023
1 parent a2b054f commit f95130b
Show file tree
Hide file tree
Showing 4 changed files with 275 additions and 20 deletions.
238 changes: 230 additions & 8 deletions ph-css/src/main/java/com/helger/css/parser/CSSCharStream.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,14 @@
import javax.annotation.Nonnegative;
import javax.annotation.Nonnull;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.helger.commons.ValueEnforcer;
import com.helger.commons.io.stream.NonBlockingPushbackReader;
import com.helger.commons.io.stream.StreamHelper;
import com.helger.commons.string.StringHelper;
import com.helger.css.reader.errorhandler.LoggingCSSParseErrorHandler;

import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;

Expand All @@ -40,9 +46,218 @@
@SuppressFBWarnings ("NM_METHOD_NAMING_CONVENTION")
public final class CSSCharStream implements CharStream
{
/**
 * A special char iterator based on
 * https://www.w3.org/TR/css-syntax-3/#css-filter-code-points
 * <p>
 * It wraps a source {@link Reader} and hands out characters after applying two
 * transformations:
 * <ol>
 * <li>Input filtering: NUL becomes U+FFFD, and form feed, <code>\r</code> and
 * <code>\r\n</code> all become a single <code>\n</code>.</li>
 * <li>Unescaping: a backslash followed by 1 to 6 hex digits (and an optional
 * single trailing whitespace) is replaced with the denoted code point. A
 * backslash followed by a newline is treated as a line continuation. Any other
 * backslash is passed through unchanged.</li>
 * </ol>
 * Instances are stateful (pushback buffer, pending surrogate) and therefore
 * not meant to be shared between threads.
 *
 * @author Philip Helger
 */
private static final class CSSFilterCodePointsReader implements AutoCloseable
{
  private static final Logger LOGGER = LoggerFactory.getLogger (CSSCharStream.CSSFilterCodePointsReader.class);

  /** Value returned by the underlying reader on end of input. */
  private static final int EOF = -1;
  /** U+FFFD REPLACEMENT CHARACTER. */
  private static final int REPLACEMENT_CHAR = 0xfffd;
  /** Maximum number of hex digits in a CSS escape sequence. */
  private static final int MAX_ESCAPE_HEX_DIGITS = 6;
  /** Marker for "no low surrogate is waiting to be delivered". */
  private static final int NO_PENDING_CHAR = -1;

  private final NonBlockingPushbackReader m_aLocalReader;
  /**
   * Low surrogate of a supplementary code point whose high surrogate was the
   * last char that fitted into the caller's buffer. Delivered first on the
   * next call to {@link #read(char[], int, int)}.
   */
  private int m_nPendingLowSurrogate = NO_PENDING_CHAR;

  /**
   * @param aSrcReader
   *        The reader to take the raw characters from. May not be
   *        <code>null</code>.
   */
  public CSSFilterCodePointsReader (@Nonnull final Reader aSrcReader)
  {
    // 1 char look ahead is sufficient
    m_aLocalReader = new NonBlockingPushbackReader (aSrcReader, 1);
  }

  /**
   * Close the wrapping pushback reader.
   *
   * @throws IOException
   *         If closing fails
   */
  @Override
  public void close () throws IOException
  {
    m_aLocalReader.close ();
  }

  /**
   * Apply the single-character part of the input filtering. The
   * <code>\r\n</code> pair handling needs look-ahead and is therefore done by
   * the callers.
   *
   * @param c
   *        The raw character or {@link #EOF}
   * @return The filtered character, or the unchanged input (including
   *         {@link #EOF}) if no rule applies.
   */
  private static int _filterSingleChar (final int c)
  {
    switch (c)
    {
      case 0:
        // 0 means "unsupported character"
        return REPLACEMENT_CHAR;
      case '\f':
      case '\r':
        // Form feed and carriage return both become \n
        return '\n';
      default:
        return c;
    }
  }

  /**
   * Peek at the next filtered character without consuming it.
   *
   * @return Next character to come (already filtered) or -1 on EOF
   * @throws IOException
   *         On read error
   */
  private int _lookaheadCodePoint () throws IOException
  {
    final int ret = m_aLocalReader.read ();
    if (ret != EOF)
    {
      // Only real characters may be pushed back. Pushing back the EOF marker
      // would store it as a character, which a later read would then
      // deliver as U+FFFF instead of signalling EOF.
      m_aLocalReader.unread (ret);
    }

    // No matter if \r is followed by \n or not - it is a newline
    return _filterSingleChar (ret);
  }

  /**
   * This is the method implementing
   * https://www.w3.org/TR/css-syntax-3/#css-filter-code-points
   *
   * @return Next code point or -1 on EOF. May read 1 or 2 chars.
   * @throws IOException
   *         On read error
   */
  private int _readFilteredCodePoint () throws IOException
  {
    final int nRaw = m_aLocalReader.read ();
    if (nRaw == '\r')
    {
      // Handle \r\n as one \n: swallow a directly following \n, put anything
      // else (except EOF) back
      final int nNext = m_aLocalReader.read ();
      if (nNext != '\n' && nNext != EOF)
        m_aLocalReader.unread (nNext);
    }
    final int ret = _filterSingleChar (nRaw);

    if (LOGGER.isTraceEnabled ())
    {
      if (ret == EOF)
        LOGGER.trace ("Read EOF");
      else
        LOGGER.trace ("Read " + LoggingCSSParseErrorHandler.createLoggingStringIllegalCharacter ((char) ret));
    }
    return ret;
  }

  private static boolean _isNewLine (final int c)
  {
    // After filtering, \n is the only newline representation left
    return c == '\n';
  }

  private static boolean _isWhitespace (final int c)
  {
    return _isNewLine (c) || c == '\t' || c == ' ';
  }

  private static boolean _isHexChar (final int c)
  {
    return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f');
  }

  /**
   * Handle https://www.w3.org/TR/css-syntax-3/#escaping
   *
   * @param cSrcFiltered
   *        The filtered character that was just consumed.
   * @return
   *         <ul>
   *         <li>the input itself, if it is not a backslash or if the backslash
   *         is followed by neither a hex digit nor a newline</li>
   *         <li>the code point following the newline for a backslash-newline
   *         sequence - this may be -1 if the input ends there</li>
   *         <li>the decoded code point for a hex escape; values beyond
   *         U+10FFFF are replaced with U+FFFD</li>
   *         </ul>
   * @throws IOException
   *         On read error
   */
  private int _handleUnescape (final int cSrcFiltered) throws IOException
  {
    if (cSrcFiltered != '\\')
    {
      // Return as is
      return cSrcFiltered;
    }

    // Collect up to 6 hex digits following the backslash
    int nCodePoint = 0;
    int nHexCount = 0;
    while (nHexCount < MAX_ESCAPE_HEX_DIGITS)
    {
      final int cNext = _lookaheadCodePoint ();
      if (!_isHexChar (cNext))
        break;

      nHexCount++;
      // Consume char
      _readFilteredCodePoint ();
      nCodePoint = (nCodePoint * 16) + StringHelper.getHexValue ((char) cNext);
    }

    if (nHexCount == 0)
    {
      // Check if the next char is a newline
      if (_isNewLine (_lookaheadCodePoint ()))
      {
        // Consume newline char
        _readFilteredCodePoint ();
        // Return the code point following the newline (-1 on EOF)
        return _readFilteredCodePoint ();
      }

      // Return the backslash as is
      return cSrcFiltered;
    }

    // Hex chars found
    // Check for a single trailing whitespace and skip it if present
    if (_isWhitespace (_lookaheadCodePoint ()))
    {
      // Consume char
      _readFilteredCodePoint ();
    }

    if (nCodePoint > Character.MAX_CODE_POINT)
    {
      // 6 hex digits can denote values up to 0xFFFFFF which are no valid
      // Unicode code points - the CSS syntax spec maps them to U+FFFD
      LOGGER.warn ("Unsupported code point found: " + nCodePoint);
      return REPLACEMENT_CHAR;
    }
    // NOTE(review): the CSS syntax spec also maps an escaped zero and escaped
    // surrogates to U+FFFD; they are passed through here to keep the existing
    // output for such input - confirm whether that is intended.
    return nCodePoint;
  }

  /**
   * Read filtered and unescaped characters into the provided buffer.
   * Supplementary code points (above U+FFFF) are delivered as a surrogate
   * pair; if only the high surrogate fits, the low surrogate is delivered by
   * the next call.
   *
   * @param buf
   *        Destination buffer. May not be <code>null</code>.
   * @param nOfs
   *        Index of the first char to write in <code>buf</code>. Must be
   *        &ge; 0.
   * @param nLen
   *        Maximum number of chars to write. Must be &ge; 0.
   * @return The number of chars written, or -1 if the end of the input was
   *         reached before any char could be written. 0 is only returned if
   *         <code>nLen</code> is 0.
   * @throws IOException
   *         On read error
   */
  public int read (@Nonnull final char [] buf, @Nonnegative final int nOfs, @Nonnegative final int nLen)
                                                                                                           throws IOException
  {
    ValueEnforcer.notNull (buf, "buf");
    ValueEnforcer.isGE0 (nOfs, "Ofs");
    ValueEnforcer.isGE0 (nLen, "Len");

    if (LOGGER.isTraceEnabled ())
      LOGGER.trace ("## read (" + nOfs + ", " + nLen + ")");

    int nCharsRead = 0;
    boolean bEOF = false;
    while (nCharsRead < nLen)
    {
      if (m_nPendingLowSurrogate != NO_PENDING_CHAR)
      {
        // Second half of a surrogate pair left over from the previous call
        buf[nOfs + nCharsRead] = (char) m_nPendingLowSurrogate;
        nCharsRead++;
        m_nPendingLowSurrogate = NO_PENDING_CHAR;
        continue;
      }

      final int c = _readFilteredCodePoint ();
      if (c == EOF)
      {
        bEOF = true;
        break;
      }

      final int cCleanChar = _handleUnescape (c);
      if (cCleanChar == EOF)
      {
        // Input ended right after a backslash-newline sequence. Must not be
        // stored in the buffer, as the cast to char would turn it into U+FFFF
        bEOF = true;
        break;
      }

      if (cCleanChar <= Character.MAX_VALUE)
      {
        buf[nOfs + nCharsRead] = (char) cCleanChar;
        nCharsRead++;
      }
      else
      {
        // Supplementary code point - needs two chars
        buf[nOfs + nCharsRead] = Character.highSurrogate (cCleanChar);
        nCharsRead++;

        final char cLow = Character.lowSurrogate (cCleanChar);
        if (nCharsRead < nLen)
        {
          buf[nOfs + nCharsRead] = cLow;
          nCharsRead++;
        }
        else
        {
          // Buffer is full - remember for the next call
          m_nPendingLowSurrogate = cLow;
        }
      }
    }
    if (LOGGER.isTraceEnabled ())
      LOGGER.trace ("## read " + nCharsRead + " chars");

    // -1 meaning EOF - only if really nothing could be delivered
    return nCharsRead == 0 && bEOF ? -1 : nCharsRead;
  }
}

public static final int DEFAULT_TAB_SIZE = 8;
private static final int DEFAULT_BUF_SIZE = 4096;

private final Reader m_aReader;
private final CSSFilterCodePointsReader m_aReader;
private int m_nLine;
private int m_nColumn;
private int m_nAvailable;
Expand All @@ -62,7 +277,7 @@ public final class CSSCharStream implements CharStream
/** Position in buffer. */
private int m_nBufpos = -1;

private int m_nTabSize = 8;
private int m_nTabSize = DEFAULT_TAB_SIZE;
private boolean m_bTrackLineColumn = true;

public CSSCharStream (@Nonnull final Reader aReader)
Expand All @@ -75,11 +290,15 @@ private CSSCharStream (@Nonnull final Reader aReader,
@Nonnegative final int nStartColumn,
@Nonnegative final int nBufferSize)
{
ValueEnforcer.notNull (aReader, "Reader");
ValueEnforcer.isGE0 (nStartLine, "StartLine");
ValueEnforcer.isGE0 (nStartColumn, "StartColumn");
ValueEnforcer.isGE0 (nBufferSize, "BufferSize");

// Using a buffered reader gives a minimal speedup
m_aReader = StreamHelper.getBuffered (ValueEnforcer.notNull (aReader, "Reader"));
m_nLine = ValueEnforcer.isGE0 (nStartLine, "StartLine");
m_nColumn = ValueEnforcer.isGE0 (nStartColumn, "StartColumn") - 1;
m_aReader = new CSSFilterCodePointsReader (StreamHelper.getBuffered (aReader));
m_nLine = nStartLine;
m_nColumn = nStartColumn - 1;

m_nAvailable = nBufferSize;
m_nBufsize = nBufferSize;
Expand Down Expand Up @@ -355,10 +574,13 @@ public void backup (final int nAmount)
/** @return token image as String */
public String getImage ()
{
final String sImage;
if (m_nBufpos >= m_nTokenBegin)
return new String (m_aBuffer, m_nTokenBegin, m_nBufpos - m_nTokenBegin + 1);

return new String (m_aBuffer, m_nTokenBegin, m_nBufsize - m_nTokenBegin) + new String (m_aBuffer, 0, m_nBufpos + 1);
sImage = new String (m_aBuffer, m_nTokenBegin, m_nBufpos - m_nTokenBegin + 1);
else
sImage = new String (m_aBuffer, m_nTokenBegin, m_nBufsize - m_nTokenBegin) +
new String (m_aBuffer, 0, m_nBufpos + 1);
return sImage;
}

/** @return suffix */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,10 +85,18 @@ public static String createLoggingStringParseError (@Nonnull final Token aLastVa
}

final StringBuilder retval = new StringBuilder (1024);
retval.append ('[').append (aLastValidToken.next.beginLine).append (':').append (aLastValidToken.next.beginColumn).append (']');
retval.append ('[')
.append (aLastValidToken.next.beginLine)
.append (':')
.append (aLastValidToken.next.beginColumn)
.append (']');
if (aLastSkippedToken != null)
{
retval.append ("-[").append (aLastSkippedToken.endLine).append (':').append (aLastSkippedToken.endColumn).append (']');
retval.append ("-[")
.append (aLastSkippedToken.endLine)
.append (':')
.append (aLastSkippedToken.endColumn)
.append (']');
}
retval.append (" Encountered");
Token aCurToken = aLastValidToken.next;
Expand All @@ -100,17 +108,22 @@ public static String createLoggingStringParseError (@Nonnull final Token aLastVa
retval.append (aTokenImageVal[TOKEN_EOF]);
break;
}
retval.append ("text '").append (aCurToken.image).append ("' corresponding to token ").append (aTokenImageVal[aCurToken.kind]);
retval.append ("text '")
.append (aCurToken.image)
.append ("' corresponding to token ")
.append (aTokenImageVal[aCurToken.kind]);
aCurToken = aCurToken.next;
}
retval.append (". ");
if (aLastSkippedToken != null)
retval.append ("Skipped until token ").append (aLastSkippedToken).append (". ");
retval.append (aExpectedTokenSequencesVal.length == 1 ? "Was expecting:" : "Was expecting one of:").append (aExpected);
retval.append (aExpectedTokenSequencesVal.length == 1 ? "Was expecting:" : "Was expecting one of:")
.append (aExpected);
return retval.toString ();
}

public void onCSSParseError (@Nonnull final ParseException aParseEx, @Nullable final Token aLastSkippedToken) throws ParseException
public void onCSSParseError (@Nonnull final ParseException aParseEx, @Nullable final Token aLastSkippedToken)
throws ParseException
{
if (aParseEx.expectedTokenSequences == null)
LOGGER.warn (aParseEx.getMessage ());
Expand Down Expand Up @@ -140,7 +153,14 @@ public static String createLoggingStringUnexpectedRule (@Nonnull final Token aCu
@Nonnull @Nonempty final String sRule,
@Nonnull @Nonempty final String sMsg)
{
return "[" + aCurrentToken.beginLine + ":" + aCurrentToken.beginColumn + "] Unexpected rule '" + sRule + "': " + sMsg;
return "[" +
aCurrentToken.beginLine +
":" +
aCurrentToken.beginColumn +
"] Unexpected rule '" +
sRule +
"': " +
sMsg;
}

public void onCSSUnexpectedRule (@Nonnull final Token aCurrentToken,
Expand All @@ -165,7 +185,8 @@ public void onCSSUnexpectedRule (@Nonnull final Token aCurrentToken,
*/
@Nonnull
@Nonempty
public static String createLoggingStringDeprecatedProperty (@Nonnull final Token aPrefixToken, @Nonnull final Token aIdentifierToken)
public static String createLoggingStringDeprecatedProperty (@Nonnull final Token aPrefixToken,
@Nonnull final Token aIdentifierToken)
{
return "[" +
aPrefixToken.beginLine +
Expand Down Expand Up @@ -217,7 +238,12 @@ public void onCSSBrowserCompliantSkip (@Nullable final ParseException ex,
@Nonempty
public static String createLoggingStringIllegalCharacter (final char cIllegalChar)
{
return "Found illegal character: " + cIllegalChar + " (0x" + StringHelper.getHexStringLeadingZero (cIllegalChar, 4) + ")";
final String sCharHex = "0x" + StringHelper.getHexStringLeadingZero (cIllegalChar, 4);
final String sPrintableChar = cIllegalChar <= 32 || cIllegalChar > 255 ? sCharHex : cIllegalChar +
" (" +
sCharHex +
")";
return "Found illegal character: " + sPrintableChar;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,15 @@ public final class Issue91Test
@Test
public void testUnescape1 ()
{
final String sCSS = "div { \73\72\63\3a\35 }";
final String sCSS = "div { \\73\\72\\63\\3a\\35 }";
final CascadingStyleSheet aCSS = CSSReader.readFromStringReader (sCSS,
new CSSReaderSettings ().setCSSVersion (ECSSVersion.LATEST)
.setBrowserCompliantMode (true));
assertNotNull (aCSS);
assertEquals (1, aCSS.getStyleRuleCount ());

final CSSStyleRule aSR = aCSS.getStyleRuleAtIndex (0);
assertEquals (2, aSR.getDeclarationCount ());
assertEquals (1, aSR.getDeclarationCount ());

assertEquals ("div{src:5}",
new CSSWriter (new CSSWriterSettings ().setOptimizedOutput (true)).setWriteHeaderText (false)
Expand Down
Loading

0 comments on commit f95130b

Please sign in to comment.