From 574a3ad4587a399ce258bfc358b6d2bf986880ea Mon Sep 17 00:00:00 2001 From: dgelessus <dgelessus@users.noreply.github.com> Date: Tue, 28 May 2024 14:43:12 +0200 Subject: [PATCH] Remove newline check from Lexer.unread and document the issue instead Unfortunately, the check causes errors in our EventBLexer and this isn't easy to fix. --- .../java/org/sablecc/sablecc/lexer/Lexer.java | 17 +++++++++++++---- .../resources/org/sablecc/sablecc/lexer.txt | 17 +++++++++++++---- 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/src/main/java/org/sablecc/sablecc/lexer/Lexer.java b/src/main/java/org/sablecc/sablecc/lexer/Lexer.java index 438b84d..c38142a 100644 --- a/src/main/java/org/sablecc/sablecc/lexer/Lexer.java +++ b/src/main/java/org/sablecc/sablecc/lexer/Lexer.java @@ -390,6 +390,19 @@ public class Lexer } /** + * <p> + * Push the given token's text back onto the input. + * Note that the lexer state is <i>not</i> restored, + * so a following {@link #next()}/{@link #peek()} call may result in a different token. + * </p> + * <p> + * <b>Note:</b> + * If the token text contains newlines, + * the caller must ensure that CR+LF pairs are unread in their entirety. + * If only one half of a CR+LF pair is unread, + * the line numbers will be incorrect when it is lexed again. + * </p> + * * @param tok the token to push back onto the input * @throws IOException when thrown by {@link PushbackReader#unread(int)} */ @@ -397,10 +410,6 @@ public class Lexer { String tokenText = tok.getText(); int length = tokenText.length(); - if(this.cr || (length > 0 && tokenText.charAt(0) == '\n')) - { - throw new IOException("Cannot unread a token containing a partial newline"); - } for(int i = length - 1; i >= 0; i--) { diff --git a/src/main/resources/org/sablecc/sablecc/lexer.txt b/src/main/resources/org/sablecc/sablecc/lexer.txt index 1577bea..c877b18 100644 --- a/src/main/resources/org/sablecc/sablecc/lexer.txt +++ b/src/main/resources/org/sablecc/sablecc/lexer.txt @@ -284,6 +284,19 @@ Macro:LexerBody } /** + * <p> + * Push the given token's text back onto the input. + * Note that the lexer state is <i>not</i> restored, + * so a following {@link #next()}/{@link #peek()} call may result in a different token. + * </p> + * <p> + * <b>Note:</b> + * If the token text contains newlines, + * the caller must ensure that CR+LF pairs are unread in their entirety. + * If only one half of a CR+LF pair is unread, + * the line numbers will be incorrect when it is lexed again. + * </p> + * * @param tok the token to push back onto the input * @throws IOException when thrown by {@link PushbackReader#unread(int)} */ @@ -291,10 +304,6 @@ Macro:LexerBody { String tokenText = tok.getText(); int length = tokenText.length(); - if(this.cr || (length > 0 && tokenText.charAt(0) == '\n')) - { - throw new IOException("Cannot unread a token containing a partial newline"); - } for(int i = length - 1; i >= 0; i--) { -- GitLab