diff --git a/src/main/java/org/sablecc/sablecc/lexer/Lexer.java b/src/main/java/org/sablecc/sablecc/lexer/Lexer.java index 9449cdeab3fc5aeac0e64353dbb0913f202bb332..30d4aa08edd6cf929936c40da9e661b84750e377 100644 --- a/src/main/java/org/sablecc/sablecc/lexer/Lexer.java +++ b/src/main/java/org/sablecc/sablecc/lexer/Lexer.java @@ -79,24 +79,38 @@ public class Lexer { switch(c) { - case 10: + case '\n': if(this.cr) { + // If the preceding character was \r (CR), + // ignore this \n (LF) character and don't increase the line or column. this.cr = false; } else { + // If there was no preceding \r (CR) character, + // consider this \n (LF) character an actual newline. this.line++; this.pos = 0; } break; - case 13: - case 8232: // Unicode line separator - case 8233: // Unicode paragraph separator + case '\r': + // A \r (CR) character is always considered a newline, + // but a \n (LF) character following it (if any) will be ignored (see above). this.line++; this.pos = 0; this.cr = true; break; + case 0x2028: // Unicode line separator + case 0x2029: // Unicode paragraph separator + // A Unicode line or paragraph separator is treated like a newline, + // but doesn't take part in the special handling for CR+LF. + // FIXME This case is a workaround for a limitation in the ProB cliparser prepl protocol, which doesn't support embedded newlines. + // TODO Remove this case once that is resolved. Practically nothing else uses the Unicode line separator character. + this.line++; + this.pos = 0; + this.cr = false; + break; default: this.pos++; this.cr = false; diff --git a/src/main/resources/org/sablecc/sablecc/lexer.txt b/src/main/resources/org/sablecc/sablecc/lexer.txt index 60f697e74c802ad92781f8faf87a0c25a8576de5..506e87f389c5e4f871be9f88c31545462e96357d 100644 --- a/src/main/resources/org/sablecc/sablecc/lexer.txt +++ b/src/main/resources/org/sablecc/sablecc/lexer.txt @@ -103,24 +103,38 @@ public class Lexer { switch(c) { - case 10: + case '\n': if(this.cr) { + // If the preceding character was \r (CR), + // ignore this \n (LF) character and don't increase the line or column. this.cr = false; } else { + // If there was no preceding \r (CR) character, + // consider this \n (LF) character an actual newline. this.line++; this.pos = 0; } break; - case 13: - case 8232: // Unicode line separator - case 8233: // Unicode paragraph separator + case '\r': + // A \r (CR) character is always considered a newline, + // but a \n (LF) character following it (if any) will be ignored (see above). this.line++; this.pos = 0; this.cr = true; break; + case 0x2028: // Unicode line separator + case 0x2029: // Unicode paragraph separator + // A Unicode line or paragraph separator is treated like a newline, + // but doesn't take part in the special handling for CR+LF. + // FIXME This case is a workaround for a limitation in the ProB cliparser prepl protocol, which doesn't support embedded newlines. + // TODO Remove this case once that is resolved. Practically nothing else uses the Unicode line separator character. + this.line++; + this.pos = 0; + this.cr = false; + break; default: this.pos++; this.cr = false;