Add SableCC grammar for SableCC itself

Taken from the official SableCC 3 source releases (all 3.x versions use the same grammar).

Add SableCC grammar for SableCC itself
e781662d · dgelessus · 76da3cdc · e781662d
Commit e781662d authored 3 years ago by dgelessus
--- a/src/main/sablecc/sablecc-3x.sablecc3
+++ b/src/main/sablecc/sablecc-3x.sablecc3
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ * This file is part of SableCC.                             *
+ * See the file "LICENSE" for copyright information and the  *
+ * terms and conditions for copying, distribution and        *
+ * modification of SableCC.                                  *
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+/* This grammar defines the SableCC 3.x input language. */
+Package org.sablecc.sablecc; // Root Java package for generated files.
+Helpers
+/* These are character sets and regular expressions used in the
+   definition of tokens. */
+    all = [0 .. 0xFFFF];
+    lowercase = ['a' .. 'z'];
+    uppercase = ['A' .. 'Z'];
+    digit = ['0' .. '9'];
+    hex_digit = [digit + [['a' .. 'f'] + ['A' .. 'F']]];
+    tab = 9;
+    cr = 13;
+    lf = 10;
+    eol = cr lf | cr | lf; // This takes care of different platforms
+    not_cr_lf = [all - [cr + lf]];
+    not_star = [all - '*'];
+    not_star_slash = [not_star - '/'];
+    blank = (' ' | tab | eol)+;
+    short_comment = '//' not_cr_lf* eol;
+    long_comment =
+        '/*' not_star* '*'+ (not_star_slash not_star* '*'+)* '/';
+    comment = short_comment | long_comment;
+    letter = lowercase | uppercase | '_' | '$';
+    id_part = lowercase (lowercase | digit)*;
+States
+    normal, /* The first state is the initial state. */
+    package;
+Tokens
+/* These are token definitions. It is allowed to use helper regular *
+ * expressions in the body of a token definition.                   *
+ * On a given input, the longest valid definition is chosen, In     *
+ * case of a match, the definition that appears first is chosen.    *
+ * Example: on input -> 's' <- "char" will have precedence on       *
+ * "string", because it appears first.                              */
+{package}
+    pkg_id = letter (letter | digit)*;
+{normal->package}
+    package = 'Package';
+    states = 'States';
+    helpers = 'Helpers';
+    tokens = 'Tokens';
+    ignored = 'Ignored';
+    productions = 'Productions';
+    abstract = 'Abstract';
+    syntax = 'Syntax';
+    tree = 'Tree';
+    new = 'New';
+    null = 'Null';
+    token_specifier = 'T';
+    production_specifier = 'P';
+    dot = '.';
+    d_dot = '..';
+{normal, package->normal}
+    semicolon = ';';
+    equal = '=';
+    l_bkt = '[';
+    r_bkt = ']';
+    l_par = '(';
+    r_par = ')';
+    l_brace =  '{';
+    r_brace =  '}';
+    plus = '+';
+    minus = '-';
+    q_mark = '?';
+    star = '*';
+    bar = '|';
+    comma = ',';
+    slash = '/';
+    arrow = '->';
+    colon = ':';
+    id = id_part ('_' id_part)*;
+    char = ''' not_cr_lf ''';
+    dec_char = digit+;
+    hex_char = '0' ('x' | 'X') hex_digit+;
+    string = ''' [not_cr_lf - ''']+ ''';
+    blank = blank;
+    comment = comment;
+Ignored Tokens
+/* These tokens are simply ignored by the parser. */
+    blank,
+    comment;
+Productions
+/* These are the productions of the grammar. The first production is *
+ * used by the implicit start production:                            *
+ *   start = (first production) EOF;                                 *
+ * ?, * and + have the same meaning as in a regular expression.      *
+ * In case a token and a production share the same name, the use of  *
+ * P. (for production) or T. (for token) is required.                *
+ * Each alternative can be explicitely named by preceding it with a  *
+ * name enclosed in braces.                                          *
+ * Each alternative element can be explicitely named by preceding it *
+ * with a name enclosed in brackets and followed by a colon.         */
+    grammar =
+        P.package? P.helpers? P.states? P.tokens? ign_tokens? P.productions? P.ast?
+	   {-> New grammar([P.package.list_pkg_id], P.helpers, P.states, 
+	                   P.tokens, P.ign_tokens, P.productions, P.ast)
+	   };
+    package
+	   {-> [list_pkg_id]:pkg_id*} =
+        T.package pkg_name
+	   {-> [pkg_name.pkg_id] };
+    pkg_name 
+	   {-> pkg_id*} =
+        pkg_id [pkg_ids]:pkg_name_tail* semicolon
+	   {-> [pkg_id, pkg_ids.pkg_id] };
+    pkg_name_tail 
+	   {-> pkg_id } =
+        dot pkg_id
+	   {-> pkg_id };
+    helpers =
+        T.helpers [helper_defs]:helper_def+
+	   {-> New helpers([helper_defs]) };
+    helper_def =
+        id equal reg_exp semicolon
+	   {-> New helper_def(id, reg_exp) };
+    states =
+        T.states id_list semicolon
+	   {-> New states([id_list.id]) };
+    id_list 
+	   {-> id*} =
+        id [ids]:id_list_tail*
+	   {-> [id, ids.id]};
+    id_list_tail 
+	   {-> id } =
+        comma id
+	   {-> id};
+    tokens =
+        T.tokens [token_defs]:token_def+
+	   {-> New tokens([token_defs]) };
+    token_def =
+        state_list? id equal reg_exp look_ahead? semicolon
+	   {-> New token_def(state_list, id, reg_exp, look_ahead.slash, look_ahead.reg_exp) };
+    state_list =
+        l_brace id transition? [state_lists]:state_list_tail* r_brace
+	   {-> New state_list(id, transition, [state_lists])};
+    state_list_tail =
+        comma id transition?
+	   {-> New state_list_tail(id, transition) };
+    transition =
+        arrow id
+	   {-> New transition(id)};
+    ign_tokens =
+        ignored T.tokens id_list? semicolon
+	   {-> New ign_tokens([id_list.id]) };
+    look_ahead 
+	   {-> slash reg_exp} =
+        slash reg_exp
+	   {-> slash reg_exp};
+    reg_exp =
+        concat [concats]:reg_exp_tail*
+	   {-> New reg_exp([concat, concats.concat])};
+    reg_exp_tail 
+	   {-> concat } =
+        bar concat
+	   {-> concat};
+    concat =
+        [un_exps]:un_exp*
+	   {-> New concat([un_exps])};
+    un_exp =
+        basic un_op?;
+    basic =
+        {char}    P.char 
+	   {-> New basic.char(P.char)}		|
+        {set}     set 
+	   {-> New basic.set(set)}		|
+        {string}  string 
+	   {-> New basic.string(string)}	|
+        {id}      id 
+	   {-> New basic.id(id)}		|
+        {reg_exp} l_par reg_exp r_par
+	   {-> New basic.reg_exp(reg_exp)}	;
+    char =
+        {char} T.char |
+        {dec}  dec_char |
+        {hex}  hex_char;
+    set =
+        {operation} l_bkt [left]:basic  bin_op [right]:basic  r_bkt 
+	   {-> New set.operation(left, bin_op, right) } |
+        {interval}  l_bkt [left]:P.char d_dot  [right]:P.char r_bkt
+	   {-> New set.interval(left, right) };
+    un_op =
+        {star}   star 
+	   {-> New un_op.star(star)}   |
+        {q_mark} q_mark
+	   {-> New un_op.q_mark(q_mark)} |
+        {plus}   plus
+	   {-> New un_op.plus(plus)}   ;
+    bin_op =
+        {plus}  plus 
+	   {-> New bin_op.plus()}  |
+        {minus} minus
+	   {-> New bin_op.minus()} ;
+    productions =
+        T.productions [prods]:prod+
+	   {-> New productions([prods]) };
+    prod =
+        id prod_transform? equal alts semicolon
+	   {-> New prod(id, prod_transform.arrow, [prod_transform.elem], [alts.list_alt])};
+    prod_transform 
+	   {-> arrow elem*} =
+        l_brace arrow [elems]:elem* r_brace
+	   {-> arrow [elems]};
+    alts 
+	   {-> [list_alt]:alt*} =
+        alt [alts]:alts_tail*
+	   {-> [alt, alts.alt]};
+    alts_tail 
+	   {-> alt} =
+        bar alt
+	   {-> alt};
+    alt =
+        alt_name? [elems]:elem* alt_transform? 
+	   {-> New alt(alt_name.id, [elems], alt_transform)};
+    alt_transform =
+        l_brace arrow [terms]: term* r_brace
+	   {-> New alt_transform(l_brace, [terms], r_brace)};
+    term =
+        {new} new prod_name l_par params? r_par
+	   {-> New term.new(prod_name, l_par, [params.list_term]) } 		|
+        {list} l_bkt list_of_list_term? r_bkt
+	   {-> New term.list(l_bkt, [list_of_list_term.list_terms])} 	|
+        {simple} specifier? id simple_term_tail?
+	   {-> New term.simple(specifier, id, simple_term_tail.id)} 	|
+        {null} null
+	   {-> New term.null()}						;
+    list_of_list_term 
+	   {-> [list_terms]:list_term* } =
+		list_term [list_terms]:list_term_tail*
+	   {-> [list_term, list_terms.list_term] }    ;
+    list_term =
+        {new} new prod_name l_par params? r_par
+	   {-> New list_term.new(prod_name, l_par, [params.list_term])}		|
+       	{simple} specifier? id simple_term_tail? 
+	   {-> New list_term.simple(specifier, id, simple_term_tail.id)};
+    list_term_tail 
+	   {-> list_term} =
+	comma list_term
+	   {-> list_term} ;
+    simple_term_tail 
+	   {-> id} =
+        dot id
+	   {-> id};
+    prod_name =
+        id prod_name_tail?
+	   {-> New prod_name(id, prod_name_tail.id)};
+    prod_name_tail 
+	   {-> id} =
+        dot id
+	   {-> id};
+    params 
+	   {-> [list_term]:term*} =
+       term [params]:params_tail*
+	   {-> [term, params.term]};
+    params_tail 
+	   {-> term} =
+       comma term
+	   {-> term};
+    alt_name 
+	   {-> id} =
+        l_brace id r_brace
+	   {-> id};
+    elem =
+        elem_name? specifier? id un_op?
+	   {-> New elem(elem_name.id, specifier, id, un_op) };
+    elem_name 
+	   {-> id} =
+        l_bkt id r_bkt colon
+	   {-> id};
+    specifier =
+        {token}      token_specifier dot 
+	   {-> New specifier.token()} 			|
+        {production} production_specifier dot
+	   {-> New specifier.production()}		;
+    ast =
+        abstract syntax tree [prods]:ast_prod+
+	   {-> New ast([prods]) };
+    ast_prod =
+        id equal [alts]:ast_alts semicolon
+	   {-> New ast_prod(id, [alts.list_ast_alt])};
+    ast_alts 
+	   {-> [list_ast_alt]:ast_alt*} =
+        ast_alt [ast_alts]:ast_alts_tail*
+	   {-> [ast_alt, ast_alts.ast_alt]};
+    ast_alts_tail 
+	   {-> ast_alt} =
+        bar ast_alt
+	   {-> ast_alt};
+    ast_alt =
+        alt_name? [elems]:elem*
+	   {-> New ast_alt(alt_name.id, [elems])};
+/*****************************************************************************************/
+/*											 */
+/*											 */
+/*											 */
+/*											 */
+/*											 */
+/*****************************************************************************************/
+Abstract Syntax Tree
+    grammar =
+        [package]:pkg_id* P.helpers? P.states? P.tokens? P.ign_tokens? P.productions? P.ast?;
+    helpers =
+        [helper_defs]:helper_def*;
+    helper_def =
+        id reg_exp;
+    states =
+	[list_id]:id*;
+    tokens =
+        [token_defs]:token_def*;
+    token_def =
+        state_list? id reg_exp slash? [look_ahead]:reg_exp?;
+    state_list =
+        id transition? [state_lists]:state_list_tail*;
+    state_list_tail =
+        id transition?;
+    transition =
+        id;
+    ign_tokens =
+        [list_id]:id*;
+    reg_exp =
+        [concats]:concat*;
+    concat = 
+	[un_exps]: un_exp*;
+    un_exp =
+        basic un_op?;
+    basic =
+        {char}    P.char |
+        {set}     set |
+        {string}  string |
+        {id}      id |
+        {reg_exp} reg_exp;
+    char =
+        {char} T.char |
+        {dec}  dec_char |
+        {hex}  hex_char;
+    set =
+        {operation} [left]:basic bin_op [right]:basic |
+        {interval}  [left]:P.char [right]:P.char ;
+    un_op =
+        {star}   star 	|
+        {q_mark} q_mark |
+        {plus}   plus	;
+    bin_op =
+        {plus} |
+        {minus};
+    productions =
+        [prods]:prod*;
+    prod =
+        id arrow? [prod_transform]:elem* [alts]:alt*;
+    alt =
+        [alt_name]:id? [elems]:elem* alt_transform?;
+    alt_transform =
+	l_brace [terms]:term* r_brace;
+    term =
+        {new} prod_name l_par [params]:term* |
+        {list} l_bkt [list_terms]:list_term* |
+        {simple} specifier? id [simple_term_tail]:id? |
+        {null} ;
+    list_term =
+        {new} prod_name l_par [params]:term* |
+        {simple} specifier? id [simple_term_tail]:id? ;
+    prod_name =
+        id [prod_name_tail]:id? ;
+    elem =
+        [elem_name]:id? specifier? id un_op?;
+    specifier =
+        {token} 	|
+        {production} 	;
+    ast =
+        [prods]:ast_prod*;
+    ast_prod =
+        id [alts]:ast_alt*;
+    ast_alt =
+        [alt_name]:id? [elems]:elem*;