// Header
"src/grammar.rs.pp";

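// The context statement sets the context type for the generated parser;
// the hooks below presumably receive a value of this type (here, LangBuilder).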
context "LangBuilder";

/*===================================
  Tokenizer
===================================*/

// Token types are defined with the token keyword
token Keyword;
token Identifier;
token RegExp;
token Literal;
token Symbol;

// Token types marked extract are recognized by the tokenizer
// but excluded from the parsed AST
extract token Comment;

// Ignore whitespace
ignore /\s+/;

// Line comments and block comments
// (the block pattern allows a run of '*' before the closing '/')
Comment /\/\/[^\n]*\n?/;
Comment /\/\*([^\*]|(\*+[^\*\/]))*\*+\//;

// Literals are delimited by double quotes, regular expressions by slashes
Literal /"((\\.)|[^\\"])*"/;
RegExp /\/((\\.)|[^\\\/])*\//;

// Keywords
Keyword "ignore";
Keyword "extract";
Keyword "token";
Keyword "semantic";
Keyword "rule";
Keyword "optional";
Keyword "context";

// Special Symbols
Symbol /[{};|()=,:\.\[\]\+]/;

// Identifiers consist of alphanumerics and underscores, and cannot start with a digit
Identifier /[_a-zA-Z]\w*/;

/*===================================
  Semantics
===================================*/
// Semantic types are used to augment the token types.
// For example, an Identifier token can be either a rule name or a variable name.
// When defining rules, you can override the semantic type of a token
// or of an entire subtree.
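// For example, the parameter "(Rule) ruleName: token Identifier" in
// DefineRuleStatement below re-tags an Identifier token with the Rule semantic.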

// Semantics
semantic Variable;
semantic Token;
semantic Semantic;
semantic Rule;
semantic HookName;
semantic HookType;
semantic ContextType;

/*===================================
  Rules
===================================*/
// There are two kinds of rules:
// Union:      rule A = B | C;
// Functional: rule A(...) <body>;
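// For example, from later in this file:
//   rule LiteralOrRegExp = TokenLiteral | TokenRegExp;  // union
//   rule TokenLiteral(t: token Literal);                // functional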

// The first rule is the target of the parser.
// The parser can be configured to generate a single root node,
// or to keep generating root nodes until the end of the file.

rule TopLevelStatement(body: TopLevelDefine, _: token Symbol";");

rule TopLevelDefine = 
  DefineIncludeStatement
  | DefineContextStatement
  | DefineRuleStatement
  | DefineTokenTypeStatement
  | DefineIgnoreTokenRuleStatement
  | DefineTokenRuleStatement
  | DefineSemanticStatement;

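// A rule may carry a hook attribute of the form ("name":"type").
// Judging from the HookAttribute rule below, the first literal names
// the hook function and the second gives its output type ("()" for none).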
rule ("parse_include":"()")
DefineIncludeStatement(path: token Literal);

rule ("parse_context":"()")
DefineContextStatement(
    _: token Keyword"context",
    (ContextType) contextType: token Literal
);

rule ("parse_rule":"()")
DefineRuleStatement(
    _: token Keyword"rule",
    hookAttr: optional HookAttribute,
    (Rule) ruleName: token Identifier,
    body: RuleDefineBody
);

rule ("parse_hook":"Hook")
HookAttribute(
    _: token Symbol"(",
    (HookName) hookName: token Literal,
    _: token Symbol":",
    (HookType) hookType: token Literal,
    _: token Symbol")"
);

rule ("parse_rule_value":"RuleValue")
RuleDefineBody = UnionRuleBody | FunctionalRuleBody;

rule UnionRuleBody(
    _: token Symbol"=",
    (Rule) first: optional token Identifier,
    rest: optional UnionRuleListTail+
);

rule UnionRuleListTail(
    _: token Symbol"|",
    (Rule) r: token Identifier
);
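// Note the list encoding: a union body is an optional first alternative
// followed by repeated "| X" tails, so "A | B | C" parses as
// first=A, rest=[(| B), (| C)]. The same pattern appears for the
// parameter lists below.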

rule FunctionalRuleBody(
    _: token Symbol"(",
    firstParam: optional Parameter,
    restParams: optional ParamListTail+,
    _: token Symbol")"
);

rule ParamListTail(_: token Symbol",", p: Parameter);

rule ("parse_param":"Param") Parameter(
    semAttr: optional ParamSemantic,
    (Variable) variable: token Identifier,
    _: token Symbol":",
    type: optional RuleType
);

rule ParamSemantic(
    _: token Symbol"(",
    (Semantic) semanticName: optional token Identifier,
    _: token Symbol")"
);

rule RuleType(
    kwOptional: optional token Keyword"optional",
    kwToken: optional token Keyword"token",
    id: token Identifier,
    tokenContent: optional token Literal,
    isList: optional token Symbol"+"
);
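// RuleType covers the parameter types used throughout this file, e.g.
//   token Keyword"rule"          -> kwToken + id + tokenContent
//   optional UnionRuleListTail+  -> kwOptional + id + isList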

rule ("parse_token_def":"()") DefineTokenTypeStatement(
    kwExtract: optional token Keyword"extract",
    _: token Keyword"token",
    (Token) tokenType: token Identifier
);

rule ("parse_token_ignore_rule":"()")
DefineIgnoreTokenRuleStatement(_: token Keyword"ignore", value: LiteralOrRegExp);

rule ("parse_token_rule":"()") DefineTokenRuleStatement((Token) tokenType: token Identifier, value: LiteralOrRegExp);

rule LiteralOrRegExp = TokenLiteral | TokenRegExp;
rule TokenLiteral(t: token Literal);
rule TokenRegExp(t: token RegExp);

rule ("parse_semantic":"()") DefineSemanticStatement(_: token Keyword"semantic", (Semantic) id: token Identifier);