# PEGN (v1.1.1) pegn.dev/spec/nodes.pegn
# Copyright 2021 Robert S Muhlestein (rob@rwx.gg)
# Licensed under Apache-2
# Uses pegn.dev/spec/classes.pegn
# Uses pegn.dev/spec/tokens.pegn

# NOTE: assumes scanning by at least one byte at a time
# (except for StateDef, which is rhetorical)

# Top level: optional header lines, then one or more definitions.
Spec       <-- Meta? Copyright? Licensed? Uses* ComEndLine*
               (Definition ComEndLine*)+
Meta       <-- '# ' Grammar ' (' Version ') ' Home EndLine
Copyright  <-- '# Copyright ' Comment EndLine
Licensed   <-- '# Licensed under ' Comment EndLine
Uses       <-- '# Uses ' Path EndLine
Path       <-- (!ws unipoint)+
ComEndLine <- SP* ('# ' Comment)? EndLine
Definition <- StateDef / NodeDef / ScanDef / ClassDef / TokenDef

# Parts of the Meta line, plus the text of a comment.
# A Comment runs to the end of its line, so every definition in this
# file must sit on its own line (or on indented continuation lines).
Grammar    <- Name ('-' NameExt)?
Version    <- 'v' MajorVer '.' MinorVer '.' PatchVer ('-' PreVer)?
Home       <-- (!ws unipoint)+
Comment    <-- (!EndLine unipoint)+

# Definition forms; only NodeDef is written with the '<--' arrow.
StateDef   <-- StateId SP+ '<-'  SP+ '# ' Comment    # rhetorical
ScanDef    <-- CheckId SP+ '<-'  SP+ Expression      # convenient
NodeDef    <-- CheckId SP+ '<--' SP+ Expression      # significant
ClassDef   <-- ClassId SP+ '<-'  SP+ ClassExpr
TokenDef   <-- TokenId SP+ '<-'  SP+
               TokenVal (Spacing TokenVal)* ComEndLine

# lower-case of identifier must be unique within grammar
Identifier <- CheckId / ClassId / TokenId / StateId

TokenVal   <- Unicode / Binary / Hexadec / Octal / SQ String SQ

# Pieces of the grammar name and version.
Name       <-- upper{2,12}    # ascii, not unicode
NameExt    <-- visible{1,20}
MajorVer   <-- digit+
MinorVer   <-- digit+
PatchVer   <-- digit+
PreVer     <-- (word / DASH)+ ('.' (word / DASH)+)*

# Identifier shapes. Reserved ids are tried before the general form.
# NOTE(review): under strict PEG ordered choice an id that merely starts
# with a reserved id (e.g. TABLE vs TAB) matches the reserved id first
# and the general form is never tried -- confirm this is intended.
CheckId    <-- (upper lower+)+
ClassId    <-- ResClassId / lower (lower / UNDER lower)+
TokenId    <-- ResTokenId / upper (upper / UNDER upper)+
StateId    <-- UNDER upper+

# Expressions: '/'-separated sequences of rules; a rule is a primary
# with an optional '&' or '!' prefix and an optional quantifier.
Expression <-- Sequence (Spacing '/' SP+ Sequence)*
ClassExpr  <-- Simple (Spacing '/' SP+ Simple)*
Simple     <- Unicode / Binary / Hexadec / Octal
            / ClassId / TokenId / Range / SQ String SQ
Spacing    <- ComEndLine? SP+
Sequence   <-- Rule (Spacing Rule)*
Rule       <- PosLook / NegLook / Plain
Plain      <-- Primary Quant?
PosLook    <-- '&' Primary Quant?
NegLook    <-- '!' Primary Quant?
Primary    <- Simple / CheckId / '(' Expression ')'

# Quantifiers that may follow a primary.
Quant      <- Optional / MinZero / MinOne / MinMax / Amount
Optional   <-- '?'
MinZero    <-- '*'
MinOne     <-- '+'
MinMax     <-- '{' Min ',' Max? '}'
Min        <-- digit+
Max        <-- digit+
Amount     <- '{' Count '}'
Count      <-- digit+

# Bracketed ranges; both ends of a range use the same literal form.
Range      <- AlphaRange / IntRange / UniRange / BinRange / HexRange / OctRange
UniRange   <-- '[' Unicode '-' Unicode ']'    # [u0000-u10FFFF]
AlphaRange <-- '[' Letter '-' Letter ']'      # [a-m] [A-Z]
IntRange   <-- '[' Integer '-' Integer ']'    # [0-108] [0-9]
BinRange   <-- '[' Binary '-' Binary ']'      # [b101-b1111111110101010]
HexRange   <-- '[' Hexadec '-' Hexadec ']'    # [x20-x2F] [xFFFFF-x1FFFFF]
OctRange   <-- '[' Octal '-' Octal ']'        # [o20-o37]

# Literal values.
String     <-- quotable+
Letter     <-- alpha
Unicode    <-- 'u' ('10' uphex{4} / uphex{4,5})
Integer    <-- digit+
Binary     <-- 'b' bindig+    # b1 == b00000001 (0 pad to closest byte)
Hexadec    <-- 'x' uphex+
Octal      <-- 'o' octdig+

EndLine    <-- LF / CRLF / CR

# Reserved class ids. Order matters: a longer id is listed before any
# id that is a prefix of it (alphanum before alpha, lowerhex before lower).
ResClassId <-- 'alphanum' / 'alpha' / 'any' / 'bindig' / 'control'
             / 'digit' / 'hexdig' / 'lowerhex' / 'lower' / 'octdig'
             / 'punct' / 'quotable' / 'sign' / 'uphex' / 'upper'
             / 'visible' / 'ws' / 'alnum' / 'ascii' / 'blank' / 'cntrl'
             / 'graph' / 'print' / 'space' / 'word' / 'xdigit' / 'unipoint'

# Reserved token ids. Order matters here too (CRLF before CR, LFAT
# before LF, LARROWF before LARROW, RARROWF before RARROW).
ResTokenId <-- 'TAB' / 'CRLF' / 'CR' / 'LFAT' / 'SP' / 'VT' / 'FF'
             / 'NOT' / 'BANG' / 'DQ' / 'HASH' / 'DOLLAR' / 'PERCENT'
             / 'AND' / 'SQ' / 'LPAREN' / 'RPAREN' / 'STAR' / 'PLUS'
             / 'COMMA' / 'DASH' / 'MINUS' / 'DOT' / 'SLASH' / 'COLON'
             / 'SEMI' / 'LT' / 'EQ' / 'GT' / 'QUERY' / 'QUESTION' / 'AT'
             / 'LBRAKT' / 'BKSLASH' / 'RBRAKT' / 'CARET' / 'UNDER'
             / 'BKTICK' / 'LCURLY' / 'LBRACE' / 'BAR' / 'PIPE' / 'RCURLY'
             / 'RBRACE' / 'TILDE' / 'UNKNOWN' / 'REPLACE' / 'MAXRUNE'
             / 'MAXASCII' / 'MAXLATIN' / 'LARROWF' / 'RARROWF' / 'LLARROW'
             / 'RLARROW' / 'LARROW' / 'LF' / 'RARROW' / 'RFAT' / 'WALRUS'
             / 'ENDOFDATA'