From d94403d126e0c6ffde788a8f91bcacbed797420f Mon Sep 17 00:00:00 2001 From: Luke Wilson Date: Sat, 24 May 2025 15:17:04 -0500 Subject: [PATCH] Nearly complete Zig AST --- internal/zig/README.md | 1 + internal/zig/ast.go | 495 +++++++++++++++++++++++++++-- internal/zig/zig-grammar.peg | 582 +++++++++++++++++++++++++++++++++++ internal/zig/zig.go | 86 ++++-- internal/zig/zig_test.go | 14 +- 5 files changed, 1113 insertions(+), 65 deletions(-) create mode 100644 internal/zig/README.md create mode 100644 internal/zig/zig-grammar.peg diff --git a/internal/zig/README.md b/internal/zig/README.md new file mode 100644 index 0000000..b192b29 --- /dev/null +++ b/internal/zig/README.md @@ -0,0 +1 @@ +A Zig code generator based on the [Zig language specification](https://github.com/ziglang/zig-spec) PEG grammar. The grammar is included in the source so that it can easily be diffed for changes in the future. diff --git a/internal/zig/ast.go b/internal/zig/ast.go index af9aea0..f1c2265 100644 --- a/internal/zig/ast.go +++ b/internal/zig/ast.go @@ -1,50 +1,497 @@ +// Abstract Syntax Tree (AST) definitions for Zig, closely following the grammar in zig-grammar.peg package zig -// https://github.com/ziglang/zig-spec/blob/master/grammar/grammar.peg - +// Root is the top-level node of a Zig source file. type Root struct { - ContainerDocComment string // //! Doc Comment + ContainerDocComment DocComment // //! Doc Comment (optional) ContainerMembers []*ContainerMember } type ContainerMember struct { - // FIXME - Decls []Decl + DocComment DocComment // /// Doc Comment (optional) + Comptime bool // 'comptime' field (optional) + Field *ContainerField + Decl Decl // Can be nil if this is a field } -type Decl interface{} +// ContainerField represents a field in a struct/enum/union container. +type ContainerField struct { + Name string // May be empty for anonymous fields + Type TypeExpr + ByteAlign *Expr // Optional + Value Expr // Optional initializer +} +// Decl is any top-level declaration (function, variable, usingnamespace, etc). +type Decl interface { + isDecl() +} + +// FnDecl represents a function declaration. type FnDecl struct { - Name string - Params []*ParamDecl - CallConv string - ReturnType TypeExpr - Body *Block // nil means semicolon + Export bool + Extern bool + ExternName string // Optional string for extern + Inline bool + NoInline bool + ThreadLocal bool + Name string // May be empty (anonymous) + Params []*ParamDecl + ByteAlign *Expr + AddrSpace *Expr + LinkSection *Expr + CallConv *Expr + ReturnType TypeExpr + Body *Block // nil means semicolon } +func (*FnDecl) isDecl() {} + +// GlobalVarDecl represents a global variable declaration. +type GlobalVarDecl struct { + Export bool + Extern bool + ExternName string // Optional string for extern + ThreadLocal bool + Const bool + Name string + Type TypeExpr // Optional + ByteAlign *Expr + AddrSpace *Expr + LinkSection *Expr + Value Expr // Optional initializer +} + +func (*GlobalVarDecl) isDecl() {} + +// UsingNamespaceDecl represents a 'usingnamespace' declaration. +type UsingNamespaceDecl struct { + Expr Expr +} + +func (*UsingNamespaceDecl) isDecl() {} + +// ComptimeDecl represents a 'comptime' block at the container level. +type ComptimeDecl struct { + Block *Block +} + +func (*ComptimeDecl) isDecl() {} + +// TestDecl represents a 'test' declaration. +type TestDecl struct { + Name string // Optional + Block *Block +} + +func (*TestDecl) isDecl() {} + +// ParamDecl represents a function parameter. type ParamDecl struct { - DocComment string // ??? It's what it says - Name string // Can be empty - Type TypeExpr // anytype when empty + DocComment DocComment // /// Doc Comment (optional) + NoAlias bool + Comptime bool + Name string // May be empty + Type TypeExpr // 'anytype' if empty } +// Block represents a block of statements. type Block struct { - Label string + Label string // Optional Stmts []Stmt } -type Stmt interface{} +// Stmt is any statement. +type Stmt interface { + isStmt() +} -type ReturnStmt struct{} +// ReturnStmt represents a 'return' statement. +type ReturnStmt struct { + Value Expr // Optional +} +func (*ReturnStmt) isStmt() {} + +// IfStmt represents an if statement (with optional else branch and payload). +type IfStmt struct { + Cond Expr // Condition expression + Then Stmt // Then branch + Else Stmt // Optional else branch + Payload *Payload // Optional payload (for |x|, |*x|, |*x, y|, etc.) +} + +func (*IfStmt) isStmt() {} + +// IfExpr represents an if expression (with optional else branch and payload). +type IfExpr struct { + Cond Expr // Condition expression + Then Expr // Then branch + Else Expr // Optional else branch + Payload *Payload // Optional payload (for |x|, |*x|, |*x, y|, etc.) +} + +// DeferStmt represents a 'defer' or 'errdefer' statement. +type DeferStmt struct { + ErrDefer bool // True for 'errdefer', false for 'defer' + Payload *Payload // Optional payload (for |x|, |*x|, etc.) + Stmt Stmt // Statement to defer +} + +func (*DeferStmt) isStmt() {} + +// SuspendStmt represents a 'suspend' or 'nosuspend' statement. +type SuspendStmt struct { + NoSuspend bool + Stmt Stmt +} + +func (*SuspendStmt) isStmt() {} + +// BlockStmt allows a block to be used as a statement. +type BlockStmt struct { + Block *Block +} + +func (*BlockStmt) isStmt() {} + +// BreakStmt represents a 'break' statement. +type BreakStmt struct { + Label string // Optional + Value Expr // Optional +} + +func (*BreakStmt) isStmt() {} + +// ContinueStmt represents a 'continue' statement. +type ContinueStmt struct { + Label string // Optional +} + +func (*ContinueStmt) isStmt() {} + +// LoopStmt represents a for/while loop statement. +type LoopStmt struct { + Inline bool // True if 'inline' is present + Kind string // "for" or "while" + Prefix LoopPrefix // ForPrefix or WhilePrefix + Body Stmt // Loop body + Else Stmt // Optional else branch + Payload *Payload // Optional payload (for |x|, |*x|, |*x, y|, etc.) +} + +func (*LoopStmt) isStmt() {} + +// LoopPrefix is the prefix of a for/while loop. +type LoopPrefix interface{} + +// ForPrefix represents the prefix of a for loop. +type ForPrefix struct { + Args []ForArg // For loop arguments + Payload *Payload // Payload (|*x, y|, etc.) +} + +// ForArg represents an argument in a for loop. +type ForArg struct { + Expr Expr // Argument expression + From Expr // Optional (for .. or ..<) +} + +// WhilePrefix represents the prefix of a while loop. +type WhilePrefix struct { + Cond Expr // Condition expression + Payload *Payload // Optional payload (for |x|, |*x|, etc.) + Continue Expr // Optional (while continue expression) +} + +// SwitchStmt represents a switch statement. +type SwitchStmt struct { + Cond Expr + Prongs []*SwitchProng +} + +func (*SwitchStmt) isStmt() {} + +// SwitchExpr represents a switch expression. +type SwitchExpr struct { + Cond Expr + Prongs []*SwitchProng +} + +// SwitchProng represents a prong in a switch. +type SwitchProng struct { + Inline bool // True if 'inline' is present + Cases []*SwitchCase // List of cases for this prong + Payload *Payload // Optional payload (for |*x, y|, etc.) + Expr Expr // Result expression for this prong +} + +// SwitchCase represents a case in a switch. +type SwitchCase struct { + Expr Expr + To Expr // Optional (for ..) + IsElse bool +} + +// AsmExpr represents an inline assembly expression. +type AsmExpr struct { + Volatile bool + Template Expr + Outputs []*AsmOutputItem + Inputs []*AsmInputItem + Clobbers []string +} + +// AsmOutputItem represents an output operand in asm. +type AsmOutputItem struct { + Name string + Constraint string + Type TypeExpr +} + +// AsmInputItem represents an input operand in asm. +type AsmInputItem struct { + Name string + Constraint string + Expr Expr +} + +// ContainerDecl represents a struct, enum, union, or opaque declaration. +type ContainerDecl struct { + Extern bool + Packed bool + Kind string // "struct", "enum", "union", "opaque" + TagType TypeExpr // Optional (for enum/union) + Fields []*ContainerMember + DocComment DocComment +} + +// ErrorSetDecl represents an error set declaration. +type ErrorSetDecl struct { + Names []string +} + +// InitListExpr represents an initializer list. +// Exactly one of Fields, Values, or Empty must be set (non-nil/non-empty or true). +type InitListExpr struct { + Fields []*FieldInit // Field initializers (for {.foo = 1, .bar = 2}), mutually exclusive with Values/Empty + Values []Expr // Positional initializers (for {1, 2, 3}), mutually exclusive with Fields/Empty + Empty bool // True if '{}', mutually exclusive with Fields/Values +} + +// FieldInit represents a field initializer in an init list. +type FieldInit struct { + Name string + Value Expr +} + +// Identifier represents an identifier expression (variable, field, etc). +type Identifier struct { + Name string // The identifier name +} + +// Literal represents a literal value (int, float, string, char). +type Literal struct { + Kind string // "int", "float", "string", "char" + Value string // The literal value as a string +} + +// BinaryExpr represents a binary operation (e.g. +, -, *, /, etc). +type BinaryExpr struct { + Op string // Operator, e.g. "+", "-", "*", etc. + Left Expr // Left operand + Right Expr // Right operand +} + +// UnaryExpr represents a unary operation (e.g. !, -, ~, etc). +type UnaryExpr struct { + Op string // Operator, e.g. "-", "!", "~" + Expr Expr // Operand +} + +// GroupedExpr represents a parenthesized expression. +type GroupedExpr struct { + Expr Expr // The grouped expression +} + +// CallExpr represents a function call. +type CallExpr struct { + Fun Expr // Function being called + Args []Expr // Arguments to the function +} + +// FieldAccessExpr represents a field/member access as a suffix operation (e.g. foo.bar). +type FieldAccessExpr struct { + Receiver Expr // The object being accessed + Field string // The field name +} + +// IndexExpr represents an indexing operation as a suffix (e.g. arr[0]). +type IndexExpr struct { + Receiver Expr // The object being indexed + Index Expr // The index expression +} + +// ResumeExpr represents a 'resume' expression. +type ResumeExpr struct { + Expr Expr // The expression to resume +} + +// ComptimeExpr represents a 'comptime' expression. +type ComptimeExpr struct { + Expr Expr // The expression to evaluate at comptime +} + +// NosuspendExpr represents a 'nosuspend' expression. +type NosuspendExpr struct { + Expr Expr // The expression to evaluate with nosuspend +} + +// ContinueExpr represents a 'continue' expression. +type ContinueExpr struct { + Label string // Optional label +} + +// Expr is any expression. type Expr interface{} -// This will need to become a real type expr someday -type TypeExpr string +// TypeExpr is any type expression. +type TypeExpr interface{} -func (t TypeExpr) String() string { - if string(t) == "" { - return "anytype" - } - return string(t) +// DocComment represents a doc comment (/// or //! lines). +// Newlines in the string automatically add more comments in the output. +type DocComment string + +// Payload represents a control flow payload (|x|, |*x|, |*x, y|, etc). +// Each entry in Names corresponds to a variable name; the same index in Pointers is true if that name is a pointer (|*x|). +type Payload struct { + Names []string // Names in the payload, in order + Pointers []bool // True if the corresponding name is a pointer (|*x|, |*x, y|, |*x, *y|, etc.) +} + +// LabeledBlock represents a labeled block or loop (label: {...}). +type LabeledBlock struct { + Label string // The label name + Block *Block // The labeled block +} + +// LabeledTypeExpr represents a labeled type block (label: type). +type LabeledTypeExpr struct { + Label string // The label name + Type TypeExpr // The labeled type +} + +// IfTypeExpr represents an if expression at the type level. +type IfTypeExpr struct { + Cond Expr + Then TypeExpr + Else TypeExpr // Optional + Payload *Payload // Optional +} + +// ForTypeExpr represents a for expression at the type level. +type ForTypeExpr struct { + Prefix *ForPrefix + Body TypeExpr + Else TypeExpr // Optional + Payload *Payload // Optional +} + +// WhileTypeExpr represents a while expression at the type level. +type WhileTypeExpr struct { + Prefix *WhilePrefix + Body TypeExpr + Else TypeExpr // Optional + Payload *Payload // Optional +} + +// DotAsteriskExpr represents a .* +type DotAsteriskExpr struct { + Receiver Expr // The expression being dereferenced +} + +// DotQuestionExpr represents a .? +type DotQuestionExpr struct { + Receiver Expr // The expression being checked for optional +} + +// AsyncExpr represents an 'async' expression. +type AsyncExpr struct { + Expr Expr // The expression to be awaited asynchronously +} + +// TryExpr represents a 'try' expression. +type TryExpr struct { + Expr Expr // The expression to try +} + +// AwaitExpr represents an 'await' expression. +type AwaitExpr struct { + Expr Expr // The expression to await +} + +// UnreachableExpr represents the 'unreachable' keyword. +type UnreachableExpr struct{} + +// EmptyInitListExpr represents an empty initializer list '{}'. +type EmptyInitListExpr struct{} + +// PositionalInitListExpr represents a positional initializer list '{expr, ...}'. +type PositionalInitListExpr struct { + Values []Expr // Expressions in order +} + +// FieldInitListExpr represents a field initializer list '{.field = expr, ...}'. +type FieldInitListExpr struct { + Fields []*FieldInit // Field initializers +} + +// SwitchProngPayload represents a switch prong payload (|*x, y|). +type SwitchProngPayload struct { + Pointer bool + Names []string +} + +// SwitchProngCase represents a single case in a switch prong. +type SwitchProngCase struct { + Expr Expr // The case expression + To Expr // Optional, for ranges +} + +// SwitchProngFull represents a full switch prong with cases and payload. +type SwitchProngFull struct { + Inline bool + Cases []*SwitchProngCase // One or more cases + Payload *SwitchProngPayload // Optional + Expr Expr // The result expression +} + +// SwitchElseProng represents an 'else' prong in a switch. +type SwitchElseProng struct { + Expr Expr // The result expression +} + +// VarPattern represents a variable pattern for destructuring or multiple variable declarations. +type VarPattern struct { + Names []string // Variable names (single or multiple for destructuring) +} + +// VarDeclStmt represents a variable or const declaration at statement level, supporting destructuring and multi-var declarations. +type VarDeclStmt struct { + Const bool + Pattern VarPattern // Destructuring or multiple variable names + Type TypeExpr // Optional + Value Expr // Optional initializer + ByteAlign Expr // Optional + AddrSpace Expr // Optional + LinkSection Expr // Optional +} + +func (*VarDeclStmt) isStmt() {} + +// DotCallExpr represents a method call expression where a method is called on a receiver. +// For example, in the expression `foo.bar()`, `foo` is the Receiver and `bar()` is the Call. +type DotCallExpr struct { + Receiver Expr // The expression being called (e.g. foo) + Call *CallExpr } diff --git a/internal/zig/zig-grammar.peg b/internal/zig/zig-grammar.peg new file mode 100644 index 0000000..6ba89e8 --- /dev/null +++ b/internal/zig/zig-grammar.peg @@ -0,0 +1,582 @@ +Root <- skip container_doc_comment? ContainerMembers eof + +# *** Top level *** +ContainerMembers <- ContainerDeclarations (ContainerField COMMA)* (ContainerField / ContainerDeclarations) + +ContainerDeclarations <- (TestDecl / ComptimeDecl / doc_comment? KEYWORD_pub? Decl)* + +TestDecl <- KEYWORD_test (STRINGLITERALSINGLE / IDENTIFIER)? Block + +ComptimeDecl <- KEYWORD_comptime Block + +Decl + <- (KEYWORD_export / KEYWORD_extern STRINGLITERALSINGLE? / KEYWORD_inline / KEYWORD_noinline)? FnProto (SEMICOLON / Block) + / (KEYWORD_export / KEYWORD_extern STRINGLITERALSINGLE?)? KEYWORD_threadlocal? GlobalVarDecl + / KEYWORD_usingnamespace Expr SEMICOLON + +FnProto <- KEYWORD_fn IDENTIFIER? LPAREN ParamDeclList RPAREN ByteAlign? AddrSpace? LinkSection? CallConv? EXCLAMATIONMARK? TypeExpr + +VarDeclProto <- (KEYWORD_const / KEYWORD_var) IDENTIFIER (COLON TypeExpr)? ByteAlign? AddrSpace? LinkSection? + +GlobalVarDecl <- VarDeclProto (EQUAL Expr)? SEMICOLON + +ContainerField <- doc_comment? KEYWORD_comptime? !KEYWORD_fn (IDENTIFIER COLON)? TypeExpr ByteAlign? (EQUAL Expr)? + +# *** Block Level *** +Statement + <- KEYWORD_comptime ComptimeStatement + / KEYWORD_nosuspend BlockExprStatement + / KEYWORD_suspend BlockExprStatement + / KEYWORD_defer BlockExprStatement + / KEYWORD_errdefer Payload? BlockExprStatement + / IfStatement + / LabeledStatement + / SwitchExpr + / VarDeclExprStatement + +ComptimeStatement + <- BlockExpr + / VarDeclExprStatement + +IfStatement + <- IfPrefix BlockExpr ( KEYWORD_else Payload? Statement )? + / IfPrefix AssignExpr ( SEMICOLON / KEYWORD_else Payload? Statement ) + +LabeledStatement <- BlockLabel? (Block / LoopStatement) + +LoopStatement <- KEYWORD_inline? (ForStatement / WhileStatement) + +ForStatement + <- ForPrefix BlockExpr ( KEYWORD_else Statement )? + / ForPrefix AssignExpr ( SEMICOLON / KEYWORD_else Statement ) + +WhileStatement + <- WhilePrefix BlockExpr ( KEYWORD_else Payload? Statement )? + / WhilePrefix AssignExpr ( SEMICOLON / KEYWORD_else Payload? Statement ) + +BlockExprStatement + <- BlockExpr + / AssignExpr SEMICOLON + +BlockExpr <- BlockLabel? Block + +# An expression, assignment, or any destructure, as a statement. +VarDeclExprStatement + <- VarDeclProto (COMMA (VarDeclProto / Expr))* EQUAL Expr SEMICOLON + / Expr (AssignOp Expr / (COMMA (VarDeclProto / Expr))+ EQUAL Expr)? SEMICOLON + +# *** Expression Level *** + +# An assignment or a destructure whose LHS are all lvalue expressions. +AssignExpr <- Expr (AssignOp Expr / (COMMA Expr)+ EQUAL Expr)? + +SingleAssignExpr <- Expr (AssignOp Expr)? + +Expr <- BoolOrExpr + +BoolOrExpr <- BoolAndExpr (KEYWORD_or BoolAndExpr)* + +BoolAndExpr <- CompareExpr (KEYWORD_and CompareExpr)* + +CompareExpr <- BitwiseExpr (CompareOp BitwiseExpr)? + +BitwiseExpr <- BitShiftExpr (BitwiseOp BitShiftExpr)* + +BitShiftExpr <- AdditionExpr (BitShiftOp AdditionExpr)* + +AdditionExpr <- MultiplyExpr (AdditionOp MultiplyExpr)* + +MultiplyExpr <- PrefixExpr (MultiplyOp PrefixExpr)* + +PrefixExpr <- PrefixOp* PrimaryExpr + +PrimaryExpr + <- AsmExpr + / IfExpr + / KEYWORD_break BreakLabel? Expr? + / KEYWORD_comptime Expr + / KEYWORD_nosuspend Expr + / KEYWORD_continue BreakLabel? + / KEYWORD_resume Expr + / KEYWORD_return Expr? + / BlockLabel? LoopExpr + / Block + / CurlySuffixExpr + +IfExpr <- IfPrefix Expr (KEYWORD_else Payload? Expr)? + +Block <- LBRACE Statement* RBRACE + +LoopExpr <- KEYWORD_inline? (ForExpr / WhileExpr) + +ForExpr <- ForPrefix Expr (KEYWORD_else Expr)? + +WhileExpr <- WhilePrefix Expr (KEYWORD_else Payload? Expr)? + +CurlySuffixExpr <- TypeExpr InitList? + +InitList + <- LBRACE FieldInit (COMMA FieldInit)* COMMA? RBRACE + / LBRACE Expr (COMMA Expr)* COMMA? RBRACE + / LBRACE RBRACE + +TypeExpr <- PrefixTypeOp* ErrorUnionExpr + +ErrorUnionExpr <- SuffixExpr (EXCLAMATIONMARK TypeExpr)? + +SuffixExpr + <- KEYWORD_async PrimaryTypeExpr SuffixOp* FnCallArguments + / PrimaryTypeExpr (SuffixOp / FnCallArguments)* + +PrimaryTypeExpr + <- BUILTINIDENTIFIER FnCallArguments + / CHAR_LITERAL + / ContainerDecl + / DOT IDENTIFIER + / DOT InitList + / ErrorSetDecl + / FLOAT + / FnProto + / GroupedExpr + / LabeledTypeExpr + / IDENTIFIER + / IfTypeExpr + / INTEGER + / KEYWORD_comptime TypeExpr + / KEYWORD_error DOT IDENTIFIER + / KEYWORD_anyframe + / KEYWORD_unreachable + / STRINGLITERAL + / SwitchExpr + +ContainerDecl <- (KEYWORD_extern / KEYWORD_packed)? ContainerDeclAuto + +ErrorSetDecl <- KEYWORD_error LBRACE IdentifierList RBRACE + +GroupedExpr <- LPAREN Expr RPAREN + +IfTypeExpr <- IfPrefix TypeExpr (KEYWORD_else Payload? TypeExpr)? + +LabeledTypeExpr + <- BlockLabel Block + / BlockLabel? LoopTypeExpr + +LoopTypeExpr <- KEYWORD_inline? (ForTypeExpr / WhileTypeExpr) + +ForTypeExpr <- ForPrefix TypeExpr (KEYWORD_else TypeExpr)? + +WhileTypeExpr <- WhilePrefix TypeExpr (KEYWORD_else Payload? TypeExpr)? + +SwitchExpr <- KEYWORD_switch LPAREN Expr RPAREN LBRACE SwitchProngList RBRACE + +# *** Assembly *** +AsmExpr <- KEYWORD_asm KEYWORD_volatile? LPAREN Expr AsmOutput? RPAREN + +AsmOutput <- COLON AsmOutputList AsmInput? + +AsmOutputItem <- LBRACKET IDENTIFIER RBRACKET STRINGLITERAL LPAREN (MINUSRARROW TypeExpr / IDENTIFIER) RPAREN + +AsmInput <- COLON AsmInputList AsmClobbers? + +AsmInputItem <- LBRACKET IDENTIFIER RBRACKET STRINGLITERAL LPAREN Expr RPAREN + +AsmClobbers <- COLON StringList + +# *** Helper grammar *** +BreakLabel <- COLON IDENTIFIER + +BlockLabel <- IDENTIFIER COLON + +FieldInit <- DOT IDENTIFIER EQUAL Expr + +WhileContinueExpr <- COLON LPAREN AssignExpr RPAREN + +LinkSection <- KEYWORD_linksection LPAREN Expr RPAREN + +AddrSpace <- KEYWORD_addrspace LPAREN Expr RPAREN + +# Fn specific +CallConv <- KEYWORD_callconv LPAREN Expr RPAREN + +ParamDecl + <- doc_comment? (KEYWORD_noalias / KEYWORD_comptime)? (IDENTIFIER COLON)? ParamType + / DOT3 + +ParamType + <- KEYWORD_anytype + / TypeExpr + +# Control flow prefixes +IfPrefix <- KEYWORD_if LPAREN Expr RPAREN PtrPayload? + +WhilePrefix <- KEYWORD_while LPAREN Expr RPAREN PtrPayload? WhileContinueExpr? + +ForPrefix <- KEYWORD_for LPAREN ForArgumentsList RPAREN PtrListPayload + +# Payloads +Payload <- PIPE IDENTIFIER PIPE + +PtrPayload <- PIPE ASTERISK? IDENTIFIER PIPE + +PtrIndexPayload <- PIPE ASTERISK? IDENTIFIER (COMMA IDENTIFIER)? PIPE + +PtrListPayload <- PIPE ASTERISK? IDENTIFIER (COMMA ASTERISK? IDENTIFIER)* COMMA? PIPE + +# Switch specific +SwitchProng <- KEYWORD_inline? SwitchCase EQUALRARROW PtrIndexPayload? SingleAssignExpr + +SwitchCase + <- SwitchItem (COMMA SwitchItem)* COMMA? + / KEYWORD_else + +SwitchItem <- Expr (DOT3 Expr)? + +# For specific +ForArgumentsList <- ForItem (COMMA ForItem)* COMMA? + +ForItem <- Expr (DOT2 Expr?)? + +# Operators +AssignOp + <- ASTERISKEQUAL + / ASTERISKPIPEEQUAL + / SLASHEQUAL + / PERCENTEQUAL + / PLUSEQUAL + / PLUSPIPEEQUAL + / MINUSEQUAL + / MINUSPIPEEQUAL + / LARROW2EQUAL + / LARROW2PIPEEQUAL + / RARROW2EQUAL + / AMPERSANDEQUAL + / CARETEQUAL + / PIPEEQUAL + / ASTERISKPERCENTEQUAL + / PLUSPERCENTEQUAL + / MINUSPERCENTEQUAL + / EQUAL + +CompareOp + <- EQUALEQUAL + / EXCLAMATIONMARKEQUAL + / LARROW + / RARROW + / LARROWEQUAL + / RARROWEQUAL + +BitwiseOp + <- AMPERSAND + / CARET + / PIPE + / KEYWORD_orelse + / KEYWORD_catch Payload? + +BitShiftOp + <- LARROW2 + / RARROW2 + / LARROW2PIPE + +AdditionOp + <- PLUS + / MINUS + / PLUS2 + / PLUSPERCENT + / MINUSPERCENT + / PLUSPIPE + / MINUSPIPE + +MultiplyOp + <- PIPE2 + / ASTERISK + / SLASH + / PERCENT + / ASTERISK2 + / ASTERISKPERCENT + / ASTERISKPIPE + +PrefixOp + <- EXCLAMATIONMARK + / MINUS + / TILDE + / MINUSPERCENT + / AMPERSAND + / KEYWORD_try + / KEYWORD_await + +PrefixTypeOp + <- QUESTIONMARK + / KEYWORD_anyframe MINUSRARROW + / SliceTypeStart (ByteAlign / AddrSpace / KEYWORD_const / KEYWORD_volatile / KEYWORD_allowzero)* + / PtrTypeStart (AddrSpace / KEYWORD_align LPAREN Expr (COLON Expr COLON Expr)? RPAREN / KEYWORD_const / KEYWORD_volatile / KEYWORD_allowzero)* + / ArrayTypeStart + +SuffixOp + <- LBRACKET Expr (DOT2 (Expr? (COLON Expr)?)?)? RBRACKET + / DOT IDENTIFIER + / DOTASTERISK + / DOTQUESTIONMARK + +FnCallArguments <- LPAREN ExprList RPAREN + +# Ptr specific +SliceTypeStart <- LBRACKET (COLON Expr)? RBRACKET + +PtrTypeStart + <- ASTERISK + / ASTERISK2 + / LBRACKET ASTERISK (LETTERC / COLON Expr)? RBRACKET + +ArrayTypeStart <- LBRACKET Expr (COLON Expr)? RBRACKET + +# ContainerDecl specific +ContainerDeclAuto <- ContainerDeclType LBRACE container_doc_comment? ContainerMembers RBRACE + +ContainerDeclType + <- KEYWORD_struct (LPAREN Expr RPAREN)? + / KEYWORD_opaque + / KEYWORD_enum (LPAREN Expr RPAREN)? + / KEYWORD_union (LPAREN (KEYWORD_enum (LPAREN Expr RPAREN)? / Expr) RPAREN)? + +# Alignment +ByteAlign <- KEYWORD_align LPAREN Expr RPAREN + +# Lists +IdentifierList <- (doc_comment? IDENTIFIER COMMA)* (doc_comment? IDENTIFIER)? + +SwitchProngList <- (SwitchProng COMMA)* SwitchProng? + +AsmOutputList <- (AsmOutputItem COMMA)* AsmOutputItem? + +AsmInputList <- (AsmInputItem COMMA)* AsmInputItem? + +StringList <- (STRINGLITERAL COMMA)* STRINGLITERAL? + +ParamDeclList <- (ParamDecl COMMA)* ParamDecl? + +ExprList <- (Expr COMMA)* Expr? + +# *** Tokens *** +eof <- !. +bin <- [01] +bin_ <- '_'? bin +oct <- [0-7] +oct_ <- '_'? oct +hex <- [0-9a-fA-F] +hex_ <- '_'? hex +dec <- [0-9] +dec_ <- '_'? dec + +bin_int <- bin bin_* +oct_int <- oct oct_* +dec_int <- dec dec_* +hex_int <- hex hex_* + +ox80_oxBF <- [\200-\277] +oxF4 <- '\364' +ox80_ox8F <- [\200-\217] +oxF1_oxF3 <- [\361-\363] +oxF0 <- '\360' +ox90_0xBF <- [\220-\277] +oxEE_oxEF <- [\356-\357] +oxED <- '\355' +ox80_ox9F <- [\200-\237] +oxE1_oxEC <- [\341-\354] +oxE0 <- '\340' +oxA0_oxBF <- [\240-\277] +oxC2_oxDF <- [\302-\337] + +# From https://lemire.me/blog/2018/05/09/how-quickly-can-you-check-that-a-string-is-valid-unicode-utf-8/ +# First Byte Second Byte Third Byte Fourth Byte +# [0x00,0x7F] +# [0xC2,0xDF] [0x80,0xBF] +# 0xE0 [0xA0,0xBF] [0x80,0xBF] +# [0xE1,0xEC] [0x80,0xBF] [0x80,0xBF] +# 0xED [0x80,0x9F] [0x80,0xBF] +# [0xEE,0xEF] [0x80,0xBF] [0x80,0xBF] +# 0xF0 [0x90,0xBF] [0x80,0xBF] [0x80,0xBF] +# [0xF1,0xF3] [0x80,0xBF] [0x80,0xBF] [0x80,0xBF] +# 0xF4 [0x80,0x8F] [0x80,0xBF] [0x80,0xBF] + +mb_utf8_literal <- + oxF4 ox80_ox8F ox80_oxBF ox80_oxBF + / oxF1_oxF3 ox80_oxBF ox80_oxBF ox80_oxBF + / oxF0 ox90_0xBF ox80_oxBF ox80_oxBF + / oxEE_oxEF ox80_oxBF ox80_oxBF + / oxED ox80_ox9F ox80_oxBF + / oxE1_oxEC ox80_oxBF ox80_oxBF + / oxE0 oxA0_oxBF ox80_oxBF + / oxC2_oxDF ox80_oxBF + +# NOTE: `\135` is `]`. We separate to avoid this: [\000-\011\013-\046\050-\133]-\177] +# ^ ^XXXXXX +ascii_char_not_nl_slash_squote <- [\000-\011\013-\046\050-\133\136-\177] / ']' + +char_escape + <- "\\x" hex hex + / "\\u{" hex+ "}" + / "\\" [nr\\t'"] +char_char + <- mb_utf8_literal + / char_escape + / ascii_char_not_nl_slash_squote + +string_char + <- char_escape + / [^\\"\n] + +container_doc_comment <- ('//!' [^\n]* [ \n]* skip)+ +doc_comment <- ('///' [^\n]* [ \n]* skip)+ +line_comment <- '//' ![!/][^\n]* / '////' [^\n]* +line_string <- ('\\\\' [^\n]* [ \n]*)+ +skip <- ([ \n] / line_comment)* + +CHAR_LITERAL <- ['] char_char ['] skip +FLOAT + <- '0x' hex_int '.' hex_int ([pP] [-+]? dec_int)? skip + / dec_int '.' dec_int ([eE] [-+]? dec_int)? skip + / '0x' hex_int [pP] [-+]? dec_int skip + / dec_int [eE] [-+]? dec_int skip +INTEGER + <- '0b' bin_int skip + / '0o' oct_int skip + / '0x' hex_int skip + / dec_int skip +STRINGLITERALSINGLE <- ["] string_char* ["] skip +STRINGLITERAL + <- STRINGLITERALSINGLE + / (line_string skip)+ +IDENTIFIER + <- !keyword [A-Za-z_] [A-Za-z0-9_]* skip + / '@' STRINGLITERALSINGLE +BUILTINIDENTIFIER <- '@'[A-Za-z_][A-Za-z0-9_]* skip + + +AMPERSAND <- '&' ![=] skip +AMPERSANDEQUAL <- '&=' skip +ASTERISK <- '*' ![*%=|] skip +ASTERISK2 <- '**' skip +ASTERISKEQUAL <- '*=' skip +ASTERISKPERCENT <- '*%' ![=] skip +ASTERISKPERCENTEQUAL <- '*%=' skip +ASTERISKPIPE <- '*|' ![=] skip +ASTERISKPIPEEQUAL <- '*|=' skip +CARET <- '^' ![=] skip +CARETEQUAL <- '^=' skip +COLON <- ':' skip +COMMA <- ',' skip +DOT <- '.' ![*.?] skip +DOT2 <- '..' ![.] skip +DOT3 <- '...' skip +DOTASTERISK <- '.*' skip +DOTQUESTIONMARK <- '.?' skip +EQUAL <- '=' ![>=] skip +EQUALEQUAL <- '==' skip +EQUALRARROW <- '=>' skip +EXCLAMATIONMARK <- '!' ![=] skip +EXCLAMATIONMARKEQUAL <- '!=' skip +LARROW <- '<' ![<=] skip +LARROW2 <- '<<' ![=|] skip +LARROW2EQUAL <- '<<=' skip +LARROW2PIPE <- '<<|' ![=] skip +LARROW2PIPEEQUAL <- '<<|=' skip +LARROWEQUAL <- '<=' skip +LBRACE <- '{' skip +LBRACKET <- '[' skip +LPAREN <- '(' skip +MINUS <- '-' ![%=>|] skip +MINUSEQUAL <- '-=' skip +MINUSPERCENT <- '-%' ![=] skip +MINUSPERCENTEQUAL <- '-%=' skip +MINUSPIPE <- '-|' ![=] skip +MINUSPIPEEQUAL <- '-|=' skip +MINUSRARROW <- '->' skip +PERCENT <- '%' ![=] skip +PERCENTEQUAL <- '%=' skip +PIPE <- '|' ![|=] skip +PIPE2 <- '||' skip +PIPEEQUAL <- '|=' skip +PLUS <- '+' ![%+=|] skip +PLUS2 <- '++' skip +PLUSEQUAL <- '+=' skip +PLUSPERCENT <- '+%' ![=] skip +PLUSPERCENTEQUAL <- '+%=' skip +PLUSPIPE <- '+|' ![=] skip +PLUSPIPEEQUAL <- '+|=' skip +LETTERC <- 'c' skip +QUESTIONMARK <- '?' skip +RARROW <- '>' ![>=] skip +RARROW2 <- '>>' ![=] skip +RARROW2EQUAL <- '>>=' skip +RARROWEQUAL <- '>=' skip +RBRACE <- '}' skip +RBRACKET <- ']' skip +RPAREN <- ')' skip +SEMICOLON <- ';' skip +SLASH <- '/' ![=] skip +SLASHEQUAL <- '/=' skip +TILDE <- '~' skip + +end_of_word <- ![a-zA-Z0-9_] skip +KEYWORD_addrspace <- 'addrspace' end_of_word +KEYWORD_align <- 'align' end_of_word +KEYWORD_allowzero <- 'allowzero' end_of_word +KEYWORD_and <- 'and' end_of_word +KEYWORD_anyframe <- 'anyframe' end_of_word +KEYWORD_anytype <- 'anytype' end_of_word +KEYWORD_asm <- 'asm' end_of_word +KEYWORD_async <- 'async' end_of_word +KEYWORD_await <- 'await' end_of_word +KEYWORD_break <- 'break' end_of_word +KEYWORD_callconv <- 'callconv' end_of_word +KEYWORD_catch <- 'catch' end_of_word +KEYWORD_comptime <- 'comptime' end_of_word +KEYWORD_const <- 'const' end_of_word +KEYWORD_continue <- 'continue' end_of_word +KEYWORD_defer <- 'defer' end_of_word +KEYWORD_else <- 'else' end_of_word +KEYWORD_enum <- 'enum' end_of_word +KEYWORD_errdefer <- 'errdefer' end_of_word +KEYWORD_error <- 'error' end_of_word +KEYWORD_export <- 'export' end_of_word +KEYWORD_extern <- 'extern' end_of_word +KEYWORD_fn <- 'fn' end_of_word +KEYWORD_for <- 'for' end_of_word +KEYWORD_if <- 'if' end_of_word +KEYWORD_inline <- 'inline' end_of_word +KEYWORD_noalias <- 'noalias' end_of_word +KEYWORD_nosuspend <- 'nosuspend' end_of_word +KEYWORD_noinline <- 'noinline' end_of_word +KEYWORD_opaque <- 'opaque' end_of_word +KEYWORD_or <- 'or' end_of_word +KEYWORD_orelse <- 'orelse' end_of_word +KEYWORD_packed <- 'packed' end_of_word +KEYWORD_pub <- 'pub' end_of_word +KEYWORD_resume <- 'resume' end_of_word +KEYWORD_return <- 'return' end_of_word +KEYWORD_linksection <- 'linksection' end_of_word +KEYWORD_struct <- 'struct' end_of_word +KEYWORD_suspend <- 'suspend' end_of_word +KEYWORD_switch <- 'switch' end_of_word +KEYWORD_test <- 'test' end_of_word +KEYWORD_threadlocal <- 'threadlocal' end_of_word +KEYWORD_try <- 'try' end_of_word +KEYWORD_union <- 'union' end_of_word +KEYWORD_unreachable <- 'unreachable' end_of_word +KEYWORD_usingnamespace <- 'usingnamespace' end_of_word +KEYWORD_var <- 'var' end_of_word +KEYWORD_volatile <- 'volatile' end_of_word +KEYWORD_while <- 'while' end_of_word + +keyword <- KEYWORD_addrspace / KEYWORD_align / KEYWORD_allowzero / KEYWORD_and + / KEYWORD_anyframe / KEYWORD_anytype / KEYWORD_asm / KEYWORD_async + / KEYWORD_await / KEYWORD_break / KEYWORD_callconv / KEYWORD_catch + / KEYWORD_comptime / KEYWORD_const / KEYWORD_continue / KEYWORD_defer + / KEYWORD_else / KEYWORD_enum / KEYWORD_errdefer / KEYWORD_error / KEYWORD_export + / KEYWORD_extern / KEYWORD_fn / KEYWORD_for / KEYWORD_if + / KEYWORD_inline / KEYWORD_noalias / KEYWORD_nosuspend / KEYWORD_noinline + / KEYWORD_opaque / KEYWORD_or / KEYWORD_orelse / KEYWORD_packed + / KEYWORD_pub / KEYWORD_resume / KEYWORD_return / KEYWORD_linksection + / KEYWORD_struct / KEYWORD_suspend / KEYWORD_switch / KEYWORD_test + / KEYWORD_threadlocal / KEYWORD_try / KEYWORD_union / KEYWORD_unreachable + / KEYWORD_usingnamespace / KEYWORD_var / KEYWORD_volatile / KEYWORD_while diff --git a/internal/zig/zig.go b/internal/zig/zig.go index fef1d1b..e27e33e 100644 --- a/internal/zig/zig.go +++ b/internal/zig/zig.go @@ -6,32 +6,30 @@ import ( ) type formatter struct { - w io.Writer + io.Writer } func (f *formatter) WriteString(s string) (n int, err error) { - return f.w.Write([]byte(s)) + return f.Write([]byte(s)) } func (f *formatter) Writef(format string, a ...any) (err error) { - _, err = f.w.Write(fmt.Appendf(nil, format, a...)) + _, err = fmt.Fprintf(f, format, a...) return err } func Write(w io.Writer, root *Root) error { - f := &formatter{ - w: w, - } + f := &formatter{Writer: w} if root.ContainerDocComment != "" { - err := f.Writef("//! %s\n\n", root.ContainerDocComment) - if err != nil { + if err := f.Writef("//! %s\n\n", root.ContainerDocComment); err != nil { return err } } for _, member := range root.ContainerMembers { - for _, decl := range member.Decls { - if err := writeDecl(f, decl); err != nil { + // Only handle Decl for now (fields not needed for hello world) + if member.Decl != nil { + if err := writeDecl(f, member.Decl); err != nil { return err } } @@ -39,68 +37,90 @@ func Write(w io.Writer, root *Root) error { return nil } -func writeDecl(f *formatter, decl Decl) (err error) { - switch typ := decl.(type) { +func writeDecl(f *formatter, decl Decl) error { + switch fn := decl.(type) { case *FnDecl: - if err = f.Writef("fn %s(", typ.Name); err != nil { + if err := f.Writef("fn %s(", fn.Name); err != nil { return err } - if err = writeParams(f, typ.Params); err != nil { + if err := writeParams(f, fn.Params); err != nil { return err } - if err = f.Writef(") %s", typ.ReturnType); err != nil { + if _, err := f.WriteString(") "); err != nil { return err } - if err = writeBlock(f, typ.Body); err != nil { + if err := writeTypeExpr(f, fn.ReturnType); err != nil { + return err + } + if err := writeBlock(f, fn.Body); err != nil { return err } } return nil } -func writeParams(f *formatter, params []*ParamDecl) (err error) { - for _, param := range params { - if param.Name != "" { - if err = f.Writef("%s: ", param.Name); err != nil { +func writeParams(f *formatter, params []*ParamDecl) error { + for i, param := range params { + if i > 0 { + if _, err := f.WriteString(", "); err != nil { return err } } - if err = f.Writef("%s", param.Type); err != nil { + if param.Name != "" { + if _, err := f.WriteString(param.Name); err != nil { + return err + } + if _, err := f.WriteString(": "); err != nil { + return err + } + } + if err := writeTypeExpr(f, param.Type); err != nil { return err } } return nil } -func writeBlock(f *formatter, block *Block) (err error) { - if block == nil { - if _, err = f.WriteString(";"); err != nil { - return err - } +func writeTypeExpr(f *formatter, typ TypeExpr) error { + switch t := typ.(type) { + case *Identifier: + _, err := f.WriteString(t.Name) + return err + case nil: return nil + default: + // fallback: print as string + _, err := f.WriteString(fmt.Sprintf("%v", t)) + return err } - if err = f.Writef(" {\n"); err != nil { +} + +func writeBlock(f *formatter, block *Block) error { + if block == nil { + _, err := f.WriteString(";") + return err + } + if _, err := f.WriteString(" {\n"); err != nil { return err } for _, stmt := range block.Stmts { - if err = writeStmt(f, stmt); err != nil { + if err := writeStmt(f, stmt); err != nil { return err } - // Should this be the job of the formatter? - if _, err = f.WriteString("\n"); err != nil { + if _, err := f.WriteString("\n"); err != nil { return err } } - if err = f.Writef("}\n"); err != nil { + if _, err := f.WriteString("}\n"); err != nil { return err } return nil } -func writeStmt(f *formatter, stmt Stmt) (err error) { +func writeStmt(f *formatter, stmt Stmt) error { switch stmt.(type) { case *ReturnStmt: - if _, err = f.WriteString("return;"); err != nil { + if _, err := f.WriteString("return;"); err != nil { return err } } diff --git a/internal/zig/zig_test.go b/internal/zig/zig_test.go index fa2943b..83d8875 100644 --- a/internal/zig/zig_test.go +++ b/internal/zig/zig_test.go @@ -28,14 +28,12 @@ return; ContainerDocComment: "Hello, world!", ContainerMembers: []*zig.ContainerMember{ { - Decls: []zig.Decl{ - &zig.FnDecl{ - Name: "main", - ReturnType: "void", - Body: &zig.Block{ - Stmts: []zig.Stmt{ - &zig.ReturnStmt{}, - }, + Decl: &zig.FnDecl{ + Name: "main", + ReturnType: &zig.Identifier{Name: "void"}, + Body: &zig.Block{ + Stmts: []zig.Stmt{ + &zig.ReturnStmt{}, }, }, },