Compare commits

...

4 Commits

6 changed files with 1249 additions and 100 deletions

View File

@ -1,2 +1,5 @@
run:
go run internal/main.go -o hello.zig programs/hello.go && zig run hello.zig
test:
go test ./internal/zig

1
internal/zig/README.md Normal file
View File

@ -0,0 +1 @@
A Zig code generator based on the [Zig language specification](https://github.com/ziglang/zig-spec) PEG grammar. The grammar is included in the source so that it can easily be diffed for changes in the future.

View File

@ -1,50 +1,500 @@
// Abstract Syntax Tree (AST) definitions for Zig, closely following the grammar in zig-grammar.peg
package zig
// https://github.com/ziglang/zig-spec/blob/master/grammar/grammar.peg
// Root is the top-level node of a Zig source file.
type Root struct {
ContainerDocComment string // //! Doc Comment
ContainerDocComment DocComment // //! Doc Comment (optional)
ContainerMembers []*ContainerMember
}
type ContainerMember struct {
// FIXME
Decls []Decl
DocComment DocComment // /// Doc Comment (optional)
Comptime bool // 'comptime' field (optional)
Field *ContainerField
Decl Decl // Can be nil if this is a field
}
type Decl interface{}
// ContainerField represents a field in a struct/enum/union container.
type ContainerField struct {
Name string // May be empty for anonymous fields
Type TypeExpr
ByteAlign *Expr // Optional
Value Expr // Optional initializer
}
// Decl is any top-level declaration (function, variable, usingnamespace, etc).
type Decl interface {
isDecl()
}
// FnDecl represents a function declaration.
type FnDecl struct {
Name string
Export bool
Extern bool
ExternName string // Optional string for extern
Inline bool
NoInline bool
ThreadLocal bool
Name string // May be empty (anonymous)
Params []*ParamDecl
CallConv string
ByteAlign *Expr
AddrSpace *Expr
LinkSection *Expr
CallConv *Expr
ReturnType TypeExpr
Body *Block // nil means semicolon
}
type ParamDecl struct {
DocComment string // ??? It's what it says
Name string // Can be empty
Type TypeExpr // anytype when empty
func (*FnDecl) isDecl() {}
// GlobalVarDecl represents a global variable declaration.
type GlobalVarDecl struct {
Export bool
Extern bool
ExternName string // Optional string for extern
ThreadLocal bool
Const bool
Name string
Type TypeExpr // Optional
ByteAlign *Expr
AddrSpace *Expr
LinkSection *Expr
Value Expr // Optional initializer
}
func (*GlobalVarDecl) isDecl() {}
// UsingNamespaceDecl represents a 'usingnamespace' declaration.
type UsingNamespaceDecl struct {
Expr Expr
}
func (*UsingNamespaceDecl) isDecl() {}
// ComptimeDecl represents a 'comptime' block at the container level.
type ComptimeDecl struct {
Block *Block
}
func (*ComptimeDecl) isDecl() {}
// TestDecl represents a 'test' declaration.
type TestDecl struct {
Name string // Optional
Block *Block
}
func (*TestDecl) isDecl() {}
// ParamDecl represents a function parameter.
type ParamDecl struct {
DocComment DocComment // /// Doc Comment (optional)
NoAlias bool
Comptime bool
Name string // May be empty
Type TypeExpr // 'anytype' if empty
}
// Block represents a block of statements.
type Block struct {
Label string
Label string // Optional
Stmts []Stmt
}
type Stmt interface{}
// Stmt is any statement.
type Stmt interface {
isStmt()
}
type ReturnStmt struct{}
// ExprStmt represents an expression statement (e.g. a function call as a statement).
type ExprStmt struct {
Expr Expr
}
func (*ExprStmt) isStmt() {}
// VarDeclStmt represents a variable or const declaration at statement level,
// supporting destructuring and multi-var declarations.
//
// Every name in Pattern shares the single Const, Type, ByteAlign, AddrSpace
// and LinkSection below. The grammar lets each element of a destructure carry
// its own prototype (VarDeclProto (COMMA (VarDeclProto / Expr))* EQUAL Expr),
// so mixed forms such as `const a, var b = ...` cannot be represented here.
//
// Unlike GlobalVarDecl, the alignment/addrspace/linksection fields are plain
// Expr values (nil when absent) rather than *Expr.
type VarDeclStmt struct {
Const bool
Pattern VarPattern // Destructuring or multiple variable names
Type TypeExpr // Optional
Value Expr // Initializer. NOTE(review): the statement grammar requires '= Expr' after a VarDeclProto, so nil is presumably not valid Zig here — confirm
ByteAlign Expr // Optional
AddrSpace Expr // Optional
LinkSection Expr // Optional
}
func (*VarDeclStmt) isStmt() {}
// ReturnStmt represents a 'return' statement.
type ReturnStmt struct {
Value Expr // Optional
}
func (*ReturnStmt) isStmt() {}
// IfStmt represents an if statement (with optional else branch and payload).
//
// Grammar: IfPrefix <- KEYWORD_if LPAREN Expr RPAREN PtrPayload?
//
// The grammar also allows a payload on the else branch (`else |err| ...`);
// this struct has no separate field for it.
type IfStmt struct {
Cond Expr // Condition expression
Then Stmt // Then branch
Else Stmt // Optional else branch
Payload *Payload // Optional capture after the condition; the grammar allows a single name here (|x| or |*x|)
}
func (*IfStmt) isStmt() {}
// IfExpr represents an if expression (with optional else branch and payload).
//
// Grammar: IfExpr <- IfPrefix Expr (KEYWORD_else Payload? Expr)?
//
// The payload that may follow 'else' has no separate field in this struct.
type IfExpr struct {
Cond Expr // Condition expression
Then Expr // Then branch
Else Expr // Optional else branch
Payload *Payload // Optional capture after the condition; the grammar allows a single name here (|x| or |*x|)
}
// DeferStmt represents a 'defer' or 'errdefer' statement.
//
// Grammar:
//
//	KEYWORD_defer BlockExprStatement
//	KEYWORD_errdefer Payload? BlockExprStatement
type DeferStmt struct {
ErrDefer bool // True for 'errdefer', false for 'defer'
Payload *Payload // Optional; the grammar allows it only on 'errdefer', as a single non-pointer name (|x|)
Stmt Stmt // Statement to defer
}
func (*DeferStmt) isStmt() {}
// SuspendStmt represents a 'suspend' or 'nosuspend' statement.
type SuspendStmt struct {
NoSuspend bool
Stmt Stmt
}
func (*SuspendStmt) isStmt() {}
// BlockStmt allows a block to be used as a statement.
type BlockStmt struct {
Block *Block
}
func (*BlockStmt) isStmt() {}
// BreakStmt represents a 'break' statement.
type BreakStmt struct {
Label string // Optional
Value Expr // Optional
}
func (*BreakStmt) isStmt() {}
// ContinueStmt represents a 'continue' statement.
type ContinueStmt struct {
Label string // Optional
}
func (*ContinueStmt) isStmt() {}
// LoopStmt represents a for/while loop statement.
//
// Kind and the dynamic type of Prefix describe the same thing and must agree.
type LoopStmt struct {
Inline bool // True if 'inline' is present
Kind string // "for" or "while"
Prefix LoopPrefix // ForPrefix or WhilePrefix
Body Stmt // Loop body
Else Stmt // Optional else branch
// NOTE(review): ForPrefix and WhilePrefix already carry the loop capture, so
// this is presumably the payload after 'else' (`else |x|`), which the grammar
// allows on while loops only — confirm.
Payload *Payload // Optional
}
func (*LoopStmt) isStmt() {}
// LoopPrefix is the prefix of a for/while loop.
type LoopPrefix interface{}
// ForPrefix represents the prefix of a for loop.
type ForPrefix struct {
Args []ForArg // For loop arguments
Payload *Payload // Payload (|*x, y|, etc.)
}
// ForArg represents one argument of a for loop, i.e. a ForItem in the grammar:
//
//	ForItem <- Expr (DOT2 Expr?)?
//
// The grammar only has the '..' range form; it defines no '..<' token.
//
// NOTE(review): despite its name, From is presumably the expression written
// after '..' (the range end) — confirm. A nil From cannot distinguish "no
// range" from the open-ended form `a..`, which the grammar allows.
type ForArg struct {
Expr Expr // Argument expression (the range start when a range is used)
From Expr // Optional second expression of a '..' range
}
// WhilePrefix represents the prefix of a while loop.
type WhilePrefix struct {
Cond Expr // Condition expression
Payload *Payload // Optional payload (for |x|, |*x|, etc.)
Continue Expr // Optional (while continue expression)
}
// SwitchStmt represents a switch statement.
type SwitchStmt struct {
Cond Expr
Prongs []*SwitchProng
}
func (*SwitchStmt) isStmt() {}
// SwitchExpr represents a switch expression.
type SwitchExpr struct {
Cond Expr
Prongs []*SwitchProng
}
// SwitchProng represents a prong in a switch.
type SwitchProng struct {
Inline bool // True if 'inline' is present
Cases []*SwitchCase // List of cases for this prong
Payload *Payload // Optional payload (for |*x, y|, etc.)
Expr Expr // Result expression for this prong
}
// SwitchCase represents a single item of a switch prong, or the 'else' case.
//
// Grammar:
//
//	SwitchCase <- SwitchItem (COMMA SwitchItem)* COMMA? / KEYWORD_else
//	SwitchItem <- Expr (DOT3 Expr)?
type SwitchCase struct {
Expr Expr // Case value, or the range start when To is set
To Expr // Optional range end; switch ranges are written with '...' (DOT3), not '..'
IsElse bool // Marks the 'else' case; Expr and To are presumably unused then — confirm
}
// AsmExpr represents an inline assembly expression.
type AsmExpr struct {
Volatile bool
Template Expr
Outputs []*AsmOutputItem
Inputs []*AsmInputItem
Clobbers []string
}
// AsmOutputItem represents an output operand in asm.
type AsmOutputItem struct {
Name string
Constraint string
Type TypeExpr
}
// AsmInputItem represents an input operand in asm.
type AsmInputItem struct {
Name string
Constraint string
Expr Expr
}
// ContainerDecl represents a struct, enum, union, or opaque declaration.
type ContainerDecl struct {
Extern bool
Packed bool
Kind string // "struct", "enum", "union", "opaque"
TagType TypeExpr // Optional (for enum/union)
Fields []*ContainerMember
DocComment DocComment
}
// ErrorSetDecl represents an error set declaration.
type ErrorSetDecl struct {
Names []string
}
// InitListExpr represents an initializer list.
// Exactly one of Fields, Values, or Empty must be set (non-nil/non-empty or true).
type InitListExpr struct {
Fields []*FieldInit // Field initializers (for {.foo = 1, .bar = 2}), mutually exclusive with Values/Empty
Values []Expr // Positional initializers (for {1, 2, 3}), mutually exclusive with Fields/Empty
Empty bool // True if '{}', mutually exclusive with Fields/Values
}
// FieldInit represents a field initializer in an init list.
type FieldInit struct {
Name string
Value Expr
}
// Identifier represents an identifier expression (variable, field, etc).
type Identifier struct {
Name string // The identifier name
}
// Literal represents a literal value (int, float, string, char).
type Literal struct {
Kind string // "int", "float", "string", "char"
Value string // The literal value as a string
}
// BinaryExpr represents a binary operation (e.g. +, -, *, /, etc).
type BinaryExpr struct {
Op string // Operator, e.g. "+", "-", "*", etc.
Left Expr // Left operand
Right Expr // Right operand
}
// UnaryExpr represents a unary operation (e.g. !, -, ~, etc).
type UnaryExpr struct {
Op string // Operator, e.g. "-", "!", "~"
Expr Expr // Operand
}
// GroupedExpr represents a parenthesized expression.
type GroupedExpr struct {
Expr Expr // The grouped expression
}
// CallExpr represents a function call.
type CallExpr struct {
Fun Expr // Function being called
Args []Expr // Arguments to the function
}
// FieldAccessExpr represents a field/member access as a suffix operation (e.g. foo.bar).
//
// Note: in order to call a function on an object, use a CallExpr with a Fun of a FieldAccessExpr.
// See TestHelloWorld for example.
type FieldAccessExpr struct {
Receiver Expr // The object being accessed
Field string // The field name
}
// IndexExpr represents an indexing operation as a suffix (e.g. arr[0]).
type IndexExpr struct {
Receiver Expr // The object being indexed
Index Expr // The index expression
}
// ResumeExpr represents a 'resume' expression.
type ResumeExpr struct {
Expr Expr // The expression to resume
}
// ComptimeExpr represents a 'comptime' expression.
type ComptimeExpr struct {
Expr Expr // The expression to evaluate at comptime
}
// NosuspendExpr represents a 'nosuspend' expression.
type NosuspendExpr struct {
Expr Expr // The expression to evaluate with nosuspend
}
// ContinueExpr represents a 'continue' expression.
type ContinueExpr struct {
Label string // Optional label
}
// Expr is any expression.
type Expr interface{}
// This will need to become a real type expr someday
type TypeExpr string
// TypeExpr is any type expression.
type TypeExpr interface{}
func (t TypeExpr) String() string {
if string(t) == "" {
return "anytype"
// DocComment represents a doc comment (/// or //! lines).
// A multi-line comment is stored as one string with embedded newlines; each
// line is meant to become its own comment line in the output.
// NOTE(review): the formatter's Write emits the container doc comment through
// a single "//! %s" format and does not split on newlines, so multi-line
// values are not yet prefixed per line — confirm before relying on this.
type DocComment string
// Payload represents a control flow payload (|x|, |*x|, |*x, y|, etc).
// Each entry in Names corresponds to a variable name; the same index in Pointers is true if that name is a pointer (|*x|).
type Payload struct {
Names []string // Names in the payload, in order
Pointers []bool // True if the corresponding name is a pointer (|*x|, |*x, y|, |*x, *y|, etc.)
}
return string(t)
// LabeledBlock represents a labeled block or loop (label: {...}).
type LabeledBlock struct {
Label string // The label name
Block *Block // The labeled block
}
// LabeledTypeExpr represents a labeled type block (label: type).
type LabeledTypeExpr struct {
Label string // The label name
Type TypeExpr // The labeled type
}
// IfTypeExpr represents an if expression at the type level.
type IfTypeExpr struct {
Cond Expr
Then TypeExpr
Else TypeExpr // Optional
Payload *Payload // Optional
}
// ForTypeExpr represents a for expression at the type level.
type ForTypeExpr struct {
Prefix *ForPrefix
Body TypeExpr
Else TypeExpr // Optional
Payload *Payload // Optional
}
// WhileTypeExpr represents a while expression at the type level.
type WhileTypeExpr struct {
Prefix *WhilePrefix
Body TypeExpr
Else TypeExpr // Optional
Payload *Payload // Optional
}
// DotAsteriskExpr represents a .*
type DotAsteriskExpr struct {
Receiver Expr // The expression being dereferenced
}
// DotQuestionExpr represents a .?
type DotQuestionExpr struct {
Receiver Expr // The expression being checked for optional
}
// AsyncExpr represents an 'async' expression.
//
// Grammar: KEYWORD_async PrimaryTypeExpr SuffixOp* FnCallArguments
type AsyncExpr struct {
Expr Expr // The expression following 'async'; per the grammar this is always a function call
}
// TryExpr represents a 'try' expression.
type TryExpr struct {
Expr Expr // The expression to try
}
// AwaitExpr represents an 'await' expression.
type AwaitExpr struct {
Expr Expr // The expression to await
}
// UnreachableExpr represents the 'unreachable' keyword.
type UnreachableExpr struct{}
// EmptyInitListExpr represents an empty initializer list '{}'.
type EmptyInitListExpr struct{}
// PositionalInitListExpr represents a positional initializer list '{expr, ...}'.
type PositionalInitListExpr struct {
Values []Expr // Expressions in order
}
// FieldInitListExpr represents a field initializer list '{.field = expr, ...}'.
type FieldInitListExpr struct {
Fields []*FieldInit // Field initializers
}
// SwitchProngPayload represents a switch prong payload (|*x, y|).
type SwitchProngPayload struct {
Pointer bool
Names []string
}
// SwitchProngCase represents a single case in a switch prong.
type SwitchProngCase struct {
Expr Expr // The case expression
To Expr // Optional, for ranges
}
// SwitchProngFull represents a full switch prong with cases and payload.
type SwitchProngFull struct {
Inline bool
Cases []*SwitchProngCase // One or more cases
Payload *SwitchProngPayload // Optional
Expr Expr // The result expression
}
// SwitchElseProng represents an 'else' prong in a switch.
type SwitchElseProng struct {
Expr Expr // The result expression
}
// VarPattern represents a variable pattern for destructuring or multiple variable declarations.
type VarPattern struct {
Names []string // Variable names (single or multiple for destructuring)
}

View File

@ -0,0 +1,582 @@
Root <- skip container_doc_comment? ContainerMembers eof
# *** Top level ***
ContainerMembers <- ContainerDeclarations (ContainerField COMMA)* (ContainerField / ContainerDeclarations)
ContainerDeclarations <- (TestDecl / ComptimeDecl / doc_comment? KEYWORD_pub? Decl)*
TestDecl <- KEYWORD_test (STRINGLITERALSINGLE / IDENTIFIER)? Block
ComptimeDecl <- KEYWORD_comptime Block
Decl
<- (KEYWORD_export / KEYWORD_extern STRINGLITERALSINGLE? / KEYWORD_inline / KEYWORD_noinline)? FnProto (SEMICOLON / Block)
/ (KEYWORD_export / KEYWORD_extern STRINGLITERALSINGLE?)? KEYWORD_threadlocal? GlobalVarDecl
/ KEYWORD_usingnamespace Expr SEMICOLON
FnProto <- KEYWORD_fn IDENTIFIER? LPAREN ParamDeclList RPAREN ByteAlign? AddrSpace? LinkSection? CallConv? EXCLAMATIONMARK? TypeExpr
VarDeclProto <- (KEYWORD_const / KEYWORD_var) IDENTIFIER (COLON TypeExpr)? ByteAlign? AddrSpace? LinkSection?
GlobalVarDecl <- VarDeclProto (EQUAL Expr)? SEMICOLON
ContainerField <- doc_comment? KEYWORD_comptime? !KEYWORD_fn (IDENTIFIER COLON)? TypeExpr ByteAlign? (EQUAL Expr)?
# *** Block Level ***
Statement
<- KEYWORD_comptime ComptimeStatement
/ KEYWORD_nosuspend BlockExprStatement
/ KEYWORD_suspend BlockExprStatement
/ KEYWORD_defer BlockExprStatement
/ KEYWORD_errdefer Payload? BlockExprStatement
/ IfStatement
/ LabeledStatement
/ SwitchExpr
/ VarDeclExprStatement
ComptimeStatement
<- BlockExpr
/ VarDeclExprStatement
IfStatement
<- IfPrefix BlockExpr ( KEYWORD_else Payload? Statement )?
/ IfPrefix AssignExpr ( SEMICOLON / KEYWORD_else Payload? Statement )
LabeledStatement <- BlockLabel? (Block / LoopStatement)
LoopStatement <- KEYWORD_inline? (ForStatement / WhileStatement)
ForStatement
<- ForPrefix BlockExpr ( KEYWORD_else Statement )?
/ ForPrefix AssignExpr ( SEMICOLON / KEYWORD_else Statement )
WhileStatement
<- WhilePrefix BlockExpr ( KEYWORD_else Payload? Statement )?
/ WhilePrefix AssignExpr ( SEMICOLON / KEYWORD_else Payload? Statement )
BlockExprStatement
<- BlockExpr
/ AssignExpr SEMICOLON
BlockExpr <- BlockLabel? Block
# An expression, assignment, or any destructure, as a statement.
VarDeclExprStatement
<- VarDeclProto (COMMA (VarDeclProto / Expr))* EQUAL Expr SEMICOLON
/ Expr (AssignOp Expr / (COMMA (VarDeclProto / Expr))+ EQUAL Expr)? SEMICOLON
# *** Expression Level ***
# An assignment or a destructure whose LHS are all lvalue expressions.
AssignExpr <- Expr (AssignOp Expr / (COMMA Expr)+ EQUAL Expr)?
SingleAssignExpr <- Expr (AssignOp Expr)?
Expr <- BoolOrExpr
BoolOrExpr <- BoolAndExpr (KEYWORD_or BoolAndExpr)*
BoolAndExpr <- CompareExpr (KEYWORD_and CompareExpr)*
CompareExpr <- BitwiseExpr (CompareOp BitwiseExpr)?
BitwiseExpr <- BitShiftExpr (BitwiseOp BitShiftExpr)*
BitShiftExpr <- AdditionExpr (BitShiftOp AdditionExpr)*
AdditionExpr <- MultiplyExpr (AdditionOp MultiplyExpr)*
MultiplyExpr <- PrefixExpr (MultiplyOp PrefixExpr)*
PrefixExpr <- PrefixOp* PrimaryExpr
PrimaryExpr
<- AsmExpr
/ IfExpr
/ KEYWORD_break BreakLabel? Expr?
/ KEYWORD_comptime Expr
/ KEYWORD_nosuspend Expr
/ KEYWORD_continue BreakLabel?
/ KEYWORD_resume Expr
/ KEYWORD_return Expr?
/ BlockLabel? LoopExpr
/ Block
/ CurlySuffixExpr
IfExpr <- IfPrefix Expr (KEYWORD_else Payload? Expr)?
Block <- LBRACE Statement* RBRACE
LoopExpr <- KEYWORD_inline? (ForExpr / WhileExpr)
ForExpr <- ForPrefix Expr (KEYWORD_else Expr)?
WhileExpr <- WhilePrefix Expr (KEYWORD_else Payload? Expr)?
CurlySuffixExpr <- TypeExpr InitList?
InitList
<- LBRACE FieldInit (COMMA FieldInit)* COMMA? RBRACE
/ LBRACE Expr (COMMA Expr)* COMMA? RBRACE
/ LBRACE RBRACE
TypeExpr <- PrefixTypeOp* ErrorUnionExpr
ErrorUnionExpr <- SuffixExpr (EXCLAMATIONMARK TypeExpr)?
SuffixExpr
<- KEYWORD_async PrimaryTypeExpr SuffixOp* FnCallArguments
/ PrimaryTypeExpr (SuffixOp / FnCallArguments)*
PrimaryTypeExpr
<- BUILTINIDENTIFIER FnCallArguments
/ CHAR_LITERAL
/ ContainerDecl
/ DOT IDENTIFIER
/ DOT InitList
/ ErrorSetDecl
/ FLOAT
/ FnProto
/ GroupedExpr
/ LabeledTypeExpr
/ IDENTIFIER
/ IfTypeExpr
/ INTEGER
/ KEYWORD_comptime TypeExpr
/ KEYWORD_error DOT IDENTIFIER
/ KEYWORD_anyframe
/ KEYWORD_unreachable
/ STRINGLITERAL
/ SwitchExpr
ContainerDecl <- (KEYWORD_extern / KEYWORD_packed)? ContainerDeclAuto
ErrorSetDecl <- KEYWORD_error LBRACE IdentifierList RBRACE
GroupedExpr <- LPAREN Expr RPAREN
IfTypeExpr <- IfPrefix TypeExpr (KEYWORD_else Payload? TypeExpr)?
LabeledTypeExpr
<- BlockLabel Block
/ BlockLabel? LoopTypeExpr
LoopTypeExpr <- KEYWORD_inline? (ForTypeExpr / WhileTypeExpr)
ForTypeExpr <- ForPrefix TypeExpr (KEYWORD_else TypeExpr)?
WhileTypeExpr <- WhilePrefix TypeExpr (KEYWORD_else Payload? TypeExpr)?
SwitchExpr <- KEYWORD_switch LPAREN Expr RPAREN LBRACE SwitchProngList RBRACE
# *** Assembly ***
AsmExpr <- KEYWORD_asm KEYWORD_volatile? LPAREN Expr AsmOutput? RPAREN
AsmOutput <- COLON AsmOutputList AsmInput?
AsmOutputItem <- LBRACKET IDENTIFIER RBRACKET STRINGLITERAL LPAREN (MINUSRARROW TypeExpr / IDENTIFIER) RPAREN
AsmInput <- COLON AsmInputList AsmClobbers?
AsmInputItem <- LBRACKET IDENTIFIER RBRACKET STRINGLITERAL LPAREN Expr RPAREN
AsmClobbers <- COLON StringList
# *** Helper grammar ***
BreakLabel <- COLON IDENTIFIER
BlockLabel <- IDENTIFIER COLON
FieldInit <- DOT IDENTIFIER EQUAL Expr
WhileContinueExpr <- COLON LPAREN AssignExpr RPAREN
LinkSection <- KEYWORD_linksection LPAREN Expr RPAREN
AddrSpace <- KEYWORD_addrspace LPAREN Expr RPAREN
# Fn specific
CallConv <- KEYWORD_callconv LPAREN Expr RPAREN
ParamDecl
<- doc_comment? (KEYWORD_noalias / KEYWORD_comptime)? (IDENTIFIER COLON)? ParamType
/ DOT3
ParamType
<- KEYWORD_anytype
/ TypeExpr
# Control flow prefixes
IfPrefix <- KEYWORD_if LPAREN Expr RPAREN PtrPayload?
WhilePrefix <- KEYWORD_while LPAREN Expr RPAREN PtrPayload? WhileContinueExpr?
ForPrefix <- KEYWORD_for LPAREN ForArgumentsList RPAREN PtrListPayload
# Payloads
Payload <- PIPE IDENTIFIER PIPE
PtrPayload <- PIPE ASTERISK? IDENTIFIER PIPE
PtrIndexPayload <- PIPE ASTERISK? IDENTIFIER (COMMA IDENTIFIER)? PIPE
PtrListPayload <- PIPE ASTERISK? IDENTIFIER (COMMA ASTERISK? IDENTIFIER)* COMMA? PIPE
# Switch specific
SwitchProng <- KEYWORD_inline? SwitchCase EQUALRARROW PtrIndexPayload? SingleAssignExpr
SwitchCase
<- SwitchItem (COMMA SwitchItem)* COMMA?
/ KEYWORD_else
SwitchItem <- Expr (DOT3 Expr)?
# For specific
ForArgumentsList <- ForItem (COMMA ForItem)* COMMA?
ForItem <- Expr (DOT2 Expr?)?
# Operators
AssignOp
<- ASTERISKEQUAL
/ ASTERISKPIPEEQUAL
/ SLASHEQUAL
/ PERCENTEQUAL
/ PLUSEQUAL
/ PLUSPIPEEQUAL
/ MINUSEQUAL
/ MINUSPIPEEQUAL
/ LARROW2EQUAL
/ LARROW2PIPEEQUAL
/ RARROW2EQUAL
/ AMPERSANDEQUAL
/ CARETEQUAL
/ PIPEEQUAL
/ ASTERISKPERCENTEQUAL
/ PLUSPERCENTEQUAL
/ MINUSPERCENTEQUAL
/ EQUAL
CompareOp
<- EQUALEQUAL
/ EXCLAMATIONMARKEQUAL
/ LARROW
/ RARROW
/ LARROWEQUAL
/ RARROWEQUAL
BitwiseOp
<- AMPERSAND
/ CARET
/ PIPE
/ KEYWORD_orelse
/ KEYWORD_catch Payload?
BitShiftOp
<- LARROW2
/ RARROW2
/ LARROW2PIPE
AdditionOp
<- PLUS
/ MINUS
/ PLUS2
/ PLUSPERCENT
/ MINUSPERCENT
/ PLUSPIPE
/ MINUSPIPE
MultiplyOp
<- PIPE2
/ ASTERISK
/ SLASH
/ PERCENT
/ ASTERISK2
/ ASTERISKPERCENT
/ ASTERISKPIPE
PrefixOp
<- EXCLAMATIONMARK
/ MINUS
/ TILDE
/ MINUSPERCENT
/ AMPERSAND
/ KEYWORD_try
/ KEYWORD_await
PrefixTypeOp
<- QUESTIONMARK
/ KEYWORD_anyframe MINUSRARROW
/ SliceTypeStart (ByteAlign / AddrSpace / KEYWORD_const / KEYWORD_volatile / KEYWORD_allowzero)*
/ PtrTypeStart (AddrSpace / KEYWORD_align LPAREN Expr (COLON Expr COLON Expr)? RPAREN / KEYWORD_const / KEYWORD_volatile / KEYWORD_allowzero)*
/ ArrayTypeStart
SuffixOp
<- LBRACKET Expr (DOT2 (Expr? (COLON Expr)?)?)? RBRACKET
/ DOT IDENTIFIER
/ DOTASTERISK
/ DOTQUESTIONMARK
FnCallArguments <- LPAREN ExprList RPAREN
# Ptr specific
SliceTypeStart <- LBRACKET (COLON Expr)? RBRACKET
PtrTypeStart
<- ASTERISK
/ ASTERISK2
/ LBRACKET ASTERISK (LETTERC / COLON Expr)? RBRACKET
ArrayTypeStart <- LBRACKET Expr (COLON Expr)? RBRACKET
# ContainerDecl specific
ContainerDeclAuto <- ContainerDeclType LBRACE container_doc_comment? ContainerMembers RBRACE
ContainerDeclType
<- KEYWORD_struct (LPAREN Expr RPAREN)?
/ KEYWORD_opaque
/ KEYWORD_enum (LPAREN Expr RPAREN)?
/ KEYWORD_union (LPAREN (KEYWORD_enum (LPAREN Expr RPAREN)? / Expr) RPAREN)?
# Alignment
ByteAlign <- KEYWORD_align LPAREN Expr RPAREN
# Lists
IdentifierList <- (doc_comment? IDENTIFIER COMMA)* (doc_comment? IDENTIFIER)?
SwitchProngList <- (SwitchProng COMMA)* SwitchProng?
AsmOutputList <- (AsmOutputItem COMMA)* AsmOutputItem?
AsmInputList <- (AsmInputItem COMMA)* AsmInputItem?
StringList <- (STRINGLITERAL COMMA)* STRINGLITERAL?
ParamDeclList <- (ParamDecl COMMA)* ParamDecl?
ExprList <- (Expr COMMA)* Expr?
# *** Tokens ***
eof <- !.
bin <- [01]
bin_ <- '_'? bin
oct <- [0-7]
oct_ <- '_'? oct
hex <- [0-9a-fA-F]
hex_ <- '_'? hex
dec <- [0-9]
dec_ <- '_'? dec
bin_int <- bin bin_*
oct_int <- oct oct_*
dec_int <- dec dec_*
hex_int <- hex hex_*
ox80_oxBF <- [\200-\277]
oxF4 <- '\364'
ox80_ox8F <- [\200-\217]
oxF1_oxF3 <- [\361-\363]
oxF0 <- '\360'
ox90_0xBF <- [\220-\277]
oxEE_oxEF <- [\356-\357]
oxED <- '\355'
ox80_ox9F <- [\200-\237]
oxE1_oxEC <- [\341-\354]
oxE0 <- '\340'
oxA0_oxBF <- [\240-\277]
oxC2_oxDF <- [\302-\337]
# From https://lemire.me/blog/2018/05/09/how-quickly-can-you-check-that-a-string-is-valid-unicode-utf-8/
# First Byte Second Byte Third Byte Fourth Byte
# [0x00,0x7F]
# [0xC2,0xDF] [0x80,0xBF]
# 0xE0 [0xA0,0xBF] [0x80,0xBF]
# [0xE1,0xEC] [0x80,0xBF] [0x80,0xBF]
# 0xED [0x80,0x9F] [0x80,0xBF]
# [0xEE,0xEF] [0x80,0xBF] [0x80,0xBF]
# 0xF0 [0x90,0xBF] [0x80,0xBF] [0x80,0xBF]
# [0xF1,0xF3] [0x80,0xBF] [0x80,0xBF] [0x80,0xBF]
# 0xF4 [0x80,0x8F] [0x80,0xBF] [0x80,0xBF]
mb_utf8_literal <-
oxF4 ox80_ox8F ox80_oxBF ox80_oxBF
/ oxF1_oxF3 ox80_oxBF ox80_oxBF ox80_oxBF
/ oxF0 ox90_0xBF ox80_oxBF ox80_oxBF
/ oxEE_oxEF ox80_oxBF ox80_oxBF
/ oxED ox80_ox9F ox80_oxBF
/ oxE1_oxEC ox80_oxBF ox80_oxBF
/ oxE0 oxA0_oxBF ox80_oxBF
/ oxC2_oxDF ox80_oxBF
# NOTE: `\135` is `]`. We separate to avoid this: [\000-\011\013-\046\050-\133]-\177]
# ^ ^XXXXXX
ascii_char_not_nl_slash_squote <- [\000-\011\013-\046\050-\133\136-\177] / ']'
char_escape
<- "\\x" hex hex
/ "\\u{" hex+ "}"
/ "\\" [nr\\t'"]
char_char
<- mb_utf8_literal
/ char_escape
/ ascii_char_not_nl_slash_squote
string_char
<- char_escape
/ [^\\"\n]
container_doc_comment <- ('//!' [^\n]* [ \n]* skip)+
doc_comment <- ('///' [^\n]* [ \n]* skip)+
line_comment <- '//' ![!/][^\n]* / '////' [^\n]*
line_string <- ('\\\\' [^\n]* [ \n]*)+
skip <- ([ \n] / line_comment)*
CHAR_LITERAL <- ['] char_char ['] skip
FLOAT
<- '0x' hex_int '.' hex_int ([pP] [-+]? dec_int)? skip
/ dec_int '.' dec_int ([eE] [-+]? dec_int)? skip
/ '0x' hex_int [pP] [-+]? dec_int skip
/ dec_int [eE] [-+]? dec_int skip
INTEGER
<- '0b' bin_int skip
/ '0o' oct_int skip
/ '0x' hex_int skip
/ dec_int skip
STRINGLITERALSINGLE <- ["] string_char* ["] skip
STRINGLITERAL
<- STRINGLITERALSINGLE
/ (line_string skip)+
IDENTIFIER
<- !keyword [A-Za-z_] [A-Za-z0-9_]* skip
/ '@' STRINGLITERALSINGLE
BUILTINIDENTIFIER <- '@'[A-Za-z_][A-Za-z0-9_]* skip
AMPERSAND <- '&' ![=] skip
AMPERSANDEQUAL <- '&=' skip
ASTERISK <- '*' ![*%=|] skip
ASTERISK2 <- '**' skip
ASTERISKEQUAL <- '*=' skip
ASTERISKPERCENT <- '*%' ![=] skip
ASTERISKPERCENTEQUAL <- '*%=' skip
ASTERISKPIPE <- '*|' ![=] skip
ASTERISKPIPEEQUAL <- '*|=' skip
CARET <- '^' ![=] skip
CARETEQUAL <- '^=' skip
COLON <- ':' skip
COMMA <- ',' skip
DOT <- '.' ![*.?] skip
DOT2 <- '..' ![.] skip
DOT3 <- '...' skip
DOTASTERISK <- '.*' skip
DOTQUESTIONMARK <- '.?' skip
EQUAL <- '=' ![>=] skip
EQUALEQUAL <- '==' skip
EQUALRARROW <- '=>' skip
EXCLAMATIONMARK <- '!' ![=] skip
EXCLAMATIONMARKEQUAL <- '!=' skip
LARROW <- '<' ![<=] skip
LARROW2 <- '<<' ![=|] skip
LARROW2EQUAL <- '<<=' skip
LARROW2PIPE <- '<<|' ![=] skip
LARROW2PIPEEQUAL <- '<<|=' skip
LARROWEQUAL <- '<=' skip
LBRACE <- '{' skip
LBRACKET <- '[' skip
LPAREN <- '(' skip
MINUS <- '-' ![%=>|] skip
MINUSEQUAL <- '-=' skip
MINUSPERCENT <- '-%' ![=] skip
MINUSPERCENTEQUAL <- '-%=' skip
MINUSPIPE <- '-|' ![=] skip
MINUSPIPEEQUAL <- '-|=' skip
MINUSRARROW <- '->' skip
PERCENT <- '%' ![=] skip
PERCENTEQUAL <- '%=' skip
PIPE <- '|' ![|=] skip
PIPE2 <- '||' skip
PIPEEQUAL <- '|=' skip
PLUS <- '+' ![%+=|] skip
PLUS2 <- '++' skip
PLUSEQUAL <- '+=' skip
PLUSPERCENT <- '+%' ![=] skip
PLUSPERCENTEQUAL <- '+%=' skip
PLUSPIPE <- '+|' ![=] skip
PLUSPIPEEQUAL <- '+|=' skip
LETTERC <- 'c' skip
QUESTIONMARK <- '?' skip
RARROW <- '>' ![>=] skip
RARROW2 <- '>>' ![=] skip
RARROW2EQUAL <- '>>=' skip
RARROWEQUAL <- '>=' skip
RBRACE <- '}' skip
RBRACKET <- ']' skip
RPAREN <- ')' skip
SEMICOLON <- ';' skip
SLASH <- '/' ![=] skip
SLASHEQUAL <- '/=' skip
TILDE <- '~' skip
end_of_word <- ![a-zA-Z0-9_] skip
KEYWORD_addrspace <- 'addrspace' end_of_word
KEYWORD_align <- 'align' end_of_word
KEYWORD_allowzero <- 'allowzero' end_of_word
KEYWORD_and <- 'and' end_of_word
KEYWORD_anyframe <- 'anyframe' end_of_word
KEYWORD_anytype <- 'anytype' end_of_word
KEYWORD_asm <- 'asm' end_of_word
KEYWORD_async <- 'async' end_of_word
KEYWORD_await <- 'await' end_of_word
KEYWORD_break <- 'break' end_of_word
KEYWORD_callconv <- 'callconv' end_of_word
KEYWORD_catch <- 'catch' end_of_word
KEYWORD_comptime <- 'comptime' end_of_word
KEYWORD_const <- 'const' end_of_word
KEYWORD_continue <- 'continue' end_of_word
KEYWORD_defer <- 'defer' end_of_word
KEYWORD_else <- 'else' end_of_word
KEYWORD_enum <- 'enum' end_of_word
KEYWORD_errdefer <- 'errdefer' end_of_word
KEYWORD_error <- 'error' end_of_word
KEYWORD_export <- 'export' end_of_word
KEYWORD_extern <- 'extern' end_of_word
KEYWORD_fn <- 'fn' end_of_word
KEYWORD_for <- 'for' end_of_word
KEYWORD_if <- 'if' end_of_word
KEYWORD_inline <- 'inline' end_of_word
KEYWORD_noalias <- 'noalias' end_of_word
KEYWORD_nosuspend <- 'nosuspend' end_of_word
KEYWORD_noinline <- 'noinline' end_of_word
KEYWORD_opaque <- 'opaque' end_of_word
KEYWORD_or <- 'or' end_of_word
KEYWORD_orelse <- 'orelse' end_of_word
KEYWORD_packed <- 'packed' end_of_word
KEYWORD_pub <- 'pub' end_of_word
KEYWORD_resume <- 'resume' end_of_word
KEYWORD_return <- 'return' end_of_word
KEYWORD_linksection <- 'linksection' end_of_word
KEYWORD_struct <- 'struct' end_of_word
KEYWORD_suspend <- 'suspend' end_of_word
KEYWORD_switch <- 'switch' end_of_word
KEYWORD_test <- 'test' end_of_word
KEYWORD_threadlocal <- 'threadlocal' end_of_word
KEYWORD_try <- 'try' end_of_word
KEYWORD_union <- 'union' end_of_word
KEYWORD_unreachable <- 'unreachable' end_of_word
KEYWORD_usingnamespace <- 'usingnamespace' end_of_word
KEYWORD_var <- 'var' end_of_word
KEYWORD_volatile <- 'volatile' end_of_word
KEYWORD_while <- 'while' end_of_word
keyword <- KEYWORD_addrspace / KEYWORD_align / KEYWORD_allowzero / KEYWORD_and
/ KEYWORD_anyframe / KEYWORD_anytype / KEYWORD_asm / KEYWORD_async
/ KEYWORD_await / KEYWORD_break / KEYWORD_callconv / KEYWORD_catch
/ KEYWORD_comptime / KEYWORD_const / KEYWORD_continue / KEYWORD_defer
/ KEYWORD_else / KEYWORD_enum / KEYWORD_errdefer / KEYWORD_error / KEYWORD_export
/ KEYWORD_extern / KEYWORD_fn / KEYWORD_for / KEYWORD_if
/ KEYWORD_inline / KEYWORD_noalias / KEYWORD_nosuspend / KEYWORD_noinline
/ KEYWORD_opaque / KEYWORD_or / KEYWORD_orelse / KEYWORD_packed
/ KEYWORD_pub / KEYWORD_resume / KEYWORD_return / KEYWORD_linksection
/ KEYWORD_struct / KEYWORD_suspend / KEYWORD_switch / KEYWORD_test
/ KEYWORD_threadlocal / KEYWORD_try / KEYWORD_union / KEYWORD_unreachable
/ KEYWORD_usingnamespace / KEYWORD_var / KEYWORD_volatile / KEYWORD_while

View File

@ -7,102 +7,189 @@ import (
type formatter struct {
w io.Writer
line int // 1-based
col int // 1-based, reset to 1 after newline
indent int // indentation level
}
func (f *formatter) WriteString(s string) (n int, err error) {
return f.w.Write([]byte(s))
// indentStr defines the string used for each indentation level (4 spaces).
const indentStr = " "
// Writef formats according to format, writes the result to the underlying
// writer in a single Write call, and keeps the line/col counters in sync.
//
// After every newline that is followed by more text in the same call, one
// indentStr per indentation level is inserted directly behind that newline.
// No indentation is inserted when the newline is the last byte of the string
// or is followed by another newline or by a closing brace; those cases are
// left to the caller (see writeIndent).
//
// The indentation is assembled into the same buffer as the text. Writing it
// to the writer while still scanning the string would place it in front of
// the whole string instead of behind the newline it belongs to.
//
// A failing write is reported by panicking with the returned error.
func (f *formatter) Writef(format string, a ...any) {
	s := fmt.Sprintf(format, a...)
	out := make([]byte, 0, len(s))
	flushed := 0 // s[:flushed] has already been copied into out
	for i, r := range s {
		if r != '\n' {
			// A zero col means the counter was never initialised; the
			// first character then only establishes column 1.
			if f.col == 0 {
				f.col = 1
			} else {
				f.col++
			}
			continue
		}
		f.line++
		f.col = 1
		if i+1 >= len(s) || s[i+1] == '\n' || s[i+1] == '}' {
			continue
		}
		// Copy everything up to and including the newline, then the
		// indentation for the line that follows.
		out = append(out, s[flushed:i+1]...)
		flushed = i + 1
		for level := 0; level < f.indent; level++ {
			out = append(out, indentStr...)
			f.col += len(indentStr)
		}
	}
	out = append(out, s[flushed:]...)
	if _, err := f.w.Write(out); err != nil {
		panic(err)
	}
}
func (f *formatter) Writef(format string, a ...any) (err error) {
_, err = f.w.Write(fmt.Appendf(nil, format, a...))
return err
// writeIndent emits one indentStr per current indentation level and advances
// the column counter accordingly. Use it at the beginning of a fresh line,
// before a statement or a closing brace is written.
//
// A failing write is reported by panicking with the returned error.
func (f *formatter) writeIndent() {
	for remaining := f.indent; remaining > 0; remaining-- {
		_, err := f.w.Write([]byte(indentStr))
		if err != nil {
			panic(err)
		}
		f.col += len(indentStr)
	}
}
func Write(w io.Writer, root *Root) error {
f := &formatter{
w: w,
// Write is the entry point for formatting a Zig AST.
func Write(w io.Writer, root *Root) (err error) {
defer func() {
if r := recover(); r != nil {
if e, ok := r.(error); ok {
err = e
} else {
panic(r)
}
}
}()
f := &formatter{w: w, line: 1, col: 1, indent: 0}
if root.ContainerDocComment != "" {
err := f.Writef("//! %s\n\n", root.ContainerDocComment)
if err != nil {
return err
}
f.Writef("//! %s\n\n", root.ContainerDocComment)
}
for _, member := range root.ContainerMembers {
for _, decl := range member.Decls {
if err := writeDecl(f, decl); err != nil {
return err
}
// Only handle Decl for now (fields not needed for hello world)
if member.Decl != nil {
writeDecl(f, member.Decl)
}
}
return nil
}
func writeDecl(f *formatter, decl Decl) (err error) {
switch typ := decl.(type) {
// writeDecl emits a top-level declaration.
func writeDecl(f *formatter, decl Decl) {
switch d := decl.(type) {
case *FnDecl:
if err = f.Writef("fn %s(", typ.Name); err != nil {
return err
f.Writef("\nfn %s(", d.Name)
writeParams(f, d.Params)
f.Writef(") ")
writeTypeExpr(f, d.ReturnType)
writeBlock(f, d.Body)
case *GlobalVarDecl:
if d.Const {
f.Writef("const %s = ", d.Name)
} else {
f.Writef("var %s = ", d.Name)
}
if err = writeParams(f, typ.Params); err != nil {
return err
writeExpr(f, d.Value)
f.Writef(";\n")
}
if err = f.Writef(") %s", typ.ReturnType); err != nil {
return err
}
if err = writeBlock(f, typ.Body); err != nil {
return err
}
}
return nil
}
func writeParams(f *formatter, params []*ParamDecl) (err error) {
for _, param := range params {
// writeParams emits function parameters, separated by commas.
func writeParams(f *formatter, params []*ParamDecl) {
for i, param := range params {
if i > 0 {
f.Writef(", ")
}
if param.Name != "" {
if err = f.Writef("%s: ", param.Name); err != nil {
return err
f.Writef("%s: ", param.Name)
}
writeTypeExpr(f, param.Type)
}
if err = f.Writef("%s", param.Type); err != nil {
return err
}
}
return nil
}
func writeBlock(f *formatter, block *Block) (err error) {
// writeTypeExpr emits a type expression.
//
// A nil type produces no output, an *Identifier is written by name, and any
// other value is written using its default %v formatting.
func writeTypeExpr(f *formatter, typ TypeExpr) {
	if typ == nil {
		return
	}
	if ident, ok := typ.(*Identifier); ok {
		f.Writef("%s", ident.Name)
		return
	}
	f.Writef("%v", typ)
}
// writeBlock emits a block, handling indentation for statements and the closing brace.
func writeBlock(f *formatter, block *Block) {
if block == nil {
if _, err = f.WriteString(";"); err != nil {
return err
f.Writef(";")
return
}
return nil
}
if err = f.Writef(" {\n"); err != nil {
return err
}
for _, stmt := range block.Stmts {
if err = writeStmt(f, stmt); err != nil {
return err
}
// Should this be the job of the formatter?
if _, err = f.WriteString("\n"); err != nil {
return err
f.Writef(" {\n")
f.indent++ // Increase indentation for block contents.
for i, stmt := range block.Stmts {
f.writeIndent() // Indent each statement.
writeStmt(f, stmt)
if i < len(block.Stmts)-1 {
f.Writef("\n")
}
}
if err = f.Writef("}\n"); err != nil {
return err
}
return nil
f.indent-- // Decrease indentation before closing brace.
f.Writef("\n")
f.writeIndent() // Indent the closing brace.
f.Writef("}\n")
}
func writeStmt(f *formatter, stmt Stmt) (err error) {
switch stmt.(type) {
// writeStmt emits a statement. Indentation is handled by the caller (writeBlock).
func writeStmt(f *formatter, stmt Stmt) {
switch s := stmt.(type) {
case *ReturnStmt:
if _, err = f.WriteString("return;"); err != nil {
return err
f.Writef("return")
if s.Value != nil {
f.Writef(" ")
writeExpr(f, s.Value)
}
f.Writef(";")
case *ExprStmt:
writeExpr(f, s.Expr)
f.Writef(";")
}
}
// writeExpr emits an expression.
func writeExpr(f *formatter, expr Expr) {
switch e := expr.(type) {
case *Identifier:
f.Writef("%s", e.Name)
case *CallExpr:
writeExpr(f, e.Fun)
f.Writef("(")
for i, arg := range e.Args {
if i > 0 {
f.Writef(", ")
}
writeExpr(f, arg)
}
f.Writef(")")
case *FieldAccessExpr:
writeExpr(f, e.Receiver)
f.Writef(".%s", e.Field)
case *Literal:
switch e.Kind {
case "string":
f.Writef("%q", e.Value)
default:
f.Writef("%v", e.Value)
}
case *InitListExpr:
if e.Empty {
f.Writef(".{}")
} else {
f.Writef(".{")
// TODO
f.Writef("}")
}
}
return nil
}

View File

@ -19,8 +19,10 @@ func Expect[T cmp.Ordered](expected, actual T) error {
func TestHelloWorld(t *testing.T) {
expected := `//! Hello, world!
const std = @import("std");
fn main() void {
return;
std.debug.print("Hello, world!\n", .{});
}
`
@ -28,13 +30,37 @@ return;
ContainerDocComment: "Hello, world!",
ContainerMembers: []*zig.ContainerMember{
{
Decls: []zig.Decl{
&zig.FnDecl{
Decl: &zig.GlobalVarDecl{
Const: true,
Name: "std",
Value: &zig.CallExpr{
Fun: &zig.Identifier{Name: "@import"},
Args: []zig.Expr{
&zig.Literal{Kind: "string", Value: "std"},
},
},
},
},
{
Decl: &zig.FnDecl{
Name: "main",
ReturnType: "void",
ReturnType: &zig.Identifier{Name: "void"},
Body: &zig.Block{
Stmts: []zig.Stmt{
&zig.ReturnStmt{},
&zig.ExprStmt{
Expr: &zig.CallExpr{
Fun: &zig.FieldAccessExpr{
Receiver: &zig.FieldAccessExpr{
Receiver: &zig.Identifier{Name: "std"},
Field: "debug",
},
Field: "print",
},
Args: []zig.Expr{
&zig.Literal{Kind: "string", Value: "Hello, world!\n"},
&zig.InitListExpr{Empty: true},
},
},
},
},
},