Files
pugz/src/parser.zig

1645 lines
56 KiB
Zig
Raw Normal View History

const std = @import("std");
2026-01-24 23:53:19 +05:30
const mem = std.mem;
const Allocator = std.mem.Allocator;
// Import token types from lexer
const lexer = @import("lexer.zig");
2026-01-24 23:53:19 +05:30
pub const TokenType = lexer.TokenType;
pub const TokenValue = lexer.TokenValue;
pub const Location = lexer.Location;
pub const TokenLoc = lexer.TokenLoc;
pub const Token = lexer.Token;
// ============================================================================
// Inline Tags (tags that are typically inline in HTML)
// ============================================================================
/// Set of tag names treated as inline HTML elements.
/// Built at compile time so a membership test is a map lookup rather than a
/// linear scan over the name list.
const inline_tags_map = std.StaticStringMap(void).initComptime(.{
    .{ "a", {} },
    .{ "abbr", {} },
    .{ "acronym", {} },
    .{ "b", {} },
    .{ "br", {} },
    .{ "code", {} },
    .{ "em", {} },
    .{ "font", {} },
    .{ "i", {} },
    .{ "img", {} },
    .{ "ins", {} },
    .{ "kbd", {} },
    .{ "map", {} },
    .{ "samp", {} },
    .{ "small", {} },
    .{ "span", {} },
    .{ "strong", {} },
    .{ "sub", {} },
    .{ "sup", {} },
});

/// Return true when `name` is one of the entries of `inline_tags_map`.
/// Not marked `inline`: the body is a single lookup and the compiler is free
/// to inline it on its own.
fn isInlineTag(name: []const u8) bool {
    return inline_tags_map.has(name);
}
2026-01-24 23:53:19 +05:30
// ============================================================================
// AST Node Types
// ============================================================================
/// Discriminator stored in `Node.type`; tells which kind of AST node a
/// `Node` value represents.
pub const NodeType = enum {
    // Containers
    Block,
    NamedBlock,

    // Elements and content
    Tag,
    InterpolatedTag,
    Text,
    Code,
    Comment,
    BlockComment,
    Doctype,

    // Mixins
    Mixin,
    MixinBlock,

    // Control flow
    Case,
    When,
    Conditional,
    While,
    Each,
    EachOf,

    // Template composition
    Extends,
    Include,
    RawInclude,
    Filter,
    IncludeFilter,
    FileReference,
    YieldBlock,

    // Attributes
    AttributeBlock,
};
2026-01-24 23:53:19 +05:30
// ============================================================================
// AST Node - A tagged union representing all possible AST nodes
// ============================================================================
/// One attribute entry, together with the source position it was read from.
pub const Attribute = struct {
    /// Attribute name.
    name: []const u8,
    /// Attribute value, or null when there is none.
    val: ?[]const u8,
    /// Source line of the attribute.
    line: usize,
    /// Source column of the attribute.
    column: usize,
    /// Source file name, when known.
    filename: ?[]const u8,
    /// Escaping flag carried along with the value.
    must_escape: bool,
    /// True when `val` was heap-allocated and has to be freed by the owner
    /// (see `Node.deinit`).
    val_owned: bool = false,
};
2026-01-24 23:53:19 +05:30
/// An attribute-block entry, together with its source position.
pub const AttributeBlock = struct {
    /// Text of the attribute block.
    val: []const u8,
    /// Source line.
    line: usize,
    /// Source column.
    column: usize,
    /// Source file name, when known.
    filename: ?[]const u8,
};
2026-01-24 23:53:19 +05:30
/// Path of another file referenced by a node (stored in `Node.file`),
/// together with the position of the path in the source.
pub const FileReference = struct {
    /// Referenced path, or null when it has not been set.
    path: ?[]const u8,
    /// Source line of the path.
    line: usize,
    /// Source column of the path.
    column: usize,
    /// Name of the file containing the reference, when known.
    filename: ?[]const u8,
};
2026-01-24 23:53:19 +05:30
/// A single AST node. One struct is shared by every node kind; `type` says
/// which kind this is and therefore which of the optional fields are
/// meaningful. Children, filters, `consequent` and `alternate` are owned by
/// the node and released by `deinit`.
pub const Node = struct {
    type: NodeType,
    line: usize = 0,
    column: usize = 0,
    filename: ?[]const u8 = null,

    // Block fields
    nodes: std.ArrayListUnmanaged(*Node) = .{},

    // NamedBlock additional fields
    name: ?[]const u8 = null, // Also used for Tag, Mixin, Filter
    mode: ?[]const u8 = null, // "prepend", "append", "replace"

    // Tag fields
    self_closing: bool = false,
    attrs: std.ArrayListUnmanaged(Attribute) = .{},
    attribute_blocks: std.ArrayListUnmanaged(AttributeBlock) = .{},
    is_inline: bool = false,
    text_only: bool = false,
    self_closing_allowed: bool = false,

    // Text fields
    val: ?[]const u8 = null, // Also used for Code, Comment, Doctype, Case expr, When expr, Conditional test, While test
    is_html: bool = false,

    // Code fields
    buffer: bool = false,
    must_escape: bool = true,
    is_inline_code: bool = false,

    // Mixin fields
    args: ?[]const u8 = null,
    call: bool = false,

    // Each fields
    obj: ?[]const u8 = null,
    key: ?[]const u8 = null,

    // Conditional fields
    test_expr: ?[]const u8 = null, // "test" in JS
    consequent: ?*Node = null,
    alternate: ?*Node = null,

    // Extends/Include fields
    file: ?FileReference = null,

    // Include fields
    filters: std.ArrayListUnmanaged(*Node) = .{},

    // InterpolatedTag fields
    expr: ?[]const u8 = null,

    // When/Conditional debug field
    debug: bool = true,

    // Memory ownership flags
    val_owned: bool = false, // true if val was allocated and needs to be freed

    /// Release everything this node owns: its `val` string when `val_owned`
    /// is set, all child nodes, owned attribute values, the attribute and
    /// attribute-block lists, all filter nodes, and the `consequent` /
    /// `alternate` subtrees. The node's own memory is NOT destroyed; the
    /// caller does that.
    pub fn deinit(self: *Node, allocator: Allocator) void {
        if (self.val_owned) {
            if (self.val) |owned| allocator.free(owned);
        }

        destroyAll(allocator, self.nodes.items);
        self.nodes.deinit(allocator);

        for (self.attrs.items) |attr| {
            const attr_val = attr.val orelse continue;
            if (attr.val_owned) allocator.free(attr_val);
        }
        self.attrs.deinit(allocator);

        self.attribute_blocks.deinit(allocator);

        destroyAll(allocator, self.filters.items);
        self.filters.deinit(allocator);

        if (self.consequent) |branch| destroyOne(allocator, branch);
        if (self.alternate) |branch| destroyOne(allocator, branch);
    }

    /// Append `node` to this node's child list.
    pub fn addNode(self: *Node, allocator: Allocator, node: *Node) !void {
        return self.nodes.append(allocator, node);
    }

    /// Recursively release `node` and then free the node itself.
    fn destroyOne(allocator: Allocator, node: *Node) void {
        node.deinit(allocator);
        allocator.destroy(node);
    }

    /// Apply `destroyOne` to every node of `list`, in order.
    fn destroyAll(allocator: Allocator, list: []const *Node) void {
        for (list) |node| destroyOne(allocator, node);
    }
};
2026-01-24 23:53:19 +05:30
// ============================================================================
// Parser Error
// ============================================================================
/// Machine-readable category of a parse failure, stored in `ParserError.code`.
pub const ParserErrorCode = enum {
    /// A token appeared where it is not allowed.
    INVALID_TOKEN,
    /// Buffered code was followed by an indented block.
    BLOCK_IN_BUFFERED_CODE,
    /// Anonymous block used outside a mixin. The misspelling is part of the
    /// public name and is kept so existing users keep compiling.
    BLOCK_OUTISDE_MIXIN,
    MIXIN_WITHOUT_BODY,
    RAW_INCLUDE_BLOCK,
    DUPLICATE_ID,
    DUPLICATE_ATTRIBUTE,
    UNEXPECTED_END,
};
2026-01-24 23:53:19 +05:30
/// Details of the most recent parse failure, as recorded by the parser and
/// returned from `Parser.getError`.
pub const ParserError = struct {
    /// Failure category.
    code: ParserErrorCode,
    /// Human-readable description.
    message: []const u8,
    /// Line of the offending token.
    line: usize,
    /// Column of the offending token.
    column: usize,
    /// File being parsed, when known.
    filename: ?[]const u8,
};
2026-01-24 23:53:19 +05:30
// ============================================================================
// Parser
// ============================================================================
pub const Parser = struct {
allocator: Allocator,
tokens: []const Token,
pos: usize = 0,
deferred: std.ArrayListUnmanaged(Token) = .{},
filename: ?[]const u8 = null,
src: ?[]const u8 = null,
in_mixin: usize = 0,
err: ?ParserError = null,
/// Build a parser over `tokens`. The token slice, `filename` and `src` are
/// stored as given (not copied); every other field starts at its default.
pub fn init(allocator: Allocator, tokens: []const Token, filename: ?[]const u8, src: ?[]const u8) Parser {
    const parser: Parser = .{
        .allocator = allocator,
        .tokens = tokens,
        .filename = filename,
        .src = src,
    };
    return parser;
}
2026-01-24 23:53:19 +05:30
/// Free the deferred-token list. Nodes produced by the parser are not
/// touched here.
pub fn deinit(self: *Parser) void {
    const allocator = self.allocator;
    self.deferred.deinit(allocator);
}
2026-01-24 23:53:19 +05:30
// ========================================================================
// Token Stream Methods
// ========================================================================
2026-01-24 23:53:19 +05:30
/// Return the next token without consuming it.
/// Deferred tokens are served first; once the input is exhausted a
/// synthetic `.eos` token located at 0:0 is returned.
pub fn peek(self: *Parser) Token {
    const pending = self.deferred.items;
    if (pending.len != 0) return pending[0];

    if (self.pos >= self.tokens.len) {
        return .{
            .type = .eos,
            .loc = .{ .start = .{ .line = 0, .column = 0 } },
        };
    }
    return self.tokens[self.pos];
}
2026-01-24 23:53:19 +05:30
/// Return the token `n` positions ahead without consuming anything
/// (`n == 0` is the token `peek` would return).
/// Deferred tokens are counted first, then the remaining input; offsets past
/// the end yield a synthetic `.eos` token located at 0:0.
pub fn lookahead(self: *Parser, n: usize) Token {
    const pending = self.deferred.items;
    if (n < pending.len) return pending[n];

    const eos: Token = .{
        .type = .eos,
        .loc = .{ .start = .{ .line = 0, .column = 0 } },
    };
    const index = self.pos + (n - pending.len);
    return if (index < self.tokens.len) self.tokens[index] else eos;
}
2026-01-24 23:53:19 +05:30
/// Consume and return the next token.
/// A deferred token, if any, is removed and returned first; otherwise the
/// input position moves forward by one. At end of input a synthetic `.eos`
/// token located at 0:0 is returned and the position does not change.
pub fn advance(self: *Parser) Token {
    if (self.deferred.items.len != 0) {
        return self.deferred.orderedRemove(0);
    }

    const index = self.pos;
    if (index >= self.tokens.len) {
        return .{
            .type = .eos,
            .loc = .{ .start = .{ .line = 0, .column = 0 } },
        };
    }
    self.pos = index + 1;
    return self.tokens[index];
}
2026-01-24 23:53:19 +05:30
/// Put `token` at the front of the stream so it is the next one returned by
/// `peek` / `advance`. Fails only if the deferred list cannot grow.
pub fn defer_token(self: *Parser, token: Token) !void {
    return self.deferred.insert(self.allocator, 0, token);
}
2026-01-24 23:53:19 +05:30
/// Consume the next token if it has type `token_type` and return it.
/// Otherwise nothing is consumed, an `INVALID_TOKEN` error is recorded at
/// the offending token, and `error.InvalidToken` is returned.
pub fn expect(self: *Parser, token_type: TokenType) !Token {
    const tok = self.peek();
    if (tok.type != token_type) {
        self.setError(.INVALID_TOKEN, "expected different token type", tok);
        return error.InvalidToken;
    }
    return self.advance();
}
2026-01-24 23:53:19 +05:30
/// Consume and return the next token when it has type `token_type`;
/// otherwise leave the stream untouched and return null.
pub fn accept(self: *Parser, token_type: TokenType) ?Token {
    return if (self.peek().type == token_type) self.advance() else null;
}
2026-01-24 23:53:19 +05:30
// ========================================================================
// Error Handling
// ========================================================================
2026-01-24 23:53:19 +05:30
/// Record a parse error located at the start of `token`, replacing any
/// previously recorded error. `message` is stored as given, not copied.
fn setError(self: *Parser, code: ParserErrorCode, message: []const u8, token: Token) void {
    const where = token.loc.start;
    self.err = ParserError{
        .code = code,
        .message = message,
        .line = where.line,
        .column = where.column,
        .filename = self.filename,
    };
}
2026-01-24 23:53:19 +05:30
/// Return the last error recorded by the parser, or null when none was
/// recorded.
pub fn getError(self: *const Parser) ?ParserError {
    const recorded = self.err;
    return recorded;
}
2026-01-24 23:53:19 +05:30
// ========================================================================
// Block Helpers
// ========================================================================
2026-01-24 23:53:19 +05:30
/// Allocate a new, childless `.Block` node positioned at `line` in the
/// current file. The caller owns the returned node.
fn initBlock(self: *Parser, line: usize) !*Node {
    const block = try self.allocator.create(Node);
    block.* = Node{
        .type = .Block,
        .line = line,
        .filename = self.filename,
    };
    return block;
}
2026-01-24 23:53:19 +05:30
/// Allocate an empty block at `line`; thin alias of `initBlock`.
fn emptyBlock(self: *Parser, line: usize) !*Node {
    const block = try self.initBlock(line);
    return block;
}
2026-01-24 23:53:19 +05:30
// ========================================================================
// Main Parse Entry Point
// ========================================================================
/// Parse the whole token stream into a root `.Block` node.
///
/// Newlines between top-level expressions are skipped. Runs of `text_html`
/// tokens and expressions that evaluate to a plain `.Block` are flattened
/// into the root instead of being nested.
///
/// On success the caller owns the returned tree (release it with
/// `Node.deinit` followed by `allocator.destroy`). On failure everything
/// built so far by this function is released before the error is returned.
pub fn parse(self: *Parser) !*Node {
    const allocator = self.allocator;

    const root = try self.emptyBlock(0);
    errdefer {
        root.deinit(allocator);
        allocator.destroy(root);
    }

    while (true) {
        switch (self.peek().type) {
            .eos => break,
            .newline => {
                _ = self.advance();
            },
            .text_html => {
                var html_nodes = try self.parseTextHtml();
                defer html_nodes.deinit(allocator);
                // appendSlice either appends everything or nothing, so on
                // failure the nodes still belong to `html_nodes` alone.
                errdefer {
                    for (html_nodes.items) |orphan| {
                        orphan.deinit(allocator);
                        allocator.destroy(orphan);
                    }
                }
                try root.nodes.appendSlice(allocator, html_nodes.items);
            },
            else => {
                const expr = try self.parseExpr();
                // Fires only if one of the appends below fails, i.e. while
                // `expr` (and its children) are not yet reachable from root.
                errdefer {
                    expr.deinit(allocator);
                    allocator.destroy(expr);
                }
                if (expr.type == .Block) {
                    // Flatten: adopt the children, then drop the wrapper.
                    try root.nodes.appendSlice(allocator, expr.nodes.items);
                    expr.nodes.clearAndFree(allocator);
                    allocator.destroy(expr);
                } else {
                    try root.addNode(allocator, expr);
                }
            },
        }
    }

    return root;
}
2026-01-24 23:53:19 +05:30
// ========================================================================
// Expression Parsing
// ========================================================================
/// Parse one expression, dispatching on the type of the next token.
///
/// `#id` / `.class` shorthands are handled by pushing an implicit `div` tag
/// token in front of the stream and re-dispatching. A run of `text_html`
/// tokens is returned wrapped in a `.Block`. Any token type without a
/// handler records `INVALID_TOKEN` and returns `error.InvalidToken`.
fn parseExpr(self: *Parser) anyerror!*Node {
    const tok = self.peek();
    return switch (tok.type) {
        .tag => self.parseTag(),
        .mixin => self.parseMixin(),
        .block => self.parseBlock(),
        .mixin_block => self.parseMixinBlock(),
        .case => self.parseCase(),
        .extends => self.parseExtends(),
        .include => self.parseInclude(),
        .doctype => self.parseDoctype(),
        .filter => self.parseFilter(),
        .comment => self.parseComment(),
        .text, .interpolated_code, .start_pug_interpolation => self.parseText(true),
        .text_html => blk: {
            var html_nodes = try self.parseTextHtml();
            // Only reachable if the wrapper block cannot be allocated.
            errdefer {
                for (html_nodes.items) |orphan| {
                    orphan.deinit(self.allocator);
                    self.allocator.destroy(orphan);
                }
                html_nodes.deinit(self.allocator);
            }
            const block = try self.initBlock(tok.loc.start.line);
            // The fresh block has no children, so it can take over the list
            // wholesale instead of re-appending node by node.
            block.nodes = html_nodes;
            break :blk block;
        },
        .dot => self.parseDot(),
        .each => self.parseEach(),
        .each_of => self.parseEachOf(),
        .code => self.parseCode(false),
        .blockcode => self.parseBlockCode(),
        .@"if" => self.parseConditional(),
        .@"while" => self.parseWhile(),
        .call => self.parseCall(),
        .interpolation => self.parseInterpolation(),
        .yield => self.parseYield(),
        .id, .class => blk: {
            // Implicit div tag for #id or .class
            try self.defer_token(.{
                .type = .tag,
                .val = .{ .string = "div" },
                .loc = tok.loc,
            });
            break :blk self.parseExpr();
        },
        else => {
            self.setError(.INVALID_TOKEN, "unexpected token", tok);
            return error.InvalidToken;
        },
    };
}
2026-01-24 23:53:19 +05:30
/// Parse a `.dot` token: consume it, then return the text block that
/// follows, or an empty block at the next token's line when
/// `parseTextBlock` yields nothing.
fn parseDot(self: *Parser) !*Node {
    _ = self.advance();
    if (self.parseTextBlock()) |text_block| {
        return text_block;
    }
    const line = self.peek().loc.start.line;
    return self.emptyBlock(line);
}
2026-01-24 23:53:19 +05:30
// ========================================================================
// Text Parsing
// ========================================================================
2026-01-24 23:53:19 +05:30
/// Parse a run of text, interpolated code and `#[...]` tag interpolations.
///
/// When `allow_block` is true the run may continue across newlines; a
/// `"\n"` text node is inserted for a newline that is directly followed by
/// more text or interpolated code. When false, the first newline ends the
/// run (and is left unconsumed).
///
/// Returns the single node when exactly one was produced, otherwise a
/// `.Block` holding all of them (possibly none). On error every node
/// collected so far is released.
fn parseText(self: *Parser, allow_block: bool) !*Node {
    const allocator = self.allocator;
    const lineno = self.peek().loc.start.line;

    var tags = std.ArrayListUnmanaged(*Node){};
    errdefer {
        for (tags.items) |collected| {
            collected.deinit(allocator);
            allocator.destroy(collected);
        }
        tags.deinit(allocator);
    }

    while (true) {
        const next_tok = self.peek();
        switch (next_tok.type) {
            .text => {
                const tok = self.advance();
                const text_node = try allocator.create(Node);
                errdefer allocator.destroy(text_node);
                text_node.* = .{
                    .type = .Text,
                    .val = tok.val.getString(),
                    .line = tok.loc.start.line,
                    .column = tok.loc.start.column,
                    .filename = self.filename,
                };
                try tags.append(allocator, text_node);
            },
            .interpolated_code => {
                const tok = self.advance();
                const code_node = try allocator.create(Node);
                errdefer allocator.destroy(code_node);
                code_node.* = .{
                    .type = .Code,
                    .val = tok.val.getString(),
                    .buffer = tok.isBuffered(),
                    .must_escape = tok.shouldEscape(),
                    .is_inline_code = true,
                    .line = tok.loc.start.line,
                    .column = tok.loc.start.column,
                    .filename = self.filename,
                };
                try tags.append(allocator, code_node);
            },
            .newline => {
                if (!allow_block) break;
                const tok = self.advance();
                const next_type = self.peek().type;
                if (next_type == .text or next_type == .interpolated_code) {
                    const nl_node = try allocator.create(Node);
                    errdefer allocator.destroy(nl_node);
                    nl_node.* = .{
                        .type = .Text,
                        .val = "\n",
                        .line = tok.loc.start.line,
                        .column = tok.loc.start.column,
                        .filename = self.filename,
                    };
                    try tags.append(allocator, nl_node);
                }
            },
            .start_pug_interpolation => {
                _ = self.advance();
                const expr = try self.parseExpr();
                {
                    // Scoped so it cannot fire once `tags` owns the node.
                    errdefer {
                        expr.deinit(allocator);
                        allocator.destroy(expr);
                    }
                    try tags.append(allocator, expr);
                }
                _ = try self.expect(.end_pug_interpolation);
            },
            else => break,
        }
    }

    if (tags.items.len == 1) {
        const only = tags.items[0];
        tags.deinit(allocator);
        return only;
    }

    const block = try self.initBlock(lineno);
    // The new block is empty: hand it the list instead of copying entries.
    block.nodes = tags;
    return block;
}
2026-01-24 23:53:19 +05:30
/// Collect a run of literal-HTML lines into a flat node list.
///
/// Each `text_html` token becomes its own `.Text` node with `is_html` set.
/// The children of an indented block and inline `code` nodes are appended
/// in place; newlines are skipped. The run ends at the first token of any
/// other type, which is left unconsumed.
///
/// The caller owns the returned list and the nodes in it. On error the
/// nodes gathered so far are released.
fn parseTextHtml(self: *Parser) !std.ArrayListUnmanaged(*Node) {
    const allocator = self.allocator;

    var nodes = std.ArrayListUnmanaged(*Node){};
    errdefer {
        for (nodes.items) |collected| {
            collected.deinit(allocator);
            allocator.destroy(collected);
        }
        nodes.deinit(allocator);
    }

    while (true) {
        switch (self.peek().type) {
            .text_html => {
                const text = self.advance();
                const html_node = try allocator.create(Node);
                errdefer allocator.destroy(html_node);
                html_node.* = .{
                    .type = .Text,
                    .val = text.val.getString(),
                    .filename = self.filename,
                    .line = text.loc.start.line,
                    .column = text.loc.start.column,
                    .is_html = true,
                };
                try nodes.append(allocator, html_node);
            },
            .indent => {
                const inner = try self.block_();
                // Fires only if the append fails, while `inner` still owns
                // all of its children.
                errdefer {
                    inner.deinit(allocator);
                    allocator.destroy(inner);
                }
                try nodes.appendSlice(allocator, inner.nodes.items);
                inner.nodes.deinit(allocator);
                allocator.destroy(inner);
            },
            .code => {
                const code_node = try self.parseCode(true);
                errdefer {
                    code_node.deinit(allocator);
                    allocator.destroy(code_node);
                }
                try nodes.append(allocator, code_node);
            },
            .newline => {
                _ = self.advance();
            },
            else => break,
        }
    }

    return nodes;
}
2026-01-24 23:53:19 +05:30
/// Parse a pipeless text block (`start_pipeless_text` ... `end_pipeless_text`).
///
/// Returns null when the next token does not open a text block, and also
/// when anything inside the block fails (allocation failure, nested parse
/// error, unexpected token). In the failure case the partially built block
/// is released before returning, and an unexpected token is recorded via
/// `setError`.
fn parseTextBlock(self: *Parser) ?*Node {
    const tok = self.accept(.start_pipeless_text) orelse return null;
    const block = self.emptyBlock(tok.loc.start.line) catch return null;

    self.fillTextBlock(block) catch {
        block.deinit(self.allocator);
        self.allocator.destroy(block);
        return null;
    };
    return block;
}

/// Append the contents of a pipeless text block to `block`, consuming
/// tokens up to and including the closing `end_pipeless_text`.
fn fillTextBlock(self: *Parser, block: *Node) anyerror!void {
    const allocator = self.allocator;

    while (true) {
        const cur_tok = self.advance();
        switch (cur_tok.type) {
            .end_pipeless_text => return,
            .text => {
                const text_node = try allocator.create(Node);
                errdefer allocator.destroy(text_node);
                text_node.* = .{
                    .type = .Text,
                    .val = cur_tok.val.getString(),
                    .line = cur_tok.loc.start.line,
                    .column = cur_tok.loc.start.column,
                    .filename = self.filename,
                };
                try block.addNode(allocator, text_node);
            },
            .newline => {
                const nl_node = try allocator.create(Node);
                errdefer allocator.destroy(nl_node);
                nl_node.* = .{
                    .type = .Text,
                    .val = "\n",
                    .line = cur_tok.loc.start.line,
                    .column = cur_tok.loc.start.column,
                    .filename = self.filename,
                };
                try block.addNode(allocator, nl_node);
            },
            .start_pug_interpolation => {
                const expr = try self.parseExpr();
                {
                    // Scoped so it cannot fire once `block` owns the node.
                    errdefer {
                        expr.deinit(allocator);
                        allocator.destroy(expr);
                    }
                    try block.addNode(allocator, expr);
                }
                _ = try self.expect(.end_pug_interpolation);
            },
            .interpolated_code => {
                const code_node = try allocator.create(Node);
                errdefer allocator.destroy(code_node);
                code_node.* = .{
                    .type = .Code,
                    .val = cur_tok.val.getString(),
                    .buffer = cur_tok.isBuffered(),
                    .must_escape = cur_tok.shouldEscape(),
                    .is_inline_code = true,
                    .line = cur_tok.loc.start.line,
                    .column = cur_tok.loc.start.column,
                    .filename = self.filename,
                };
                try block.addNode(allocator, code_node);
            },
            else => {
                self.setError(.INVALID_TOKEN, "Unexpected token in text block", cur_tok);
                return error.InvalidToken;
            },
        }
    }
}
2026-01-24 23:53:19 +05:30
// ========================================================================
// Block Expansion
// ========================================================================
/// Parse what follows a construct that accepts either `: expr` on the same
/// line or an indented block.
///
/// With a colon, the following expression is parsed and returned wrapped in
/// a `.Block` (or returned as-is when it already is one). Without a colon
/// the result of `block_()` is returned. If wrapping fails, the parsed
/// expression is released before the error propagates.
fn parseBlockExpansion(self: *Parser) !*Node {
    const colon = self.accept(.colon) orelse return self.block_();

    const expr = try self.parseExpr();
    if (expr.type == .Block) return expr;
    errdefer {
        expr.deinit(self.allocator);
        self.allocator.destroy(expr);
    }

    const block = try self.initBlock(colon.loc.start.line);
    // `block` has no children yet, so destroying the node itself is enough.
    errdefer self.allocator.destroy(block);
    try block.addNode(self.allocator, expr);
    return block;
}
// ========================================================================
// Case/When/Default
// ========================================================================
/// Parse a `case` statement and its indented list of `when` / `default`
/// branches.
///
/// The case expression is stored in `expr`; each branch becomes a child
/// node. Comment and newline tokens between branches are skipped; any other
/// token records `INVALID_TOKEN`. On error the partially built case node is
/// released.
fn parseCase(self: *Parser) !*Node {
    const allocator = self.allocator;
    const tok = try self.expect(.case);

    const node = try allocator.create(Node);
    node.* = .{
        .type = .Case,
        .expr = tok.val.getString(),
        .line = tok.loc.start.line,
        .column = tok.loc.start.column,
        .filename = self.filename,
    };
    errdefer {
        node.deinit(allocator);
        allocator.destroy(node);
    }

    _ = try self.expect(.indent);

    while (true) {
        const next = self.peek();
        switch (next.type) {
            .outdent => break,
            .comment, .newline => {
                _ = self.advance();
            },
            .when => {
                const when_node = try self.parseWhen();
                errdefer {
                    when_node.deinit(allocator);
                    allocator.destroy(when_node);
                }
                try node.addNode(allocator, when_node);
            },
            .default => {
                const default_node = try self.parseDefault();
                errdefer {
                    default_node.deinit(allocator);
                    allocator.destroy(default_node);
                }
                try node.addNode(allocator, default_node);
            },
            else => {
                self.setError(.INVALID_TOKEN, "Expected 'when', 'default' or 'newline'", next);
                return error.InvalidToken;
            },
        }
    }

    _ = try self.expect(.outdent);
    return node;
}
2026-01-24 23:53:19 +05:30
/// Parse one `when` branch of a `case`.
///
/// The branch expression is stored in `expr` and `debug` is cleared. When
/// the next token is not a newline the branch body is parsed with
/// `parseBlockExpansion` and its children become the children of the
/// returned node; otherwise the node has no children.
fn parseWhen(self: *Parser) !*Node {
    const tok = try self.expect(.when);

    const node = try self.allocator.create(Node);
    // No children are attached until after the last fallible call.
    errdefer self.allocator.destroy(node);
    node.* = .{
        .type = .When,
        .expr = tok.val.getString(),
        .debug = false,
        .line = tok.loc.start.line,
        .column = tok.loc.start.column,
        .filename = self.filename,
    };

    if (self.peek().type != .newline) {
        const block = try self.parseBlockExpansion();
        // `node` is childless: take over the list rather than copying it.
        node.nodes = block.nodes;
        self.allocator.destroy(block);
    }

    return node;
}
2026-01-24 23:53:19 +05:30
/// Parse the `default` branch of a `case`.
///
/// It is represented as a `.When` node whose `expr` is the literal string
/// "default", with `debug` cleared. The branch body is parsed with
/// `parseBlockExpansion` and its children become the node's children.
fn parseDefault(self: *Parser) !*Node {
    const tok = try self.expect(.default);

    const node = try self.allocator.create(Node);
    // No children are attached until after the last fallible call.
    errdefer self.allocator.destroy(node);
    node.* = .{
        .type = .When,
        .expr = "default",
        .debug = false,
        .line = tok.loc.start.line,
        .column = tok.loc.start.column,
        .filename = self.filename,
    };

    const block = try self.parseBlockExpansion();
    // `node` is childless: take over the list rather than copying it.
    node.nodes = block.nodes;
    self.allocator.destroy(block);
    return node;
}
2026-01-24 23:53:19 +05:30
// ========================================================================
// Code Parsing
// ========================================================================
/// Parse a `code` token into a `.Code` node.
///
/// `no_block` marks the node as inline code and skips block handling.
/// Otherwise an indented block following the code becomes the node's
/// children; attaching a block to buffered code records
/// `BLOCK_IN_BUFFERED_CODE` and fails with `error.BlockInBufferedCode`.
///
/// `debug` is cleared only when the code text starts with `else` (after
/// optional leading spaces), so code that merely contains the letters
/// "else" is no longer affected.
fn parseCode(self: *Parser, no_block: bool) !*Node {
    const tok = try self.expect(.code);

    const node = try self.allocator.create(Node);
    // No children are attached until after the last fallible call.
    errdefer self.allocator.destroy(node);
    node.* = .{
        .type = .Code,
        .val = tok.val.getString(),
        .buffer = tok.isBuffered(),
        .must_escape = tok.shouldEscape(),
        .is_inline_code = no_block,
        .line = tok.loc.start.line,
        .column = tok.loc.start.column,
        .filename = self.filename,
    };

    // Check for "else" pattern - disable debug
    if (node.val) |v| {
        if (mem.startsWith(u8, mem.trim(u8, v, " "), "else")) {
            node.debug = false;
        }
    }

    if (no_block) return node;

    // Handle block
    if (self.peek().type == .indent) {
        if (tok.isBuffered()) {
            self.setError(.BLOCK_IN_BUFFERED_CODE, "Buffered code cannot have a block attached", self.peek());
            return error.BlockInBufferedCode;
        }

        const block = try self.block_();
        // `node` is childless: take over the list rather than copying it.
        node.nodes = block.nodes;
        self.allocator.destroy(block);
    }

    return node;
}
2026-01-24 23:53:19 +05:30
/// Parse an `if` statement together with its `else if` / `else` chain.
///
/// Every `.Conditional` node gets a `consequent`: the indented block that
/// follows, or an empty block when there is none. Each `else if` becomes a
/// new `.Conditional` stored in the previous node's `alternate`. A final
/// `else` stores its indented block in `alternate`; an `else` without a
/// block leaves `alternate` null. Newlines between the parts are consumed.
/// On error the whole chain built so far is released.
fn parseConditional(self: *Parser) !*Node {
    const allocator = self.allocator;
    const tok = try self.expect(.@"if");

    const node = try allocator.create(Node);
    node.* = .{
        .type = .Conditional,
        .test_expr = tok.val.getString(),
        .line = tok.loc.start.line,
        .column = tok.loc.start.column,
        .filename = self.filename,
    };
    // Every later node is linked below `node` before its own fallible
    // calls, so releasing `node` releases the whole chain.
    errdefer {
        node.deinit(allocator);
        allocator.destroy(node);
    }

    node.consequent = if (self.peek().type == .indent)
        try self.block_()
    else
        try self.emptyBlock(tok.loc.start.line);

    var current_node = node;
    while (true) {
        switch (self.peek().type) {
            .newline => {
                _ = try self.expect(.newline);
            },
            .else_if => {
                const else_if_tok = try self.expect(.else_if);
                const else_if_node = try allocator.create(Node);
                else_if_node.* = .{
                    .type = .Conditional,
                    .test_expr = else_if_tok.val.getString(),
                    .line = else_if_tok.loc.start.line,
                    .column = else_if_tok.loc.start.column,
                    .filename = self.filename,
                };
                current_node.alternate = else_if_node;
                current_node = else_if_node;

                current_node.consequent = if (self.peek().type == .indent)
                    try self.block_()
                else
                    try self.emptyBlock(else_if_tok.loc.start.line);
            },
            .@"else" => {
                _ = try self.expect(.@"else");
                if (self.peek().type == .indent) {
                    current_node.alternate = try self.block_();
                }
                break;
            },
            else => break,
        }
    }

    return node;
}
2026-01-24 23:53:19 +05:30
/// Parse a `while` statement. The loop condition is stored in `test_expr`;
/// the children of a following indented block, if any, become the node's
/// children.
fn parseWhile(self: *Parser) !*Node {
    const tok = try self.expect(.@"while");

    const node = try self.allocator.create(Node);
    // No children are attached until after the last fallible call.
    errdefer self.allocator.destroy(node);
    node.* = .{
        .type = .While,
        .test_expr = tok.val.getString(),
        .line = tok.loc.start.line,
        .column = tok.loc.start.column,
        .filename = self.filename,
    };

    // Handle block
    if (self.peek().type == .indent) {
        const block = try self.block_();
        // `node` is childless: take over the list rather than copying it.
        node.nodes = block.nodes;
        self.allocator.destroy(block);
    }

    return node;
}
2026-01-24 23:53:19 +05:30
/// Parse a `blockcode` token and the pipeless text that may follow it into
/// a single unbuffered, unescaped `.Code` node.
///
/// Text tokens are concatenated and each newline token contributes a
/// `'\n'`; any other token inside the text records `INVALID_TOKEN`. The
/// resulting string is heap-allocated and owned by the node (`val_owned`).
fn parseBlockCode(self: *Parser) !*Node {
    const allocator = self.allocator;
    const tok = try self.expect(.blockcode);

    var text = std.ArrayListUnmanaged(u8){};
    defer text.deinit(allocator);

    if (self.accept(.start_pipeless_text) != null) {
        while (self.peek().type != .end_pipeless_text) {
            const inner_tok = self.advance();
            switch (inner_tok.type) {
                .text => {
                    if (inner_tok.val.getString()) |s| {
                        try text.appendSlice(allocator, s);
                    }
                },
                .newline => {
                    try text.append(allocator, '\n');
                },
                else => {
                    self.setError(.INVALID_TOKEN, "Unexpected token in block code", inner_tok);
                    return error.InvalidToken;
                },
            }
        }
        _ = self.advance(); // consume end_pipeless_text
    }

    // Hand the accumulated bytes over without a second copy; the list is
    // left empty, so the deferred deinit above stays valid.
    const code_text = try text.toOwnedSlice(allocator);
    errdefer allocator.free(code_text);

    const node = try allocator.create(Node);
    node.* = .{
        .type = .Code,
        .val = code_text,
        .val_owned = true, // We allocated this string
        .buffer = false,
        .must_escape = false,
        .is_inline_code = false,
        .line = tok.loc.start.line,
        .column = tok.loc.start.column,
        .filename = self.filename,
    };
    return node;
}
2026-01-24 23:53:19 +05:30
// ========================================================================
// Comment Parsing
// ========================================================================
/// Parse a comment.
///
/// When `parseTextBlock` yields a block, the result is a `.BlockComment`
/// whose children are the block's children; otherwise it is a plain
/// `.Comment`. The comment text is stored in `val` and the token's
/// buffered flag in `buffer`.
fn parseComment(self: *Parser) !*Node {
    const allocator = self.allocator;
    const tok = try self.expect(.comment);

    const text_block = self.parseTextBlock();
    // Only reachable if the comment node itself cannot be allocated.
    errdefer if (text_block) |orphan| {
        orphan.deinit(allocator);
        allocator.destroy(orphan);
    };

    const node = try allocator.create(Node);
    node.* = .{
        .type = if (text_block != null) .BlockComment else .Comment,
        .val = tok.val.getString(),
        .buffer = tok.isBuffered(),
        .line = tok.loc.start.line,
        .column = tok.loc.start.column,
        .filename = self.filename,
    };

    if (text_block) |block| {
        // Move block nodes to comment
        node.nodes = block.nodes;
        allocator.destroy(block);
    }
    return node;
}
2026-01-24 23:53:19 +05:30
// ========================================================================
// Doctype Parsing
// ========================================================================
/// Parse a `doctype` token into a `.Doctype` node carrying the token's
/// string value and position.
fn parseDoctype(self: *Parser) !*Node {
    const tok = try self.expect(.doctype);
    const start = tok.loc.start;

    const doctype = try self.allocator.create(Node);
    doctype.* = Node{
        .type = .Doctype,
        .val = tok.val.getString(),
        .line = start.line,
        .column = start.column,
        .filename = self.filename,
    };
    return doctype;
}
2026-01-24 23:53:19 +05:30
// ========================================================================
// Filter Parsing
// ========================================================================
2026-01-24 23:53:19 +05:30
/// Parse a filter attached to an `include` into an `.IncludeFilter` node.
/// The filter name is stored in `name`; attributes, when present, are
/// parsed with `attrs` and stored on the node.
fn parseIncludeFilter(self: *Parser) !*Node {
    const tok = try self.expect(.filter);

    // Allocate the node before parsing attributes so a failed allocation
    // cannot strand an already-parsed attribute list.
    const node = try self.allocator.create(Node);
    errdefer self.allocator.destroy(node);
    node.* = .{
        .type = .IncludeFilter,
        .name = tok.val.getString(),
        .line = tok.loc.start.line,
        .column = tok.loc.start.column,
        .filename = self.filename,
    };

    if (self.peek().type == .start_attributes) {
        node.attrs = try self.attrs(null);
    }
    return node;
}
2026-01-24 23:53:19 +05:30
/// Parse a filter into a `.Filter` node.
///
/// The filter name is stored in `name` and optional attributes in `attrs`.
/// The node's children are, depending on the next token: a single `.Text`
/// node (inline text), a single nested filter, or the contents of a
/// pipeless text block (none if `parseTextBlock` yields nothing).
/// On error the partially built node is released.
fn parseFilter(self: *Parser) !*Node {
    const allocator = self.allocator;
    const tok = try self.expect(.filter);

    const node = try allocator.create(Node);
    node.* = .{
        .type = .Filter,
        .name = tok.val.getString(),
        .line = tok.loc.start.line,
        .column = tok.loc.start.column,
        .filename = self.filename,
    };
    errdefer {
        node.deinit(allocator);
        allocator.destroy(node);
    }

    if (self.peek().type == .start_attributes) {
        node.attrs = try self.attrs(null);
    }

    switch (self.peek().type) {
        .text => {
            const text_token = self.advance();
            const text_node = try allocator.create(Node);
            errdefer allocator.destroy(text_node);
            text_node.* = .{
                .type = .Text,
                .val = text_token.val.getString(),
                .line = text_token.loc.start.line,
                .column = text_token.loc.start.column,
                .filename = self.filename,
            };
            try node.addNode(allocator, text_node);
        },
        .filter => {
            const nested_filter = try self.parseFilter();
            errdefer {
                nested_filter.deinit(allocator);
                allocator.destroy(nested_filter);
            }
            try node.addNode(allocator, nested_filter);
        },
        else => if (self.parseTextBlock()) |block| {
            // `node` is childless: take over the list rather than copying.
            node.nodes = block.nodes;
            allocator.destroy(block);
        },
    }

    return node;
}
2026-01-24 23:53:19 +05:30
// ========================================================================
// Each Parsing
// ========================================================================
/// Parse an `each` loop.
///
/// The token's `code`, `val` and `key` strings are stored in `obj`, `val`
/// and `key`. The loop body comes from `block_()` and its children become
/// the node's children; an `else` that follows stores its block in
/// `alternate`. On error the partially built node is released.
fn parseEach(self: *Parser) !*Node {
    const allocator = self.allocator;
    const tok = try self.expect(.each);

    const node = try allocator.create(Node);
    node.* = .{
        .type = .Each,
        .obj = tok.code.getString(),
        .val = tok.val.getString(),
        .key = tok.key.getString(),
        .line = tok.loc.start.line,
        .column = tok.loc.start.column,
        .filename = self.filename,
    };
    errdefer {
        node.deinit(allocator);
        allocator.destroy(node);
    }

    const block = try self.block_();
    // `node` is childless: take over the list rather than copying it.
    node.nodes = block.nodes;
    allocator.destroy(block);

    if (self.accept(.@"else") != null) {
        node.alternate = try self.block_();
    }

    return node;
}
2026-01-24 23:53:19 +05:30
/// Parse an `each ... of` loop.
///
/// The token's `code` and `val` strings are stored in `obj` and `val`. The
/// loop body comes from `block_()` and its children become the node's
/// children.
fn parseEachOf(self: *Parser) !*Node {
    const tok = try self.expect(.each_of);

    const node = try self.allocator.create(Node);
    // No children are attached until after the last fallible call.
    errdefer self.allocator.destroy(node);
    node.* = .{
        .type = .EachOf,
        .obj = tok.code.getString(),
        .val = tok.val.getString(),
        .line = tok.loc.start.line,
        .column = tok.loc.start.column,
        .filename = self.filename,
    };

    const block = try self.block_();
    // `node` is childless: take over the list rather than copying it.
    node.nodes = block.nodes;
    self.allocator.destroy(block);

    return node;
}
2026-01-24 23:53:19 +05:30
// ========================================================================
// Extends Parsing
// ========================================================================
2026-01-24 23:53:19 +05:30
/// Parse `extends <path>` into an `.Extends` node.
/// The path has surrounding spaces and tabs trimmed and is stored, with the
/// path token's position, in the node's `file` reference.
fn parseExtends(self: *Parser) !*Node {
    const tok = try self.expect(.extends);
    const path_tok = try self.expect(.path);

    const path_val: ?[]const u8 = trimmed: {
        const raw = path_tok.val.getString() orelse break :trimmed null;
        break :trimmed mem.trim(u8, raw, " \t");
    };

    const reference: FileReference = .{
        .path = path_val,
        .line = path_tok.loc.start.line,
        .column = path_tok.loc.start.column,
        .filename = self.filename,
    };

    const extends_node = try self.allocator.create(Node);
    extends_node.* = Node{
        .type = .Extends,
        .file = reference,
        .line = tok.loc.start.line,
        .column = tok.loc.start.column,
        .filename = self.filename,
    };
    return extends_node;
}
2026-01-24 23:53:19 +05:30
// ========================================================================
// Block Parsing
// ========================================================================
2026-01-24 23:53:19 +05:30
/// Parse a named `block` declaration.
/// The body is the following indented block, or an empty block when there
/// is none; that node is then retyped as `.NamedBlock` and given the
/// trimmed block name, the token's mode and the token's position.
fn parseBlock(self: *Parser) !*Node {
    const tok = try self.expect(.block);
    const start = tok.loc.start;

    const has_body = self.peek().type == .indent;
    const named = if (has_body)
        try self.block_()
    else
        try self.emptyBlock(start.line);

    named.type = .NamedBlock;
    named.name = trimmed: {
        const raw = tok.val.getString() orelse break :trimmed null;
        break :trimmed mem.trim(u8, raw, " \t");
    };
    named.mode = tok.mode.getString();
    named.line = start.line;
    named.column = start.column;

    return named;
}
2026-01-24 23:53:19 +05:30
/// Parses an anonymous `block` placeholder (a `mixin_block` token).
///
/// Records a parser error and fails with `error.BlockOutsideMixin` when the
/// `in_mixin` counter is zero, i.e. the placeholder is not nested in a mixin
/// definition currently being parsed.
fn parseMixinBlock(self: *Parser) !*Node {
    const block_tok = try self.expect(.mixin_block);

    const inside_mixin = self.in_mixin != 0;
    if (!inside_mixin) {
        self.setError(.BLOCK_OUTISDE_MIXIN, "Anonymous blocks are not allowed unless they are part of a mixin.", block_tok);
        return error.BlockOutsideMixin;
    }

    const start = block_tok.loc.start;
    const placeholder = try self.allocator.create(Node);
    placeholder.* = .{
        .type = .MixinBlock,
        .line = start.line,
        .column = start.column,
        .filename = self.filename,
    };
    return placeholder;
}
2026-01-24 23:53:19 +05:30
/// Parses a `yield` token into a `YieldBlock` node located at that token.
fn parseYield(self: *Parser) !*Node {
    const yield_tok = try self.expect(.yield);
    const start = yield_tok.loc.start;

    const yield_node = try self.allocator.create(Node);
    yield_node.* = .{
        .type = .YieldBlock,
        .line = start.line,
        .column = start.column,
        .filename = self.filename,
    };
    return yield_node;
}

// ========================================================================
// Include Parsing
// ========================================================================
/// Parses an `include` directive.
///
/// Token sequence: `include`, zero or more `filter` tokens (each parsed with
/// `parseIncludeFilter` and collected in `filters`), then a `path` token.
///
/// The include is kept as a template `Include` only when no filters were given
/// and the trimmed path ends in `.jade` or `.pug`; in that case an optional
/// indented block is parsed and its children attached to the node. Anything
/// else becomes a `RawInclude`, which must not be followed by an indented
/// block (`error.RawIncludeBlock`).
fn parseInclude(self: *Parser) !*Node {
    const include_tok = try self.expect(.include);

    const include_node = try self.allocator.create(Node);
    include_node.* = .{
        .type = .Include,
        // Placeholder file reference; overwritten once the path token is read.
        .file = .{
            .path = null,
            .line = 0,
            .column = 0,
            .filename = self.filename,
        },
        .line = include_tok.loc.start.line,
        .column = include_tok.loc.start.column,
        .filename = self.filename,
    };

    // Filters come before the path.
    while (self.peek().type == .filter) {
        try include_node.filters.append(self.allocator, try self.parseIncludeFilter());
    }

    const target_tok = try self.expect(.path);
    const include_path: ?[]const u8 = blk: {
        const raw = target_tok.val.getString() orelse break :blk null;
        break :blk mem.trim(u8, raw, " \t");
    };

    include_node.file = .{
        .path = include_path,
        .line = target_tok.loc.start.line,
        .column = target_tok.loc.start.column,
        .filename = self.filename,
    };

    const treat_as_template = blk: {
        if (include_node.filters.items.len > 0) break :blk false;
        const p = include_path orelse break :blk false;
        break :blk mem.endsWith(u8, p, ".jade") or mem.endsWith(u8, p, ".pug");
    };

    if (!treat_as_template) {
        // Raw include
        include_node.type = .RawInclude;
        if (self.peek().type == .indent) {
            self.setError(.RAW_INCLUDE_BLOCK, "Raw inclusion cannot contain a block", self.peek());
            return error.RawIncludeBlock;
        }
        return include_node;
    }

    // Pug include, optionally followed by a block.
    if (self.peek().type == .indent) {
        const body = try self.block_();
        for (body.nodes.items) |child| try include_node.addNode(self.allocator, child);
        body.nodes.deinit(self.allocator);
        self.allocator.destroy(body);
    }

    return include_node;
}

// ========================================================================
// Mixin/Call Parsing
// ========================================================================
/// Parses a mixin call (`call` token) into a `Mixin` node with `call = true`.
///
/// The mixin name and argument string come from the token; everything that
/// can follow a tag (attributes, inline content, nested block) is then parsed
/// by `tag_` with self-closing allowed.
fn parseCall(self: *Parser) !*Node {
    const call_tok = try self.expect(.call);
    const start = call_tok.loc.start;

    const call_node = try self.allocator.create(Node);
    call_node.* = .{
        .type = .Mixin,
        .name = call_tok.val.getString(),
        .args = call_tok.args.getString(),
        .call = true,
        .line = start.line,
        .column = start.column,
        .filename = self.filename,
    };

    try self.tag_(call_node, true);

    // If code was added, move it to block
    // (simplified - the JS version has special handling for mixin.code)
    //
    // The JS version sets an empty block to null; there is no optional block
    // here, so an empty child list is simply left as is.
    return call_node;
}
2026-01-24 23:53:19 +05:30
/// Parses a mixin definition: a `mixin` token followed by a mandatory
/// indented body.
///
/// Returns a `Mixin` node with `call = false` whose children are the body's
/// nodes. Fails with `error.MixinWithoutBody` (after recording a parser
/// error) when no indent follows the declaration.
///
/// `in_mixin` is incremented for the duration of the body so that anonymous
/// `block` placeholders are accepted inside it. The counter is restored with
/// `defer`, so it is also rebalanced when parsing the body fails.
fn parseMixin(self: *Parser) !*Node {
    const tok = try self.expect(.mixin);

    if (self.peek().type != .indent) {
        self.setError(.MIXIN_WITHOUT_BODY, "Mixin declared without body", tok);
        return error.MixinWithoutBody;
    }

    self.in_mixin += 1;
    defer self.in_mixin -= 1;

    const node = try self.allocator.create(Node);
    node.* = .{
        .type = .Mixin,
        .name = tok.val.getString(),
        .args = tok.args.getString(),
        .call = false,
        .line = tok.loc.start.line,
        .column = tok.loc.start.column,
        .filename = self.filename,
    };

    // Move the body's children onto the mixin node and free the wrapper.
    const block = try self.block_();
    for (block.nodes.items) |n| {
        try node.addNode(self.allocator, n);
    }
    block.nodes.deinit(self.allocator);
    self.allocator.destroy(block);

    return node;
}

// ========================================================================
// Block (indent/outdent)
// ========================================================================

/// Parses an indented block: `indent`, any number of statements, `outdent`.
///
/// Returns a freshly created block node (via `emptyBlock`) holding the parsed
/// children:
///   - `newline` tokens are skipped;
///   - `text_html` tokens are parsed with `parseTextHtml` and every resulting
///     node is appended;
///   - anything else goes through `parseExpr`; when that yields a `Block`
///     node its children are spliced in and the wrapper is freed, otherwise
///     the node itself is appended.
///
/// The explicit `anyerror` return type is kept as declared; this function is
/// mutually recursive with the other parse functions.
fn block_(self: *Parser) anyerror!*Node {
    const indent_tok = try self.expect(.indent);
    const block = try self.emptyBlock(indent_tok.loc.start.line);

    while (self.peek().type != .outdent) {
        switch (self.peek().type) {
            .newline => {
                _ = self.advance();
            },
            .text_html => {
                var html_nodes = try self.parseTextHtml();
                // Release the temporary list storage even if appending fails.
                defer html_nodes.deinit(self.allocator);
                for (html_nodes.items) |node| {
                    try block.addNode(self.allocator, node);
                }
            },
            else => {
                const expr = try self.parseExpr();
                if (expr.type == .Block) {
                    // Flatten nested blocks: adopt the children, drop the wrapper.
                    for (expr.nodes.items) |node| {
                        try block.addNode(self.allocator, node);
                    }
                    expr.nodes.deinit(self.allocator);
                    self.allocator.destroy(expr);
                } else {
                    try block.addNode(self.allocator, expr);
                }
            },
        }
    }

    _ = try self.expect(.outdent);
    return block;
}

// ========================================================================
// Interpolation/Tag Parsing
// ========================================================================
/// Parses an interpolated tag, i.e. a tag whose name is an expression.
///
/// Consumes the current token unconditionally (no type check is made here)
/// and uses its value as the tag expression, then lets `tag_` parse the
/// attributes, inline content and nested block.
fn parseInterpolation(self: *Parser) !*Node {
    const interp_tok = self.advance();
    const start = interp_tok.loc.start;

    const tag_node = try self.allocator.create(Node);
    tag_node.* = .{
        .type = .InterpolatedTag,
        .expr = interp_tok.val.getString(),
        .self_closing = false,
        .is_inline = false,
        .line = start.line,
        .column = start.column,
        .filename = self.filename,
    };

    try self.tag_(tag_node, true);
    return tag_node;
}
2026-01-24 23:53:19 +05:30
/// Parses a regular tag.
///
/// Consumes the current token unconditionally (no type check is made here);
/// a token without a string value yields a `div`. `is_inline` is derived from
/// the inline-tag table, and the rest of the tag (attributes, inline content,
/// nested block) is parsed by `tag_`.
fn parseTag(self: *Parser) !*Node {
    const tag_tok = self.advance();
    const start = tag_tok.loc.start;
    const element_name = tag_tok.val.getString() orelse "div";

    const tag_node = try self.allocator.create(Node);
    tag_node.* = .{
        .type = .Tag,
        .name = element_name,
        .self_closing = false,
        .is_inline = isInlineTag(element_name),
        .line = start.line,
        .column = start.column,
        .filename = self.filename,
    };

    try self.tag_(tag_node, true);
    return tag_node;
}
2026-01-24 23:53:19 +05:30
/// Parses everything that may follow a tag-like head and attaches it to `tag`.
///
/// Grammar handled, in order:
///   1. `(attrs | class | id | &attributes)*`
///   2. an optional `.` marking the tag as text-only
///   3. `(text | code | ':' expr | '/')?`
///   4. `newline*`
///   5. an optional block (a text block when text-only, else an indented block)
///
/// Parameters:
///   - `tag`: node receiving attributes, attribute blocks and children.
///   - `self_closing_allowed`: whether a trailing `/` is accepted; when it is
///     not, a `/` records a parser error and fails with `error.InvalidToken`.
///
/// Errors: `error.DuplicateId` for a repeated `#id` shorthand, plus anything
/// propagated from the nested parse functions or the allocator.
fn tag_(self: *Parser, tag: *Node, self_closing_allowed: bool) !void {
    var seen_attrs = false;
    // Attribute names seen so far on this tag, used for duplicate detection.
    var attribute_names = std.ArrayListUnmanaged([]const u8){};
    defer attribute_names.deinit(self.allocator);

    // (attrs | class | id)*
    outer: while (true) {
        switch (self.peek().type) {
            .id, .class => {
                const tok = self.advance();
                const is_id = tok.type == .id;
                if (is_id) {
                    // Check for duplicate id
                    for (attribute_names.items) |name| {
                        if (mem.eql(u8, name, "id")) {
                            self.setError(.DUPLICATE_ID, "Duplicate attribute \"id\" is not allowed.", tok);
                            return error.DuplicateId;
                        }
                    }
                    try attribute_names.append(self.allocator, "id");
                }

                // Shorthand values are stored single-quoted. The string is
                // allocated here and handed to the attribute (`val_owned`);
                // free it ourselves only if the append below fails.
                const quoted_val = try std.fmt.allocPrint(
                    self.allocator,
                    "'{s}'",
                    .{tok.val.getString() orelse ""},
                );
                errdefer self.allocator.free(quoted_val);

                try tag.attrs.append(self.allocator, .{
                    .name = if (is_id) "id" else "class",
                    .val = quoted_val,
                    .line = tok.loc.start.line,
                    .column = tok.loc.start.column,
                    .filename = self.filename,
                    .must_escape = false,
                    .val_owned = true, // We allocated this string
                });
            },

            .start_attributes => {
                if (seen_attrs) {
                    // Warning: multiple attributes - but continue
                }
                seen_attrs = true;

                var new_attrs = try self.attrs(&attribute_names);
                // The temporary list is only a carrier; always release it.
                defer new_attrs.deinit(self.allocator);
                try tag.attrs.appendSlice(self.allocator, new_attrs.items);
            },

            .@"&attributes" => {
                const tok = self.advance();
                try tag.attribute_blocks.append(self.allocator, .{
                    .val = tok.val.getString() orelse "",
                    .line = tok.loc.start.line,
                    .column = tok.loc.start.column,
                    .filename = self.filename,
                });
            },

            else => break :outer,
        }
    }

    // Check for textOnly (.)
    if (self.peek().type == .dot) {
        tag.text_only = true;
        _ = self.advance();
    }

    // (text | code | ':')?
    switch (self.peek().type) {
        .text, .interpolated_code => {
            const text = try self.parseText(false);
            if (text.type == .Block) {
                // Splice the block's children in and drop the wrapper.
                for (text.nodes.items) |node| {
                    try tag.addNode(self.allocator, node);
                }
                text.nodes.deinit(self.allocator);
                self.allocator.destroy(text);
            } else {
                try tag.addNode(self.allocator, text);
            }
        },
        .code => {
            const code_node = try self.parseCode(true);
            try tag.addNode(self.allocator, code_node);
        },
        .colon => {
            // Block expansion: `tag: expr`
            _ = self.advance();
            const expr = try self.parseExpr();
            if (expr.type == .Block) {
                for (expr.nodes.items) |node| {
                    try tag.addNode(self.allocator, node);
                }
                expr.nodes.deinit(self.allocator);
                self.allocator.destroy(expr);
            } else {
                try tag.addNode(self.allocator, expr);
            }
        },
        .newline, .indent, .outdent, .eos, .start_pipeless_text, .end_pug_interpolation => {},
        .slash => {
            if (self_closing_allowed) {
                _ = self.advance();
                tag.self_closing = true;
            } else {
                self.setError(.INVALID_TOKEN, "Unexpected token", self.peek());
                return error.InvalidToken;
            }
        },
        else => {
            // Accept other tokens without error for now
        },
    }

    // newline*
    while (self.peek().type == .newline) {
        _ = self.advance();
    }

    // block?
    if (tag.text_only) {
        if (self.parseTextBlock()) |block| {
            for (block.nodes.items) |node| {
                try tag.addNode(self.allocator, node);
            }
            block.nodes.deinit(self.allocator);
            self.allocator.destroy(block);
        }
    } else if (self.peek().type == .indent) {
        const block = try self.block_();
        for (block.nodes.items) |node| {
            try tag.addNode(self.allocator, node);
        }
        block.nodes.deinit(self.allocator);
        self.allocator.destroy(block);
    }
}
2026-01-24 23:53:19 +05:30
/// Parses a parenthesised attribute list: `start_attributes`, any number of
/// `attribute` tokens, `end_attributes`.
///
/// Parameters:
///   - `attribute_names`: optional list of attribute names already seen on
///     the current tag. When given, every non-`class` attribute is checked
///     against it and then recorded in it; a repeat records a parser error
///     and fails with `error.DuplicateAttribute`.
///
/// Returns a list of attributes that the caller must `deinit` with the
/// parser's allocator. Names and values are taken from the tokens as-is
/// (not copied). On any error the partially built list is released here.
fn attrs(self: *Parser, attribute_names: ?*std.ArrayListUnmanaged([]const u8)) !std.ArrayListUnmanaged(Attribute) {
    _ = try self.expect(.start_attributes);

    var result = std.ArrayListUnmanaged(Attribute){};
    errdefer result.deinit(self.allocator);

    var tok = self.advance();
    while (tok.type == .attribute) : (tok = self.advance()) {
        const attr_name = tok.name.getString() orelse "";

        // Check for duplicates (except class)
        if (!mem.eql(u8, attr_name, "class")) {
            if (attribute_names) |names| {
                for (names.items) |name| {
                    if (mem.eql(u8, name, attr_name)) {
                        self.setError(.DUPLICATE_ATTRIBUTE, "Duplicate attribute is not allowed.", tok);
                        return error.DuplicateAttribute;
                    }
                }
                try names.append(self.allocator, attr_name);
            }
        }

        try result.append(self.allocator, .{
            .name = attr_name,
            .val = tok.val.getString(),
            .line = tok.loc.start.line,
            .column = tok.loc.start.column,
            .filename = self.filename,
            .must_escape = tok.shouldEscape(),
        });
    }

    // The first non-attribute token was consumed by the loop; push it back so
    // that `expect` can verify it is the closing `end_attributes`.
    try self.defer_token(tok);
    _ = try self.expect(.end_attributes);

    return result;
}
};

// ============================================================================
// Tests
// ============================================================================

// Feeds a hand-built token stream through the parser and checks that the
// root is a Block containing a single `html` Tag.
test "parser basic" {
    const testing = std.testing;
    const gpa = testing.allocator;

    // Simulate tokens for: html\n  body\n    h1 Title
    var token_stream = [_]Token{
        .{ .type = .tag, .val = .{ .string = "html" }, .loc = .{ .start = .{ .line = 1, .column = 1 } } },
        .{ .type = .indent, .val = .{ .string = "2" }, .loc = .{ .start = .{ .line = 2, .column = 1 } } },
        .{ .type = .tag, .val = .{ .string = "body" }, .loc = .{ .start = .{ .line = 2, .column = 3 } } },
        .{ .type = .indent, .val = .{ .string = "4" }, .loc = .{ .start = .{ .line = 3, .column = 1 } } },
        .{ .type = .tag, .val = .{ .string = "h1" }, .loc = .{ .start = .{ .line = 3, .column = 5 } } },
        .{ .type = .text, .val = .{ .string = "Title" }, .loc = .{ .start = .{ .line = 3, .column = 8 } } },
        .{ .type = .outdent, .loc = .{ .start = .{ .line = 3, .column = 13 } } },
        .{ .type = .outdent, .loc = .{ .start = .{ .line = 3, .column = 13 } } },
        .{ .type = .eos, .loc = .{ .start = .{ .line = 3, .column = 13 } } },
    };

    var parser = Parser.init(gpa, &token_stream, "test.pug", null);
    defer parser.deinit();

    const root = try parser.parse();
    defer {
        root.deinit(gpa);
        gpa.destroy(root);
    }

    try testing.expectEqual(NodeType.Block, root.type);
    try testing.expectEqual(@as(usize, 1), root.nodes.items.len);

    const first_child = root.nodes.items[0];
    try testing.expectEqual(NodeType.Tag, first_child.type);
    try testing.expectEqualStrings("html", first_child.name.?);
}
2026-01-24 23:53:19 +05:30
// A lone `doctype` token must produce exactly one Doctype node carrying the
// token's value.
test "parser doctype" {
    const testing = std.testing;
    const gpa = testing.allocator;

    var token_stream = [_]Token{
        .{ .type = .doctype, .val = .{ .string = "html" }, .loc = .{ .start = .{ .line = 1, .column = 1 } } },
        .{ .type = .eos, .loc = .{ .start = .{ .line = 1, .column = 13 } } },
    };

    var parser = Parser.init(gpa, &token_stream, "test.pug", null);
    defer parser.deinit();

    const root = try parser.parse();
    defer {
        root.deinit(gpa);
        gpa.destroy(root);
    }

    const children = root.nodes.items;
    try testing.expectEqual(@as(usize, 1), children.len);
    try testing.expectEqual(NodeType.Doctype, children[0].type);
    try testing.expectEqualStrings("html", children[0].val.?);
}