Files
pugz/src/lexer.zig

2702 lines
88 KiB
Zig
Raw Normal View History

const std = @import("std");
2026-01-24 23:53:19 +05:30
const mem = std.mem;
const Allocator = std.mem.Allocator;
2026-01-23 22:08:53 +05:30
2026-01-24 23:53:19 +05:30
// ============================================================================
// Token Types
// ============================================================================
/// Every kind of token the lexer can emit.
/// Member order is significant (it fixes the integer tag values), so new
/// members should only be appended within their group with care.
pub const TokenType = enum {
    // --- Elements and plain content ---
    tag,
    id,
    class,
    text,
    text_html,
    comment,
    doctype,
    filter,

    // --- Template inheritance / inclusion ---
    extends,
    include,
    path,
    block,
    mixin_block,
    mixin,
    call,
    yield,

    // --- Embedded code ---
    code,
    blockcode,
    interpolation,
    interpolated_code,

    // --- Control flow ---
    @"if",
    else_if,
    @"else",
    case,
    when,
    default,
    each,
    each_of,
    @"while",

    // --- Layout / structure ---
    indent,
    outdent,
    newline,
    eos,

    // --- Punctuation ---
    dot,
    colon,
    slash,

    // --- Attributes ---
    start_attributes,
    end_attributes,
    attribute,
    @"&attributes",

    // --- Nested lexing regions ---
    start_pug_interpolation,
    end_pug_interpolation,
    start_pipeless_text,
    end_pipeless_text,
};
// ============================================================================
// Token Value - Tagged Union for type-safe token values
// ============================================================================
/// Payload attached to a token or to one of its auxiliary fields.
/// A value is either absent, a borrowed string slice, or a boolean flag.
pub const TokenValue = union(enum) {
    /// No value present.
    none,
    /// String payload; the slice is stored as given (not copied).
    string: []const u8,
    /// Boolean payload.
    boolean: bool,

    /// Wraps a string slice in a `TokenValue`.
    pub fn fromString(s: []const u8) TokenValue {
        return TokenValue{ .string = s };
    }

    /// Wraps a boolean in a `TokenValue`.
    pub fn fromBool(b: bool) TokenValue {
        return TokenValue{ .boolean = b };
    }

    /// True when no value is stored.
    pub fn isNone(self: TokenValue) bool {
        return switch (self) {
            .none => true,
            .string, .boolean => false,
        };
    }

    /// Returns the string payload, or null for any other variant.
    pub fn getString(self: TokenValue) ?[]const u8 {
        if (self == .string) return self.string;
        return null;
    }

    /// Returns the boolean payload, or null for any other variant.
    pub fn getBool(self: TokenValue) ?bool {
        if (self == .boolean) return self.boolean;
        return null;
    }

    /// Formatter hook: prints `none`, the quoted string, or the boolean.
    /// The format string and options are ignored.
    pub fn format(
        self: TokenValue,
        comptime fmt: []const u8,
        options: std.fmt.FormatOptions,
        writer: anytype,
    ) !void {
        _ = fmt;
        _ = options;
        switch (self) {
            .boolean => |flag| try writer.print("{}", .{flag}),
            .string => |str| try writer.print("\"{s}\"", .{str}),
            .none => try writer.writeAll("none"),
        }
    }
};
// ============================================================================
// Location and Token
// ============================================================================
/// A position in the source, expressed as a line and a column.
/// The lexer fills these from its `lineno` / `colno` counters.
pub const Location = struct {
// Line counter value at this position.
line: usize,
// Column counter value at this position (the lexer resets its column to 1 after a line break).
column: usize,
};
/// Source span of a token.
pub const TokenLoc = struct {
// Position where the token begins.
start: Location,
// Position where the token ends; null until the lexer finalizes the token.
end: ?Location = null,
// Name of the file being lexed, when one was supplied.
filename: ?[]const u8 = null,
};
/// A single lexed token: its kind, primary value, source span, and a set of
/// optional auxiliary values that only some token kinds populate.
pub const Token = struct {
    type: TokenType,
    val: TokenValue = .none,
    loc: TokenLoc,

    // Additional fields for specific token types
    buffer: TokenValue = .none, // boolean for comment/code tokens
    must_escape: TokenValue = .none, // boolean for code/attribute tokens
    mode: TokenValue = .none, // string: "prepend", "append", "replace" for block
    args: TokenValue = .none, // string for mixin/call
    key: TokenValue = .none, // string for each
    code: TokenValue = .none, // string for each/eachOf
    name: TokenValue = .none, // string for attribute

    /// Primary value as a string, or null when it is not a string.
    pub fn getVal(self: Token) ?[]const u8 {
        return self.val.getString();
    }

    /// Whether `buffer` holds `true`; any other state counts as false.
    pub fn isBuffered(self: Token) bool {
        if (self.buffer.getBool()) |flag| return flag;
        return false;
    }

    /// Whether the token must be escaped. Defaults to true when
    /// `must_escape` does not hold a boolean.
    pub fn shouldEscape(self: Token) bool {
        if (self.must_escape.getBool()) |flag| return flag;
        return true;
    }

    /// `mode` as a string, or null when unset.
    pub fn getMode(self: Token) ?[]const u8 {
        return self.mode.getString();
    }

    /// `args` as a string, or null when unset.
    pub fn getArgs(self: Token) ?[]const u8 {
        return self.args.getString();
    }

    /// `key` as a string, or null when unset.
    pub fn getKey(self: Token) ?[]const u8 {
        return self.key.getString();
    }

    /// `code` as a string, or null when unset.
    pub fn getCode(self: Token) ?[]const u8 {
        return self.code.getString();
    }

    /// Attribute `name` as a string, or null when unset.
    pub fn getName(self: Token) ?[]const u8 {
        return self.name.getString();
    }
};
// ============================================================================
// Character Parser State (simplified) - Zig 0.15 style with ArrayListUnmanaged
// ============================================================================
/// Kind of bracket currently open on the nesting stack.
const BracketType = enum { paren, brace, bracket };

/// Minimal character-by-character scanner state for embedded expressions.
/// Tracks quoted strings, backtick templates, backslash escapes and a stack
/// of open brackets. Feed it one byte at a time through `parseChar`.
const CharParserState = struct {
    nesting_stack: std.ArrayListUnmanaged(BracketType) = .{},
    in_string: bool = false,
    string_char: ?u8 = null,
    in_template: bool = false,
    escape_next: bool = false,

    /// Releases the nesting stack's memory.
    pub fn deinit(self: *CharParserState, allocator: Allocator) void {
        self.nesting_stack.deinit(allocator);
    }

    /// True while at least one bracket is open.
    pub fn isNesting(self: *const CharParserState) bool {
        return self.nesting_stack.items.len != 0;
    }

    /// True while inside a quoted string or a backtick template.
    pub fn isString(self: *const CharParserState) bool {
        return self.in_template or self.in_string;
    }

    /// The delimiter of the string currently being scanned, if any.
    pub fn getStringChar(self: *const CharParserState) ?u8 {
        if (self.in_string) return self.string_char;
        return if (self.in_template) '`' else null;
    }

    /// Pops the innermost bracket only when it is of the given kind;
    /// a mismatched or unbalanced closer is ignored.
    fn closeBracket(self: *CharParserState, kind: BracketType) void {
        const open = self.nesting_stack.items;
        if (open.len == 0) return;
        if (open[open.len - 1] != kind) return;
        _ = self.nesting_stack.pop();
    }

    /// Advances the state by one byte. Only fails when pushing onto the
    /// nesting stack cannot allocate.
    pub fn parseChar(self: *CharParserState, allocator: Allocator, char: u8) !void {
        // The byte following a backslash is swallowed unconditionally.
        if (self.escape_next) {
            self.escape_next = false;
            return;
        }
        if (char == '\\') {
            self.escape_next = true;
            return;
        }

        // Inside a quoted string only the matching quote matters.
        if (self.in_string) {
            if (char == self.string_char.?) {
                self.string_char = null;
                self.in_string = false;
            }
            return;
        }

        // Inside a template only the closing backtick matters.
        if (self.in_template) {
            if (char == '`') self.in_template = false;
            return;
        }

        switch (char) {
            '`' => self.in_template = true,
            '"', '\'' => {
                self.string_char = char;
                self.in_string = true;
            },
            '(' => try self.nesting_stack.append(allocator, .paren),
            '{' => try self.nesting_stack.append(allocator, .brace),
            '[' => try self.nesting_stack.append(allocator, .bracket),
            ')' => self.closeBracket(.paren),
            '}' => self.closeBracket(.brace),
            ']' => self.closeBracket(.bracket),
            else => {},
        }
    }
};
// ============================================================================
// Lexer Error
// ============================================================================
/// Machine-readable category of a lexing failure, stored in `LexerError.code`.
pub const LexerErrorCode = enum {
// Internal precondition violated (e.g. a bracket expression not starting on a bracket).
ASSERT_FAILED,
SYNTAX_ERROR,
INCORRECT_NESTING,
// Input ended before a closing bracket was found.
NO_END_BRACKET,
// A closing bracket did not match the innermost open one, or nesting was too deep.
BRACKET_MISMATCH,
// '#' was not followed by a valid id.
INVALID_ID,
// '.' was followed by characters containing no letter or underscore.
INVALID_CLASS_NAME,
// `extends` was not followed by a path.
NO_EXTENDS_PATH,
// `extends` was followed by an unexpected character.
MALFORMED_EXTENDS,
// `include` was not followed by a path.
NO_INCLUDE_PATH,
// `include` was followed by an unexpected character.
MALFORMED_INCLUDE,
// `case` was not followed by an expression.
NO_CASE_EXPRESSION,
NO_WHEN_EXPRESSION,
DEFAULT_WITH_EXPRESSION,
NO_WHILE_EXPRESSION,
MALFORMED_EACH,
MALFORMED_EACH_OF_LVAL,
INVALID_INDENTATION,
INCONSISTENT_INDENTATION,
UNEXPECTED_TEXT,
INVALID_KEY_CHARACTER,
ELSE_CONDITION,
};
/// Details of the most recent lexing failure, as recorded by the lexer.
pub const LexerError = struct {
    /// Category of the failure.
    code: LexerErrorCode,
    /// Human-readable description.
    message: []const u8,
    /// Lexer line counter at the time the error was recorded.
    line: usize,
    /// Lexer column counter at the time the error was recorded.
    column: usize,
    /// File being lexed, when known.
    filename: ?[]const u8,
};
2026-01-24 23:53:19 +05:30
// ============================================================================
// BracketExpression Result
// ============================================================================
/// Result of scanning a bracketed expression in the lexer input.
const BracketExpressionResult = struct {
// Text strictly between the opening and the matching closing bracket.
src: []const u8,
// Index (into the lexer input) of the matching closing bracket.
end: usize,
};
2026-01-24 23:53:19 +05:30
// ============================================================================
// Lexer - Zig 0.15 style with ArrayListUnmanaged
// ============================================================================
pub const Lexer = struct {
2026-01-24 23:53:19 +05:30
allocator: Allocator,
input: []const u8,
input_allocated: []const u8, // Keep reference to allocated memory for cleanup
original_input: []const u8,
filename: ?[]const u8,
interpolated: bool,
lineno: usize,
colno: usize,
indent_stack: std.ArrayListUnmanaged(usize) = .{},
indent_re_type: ?IndentType = null,
interpolation_allowed: bool,
tokens: std.ArrayListUnmanaged(Token) = .{},
ended: bool,
last_error: ?LexerError = null,
const IndentType = enum { tabs, spaces };
/// Creates a lexer over `str`.
///
/// A leading UTF-8 BOM is dropped and every `\r\n` or lone `\r` is rewritten
/// to `\n` in a freshly allocated copy of the input; `original_input` keeps
/// the caller's slice untouched. The indent stack starts with a single `0`.
///
/// The returned lexer owns the normalized copy and must be released with
/// `deinit`. Fails only when allocation fails; nothing is leaked in that case.
pub fn init(allocator: Allocator, str: []const u8, options: LexerOptions) !Lexer {
    // Strip UTF-8 BOM if present
    const bom = "\xEF\xBB\xBF";
    const input = if (mem.startsWith(u8, str, bom)) str[bom.len..] else str;

    // Normalize line endings. The result is never longer than the input, so
    // one up-front reservation covers every append below.
    var normalized: std.ArrayListUnmanaged(u8) = .{};
    errdefer normalized.deinit(allocator);
    try normalized.ensureTotalCapacity(allocator, input.len);

    var i: usize = 0;
    while (i < input.len) : (i += 1) {
        const byte = input[i];
        if (byte != '\r') {
            normalized.appendAssumeCapacity(byte);
            continue;
        }
        normalized.appendAssumeCapacity('\n');
        // Swallow the '\n' of a "\r\n" pair.
        if (i + 1 < input.len and input[i + 1] == '\n') i += 1;
    }

    var indent_stack: std.ArrayListUnmanaged(usize) = .{};
    // Without this, a failing toOwnedSlice below would leak the stack.
    errdefer indent_stack.deinit(allocator);
    try indent_stack.append(allocator, 0);

    const input_slice = try normalized.toOwnedSlice(allocator);

    return Lexer{
        .allocator = allocator,
        .input = input_slice,
        .input_allocated = input_slice,
        .original_input = str,
        .filename = options.filename,
        .interpolated = options.interpolated,
        .lineno = options.starting_line,
        .colno = options.starting_column,
        .indent_stack = indent_stack,
        .interpolation_allowed = true,
        .tokens = .{},
        .ended = false,
    };
}
2026-01-24 23:53:19 +05:30
/// Releases everything the lexer owns: the normalized input copy, the token
/// list and the indent stack.
pub fn deinit(self: *Lexer) void {
    const gpa = self.allocator;
    self.indent_stack.deinit(gpa);
    self.tokens.deinit(gpa);
    // An empty normalized input is not freed.
    if (self.input_allocated.len != 0) gpa.free(self.input_allocated);
}
2026-01-24 23:53:19 +05:30
// ========================================================================
// Error handling
// ========================================================================
/// Records `err_code` and `message` as the lexer's last error, stamped with
/// the current line, column and filename. Overwrites any earlier error.
fn setError(self: *Lexer, err_code: LexerErrorCode, message: []const u8) void {
    self.last_error = .{
        .code = err_code,
        .message = message,
        .line = self.lineno,
        .column = self.colno,
        .filename = self.filename,
    };
}
2026-01-24 23:53:19 +05:30
/// Set error and return false - common pattern for scan functions.
/// The error is stored in `last_error` via `setError`; the return value is
/// always `false` so callers can write `return self.failWith(...)`.
fn failWith(self: *Lexer, err_code: LexerErrorCode, message: []const u8) bool {
self.setError(err_code, message);
return false;
}
2026-01-24 23:53:19 +05:30
/// Set error and return LexerError - for functions with error unions.
/// The details are stored in `last_error` via `setError`; the returned value
/// is always `error.LexerError`, meant to be used as `return self.failWithError(...)`.
fn failWithError(self: *Lexer, err_code: LexerErrorCode, message: []const u8) error{LexerError} {
self.setError(err_code, message);
return error.LexerError;
}
2026-01-24 23:53:19 +05:30
// ========================================================================
// Token creation
// ========================================================================
2026-01-24 23:53:19 +05:30
/// Builds a token of the given type and value that starts at the lexer's
/// current line/column. The end location is left unset and the token is not
/// added to the token list.
fn tok(self: *Lexer, token_type: TokenType, val: TokenValue) Token {
    const here = Location{ .line = self.lineno, .column = self.colno };
    const span = TokenLoc{ .start = here, .filename = self.filename };
    return .{ .type = token_type, .val = val, .loc = span };
}
2026-01-24 23:53:19 +05:30
/// Like `tok`, but takes an optional string: a null `val` yields a token
/// whose value is `.none`.
fn tokWithString(self: *Lexer, token_type: TokenType, val: ?[]const u8) Token {
    const value: TokenValue = if (val) |text_val| TokenValue.fromString(text_val) else .none;
    return self.tok(token_type, value);
}
/// Stamps `token.loc.end` with the lexer's current line and column.
/// Only the token pointed to is modified; a copy already appended to the
/// token list is not affected.
fn tokEnd(self: *Lexer, token: *Token) void {
token.loc.end = Location{
.line = self.lineno,
.column = self.colno,
};
}
/// Helper to emit a token with common boilerplate:
/// 1. Creates token with type and string value (start = current position)
/// 2. Increments column by specified amount
/// 3. Sets token end location
/// 4. Appends the finished token to the tokens list
/// Returns false on allocation failure.
fn emitToken(self: *Lexer, token_type: TokenType, val: ?[]const u8, col_increment: usize) bool {
    var token = self.tokWithString(token_type, val);
    // The end location must be set BEFORE appending: the list stores a copy,
    // so mutating `token` afterwards would leave the stored end as null.
    self.incrementColumn(col_increment);
    self.tokEnd(&token);
    self.tokens.append(self.allocator, token) catch return false;
    return true;
}
/// Helper to emit a token with a TokenValue (for non-string values).
/// Creates the token at the current position, advances the column by
/// `col_increment`, records the end location and appends the token.
/// Returns false on allocation failure.
fn emitTokenVal(self: *Lexer, token_type: TokenType, val: TokenValue, col_increment: usize) bool {
    var token = self.tok(token_type, val);
    // Finalize the span first; the list keeps a copy of the token, so an end
    // location set after the append would be lost.
    self.incrementColumn(col_increment);
    self.tokEnd(&token);
    self.tokens.append(self.allocator, token) catch return false;
    return true;
}
// ========================================================================
// Position tracking
// ========================================================================
2026-01-24 23:53:19 +05:30
/// Advances the line counter by `increment`. Any non-zero increment also
/// resets the column counter to 1; an increment of zero changes nothing.
fn incrementLine(self: *Lexer, increment: usize) void {
    if (increment == 0) return;
    self.lineno += increment;
    self.colno = 1;
}
2026-01-24 23:53:19 +05:30
/// Advances the column counter by `increment`.
fn incrementColumn(self: *Lexer, increment: usize) void {
self.colno += increment;
}
2026-01-24 23:53:19 +05:30
/// Drops the first `len` bytes of the remaining input.
/// `len` must not exceed `self.input.len`. Line/column counters are not
/// touched; callers update them separately.
fn consume(self: *Lexer, len: usize) void {
self.input = self.input[len..];
}
2026-01-24 23:53:19 +05:30
// ========================================================================
// Scanning helpers
// ========================================================================
/// True for a space, a tab or a line feed.
fn isWhitespace(char: u8) bool {
    return switch (char) {
        ' ', '\t', '\n' => true,
        else => false,
    };
}
2026-01-24 23:53:19 +05:30
// ========================================================================
// Bracket expression parsing
// ========================================================================
/// Scans a bracketed expression whose opening bracket sits at
/// `self.input[skip]` and locates the matching closing bracket.
///
/// Brackets inside quoted strings / backtick templates (as tracked by
/// `CharParserState`) are ignored. On success returns the text between the
/// brackets and the index of the closing bracket; the input is not consumed.
///
/// Errors (details are stored in `last_error`):
/// - `NO_END_BRACKET` when `skip` is out of range or the input ends first,
/// - `ASSERT_FAILED` when the byte at `skip` is not `(`, `{` or `[`,
/// - `BRACKET_MISMATCH` on a wrong closer or more than 256 nested brackets.
/// Allocation failures from the string tracker are propagated.
fn bracketExpression(self: *Lexer, skip: usize) !BracketExpressionResult {
    const src = self.input;
    if (skip >= src.len) {
        return self.failWithError(.NO_END_BRACKET, "Empty input for bracket expression");
    }

    const opener = src[skip];
    const closer: u8 = switch (opener) {
        '(' => ')',
        '{' => '}',
        '[' => ']',
        else => {
            return self.failWithError(.ASSERT_FAILED, "The start character should be '(', '{' or '['");
        },
    };

    var state: CharParserState = .{};
    defer state.deinit(self.allocator);

    // Fixed-size stack of currently open brackets; slot 0 is the opener.
    var open_brackets: [256]u8 = undefined;
    open_brackets[0] = opener;
    var depth: usize = 1;

    var pos = skip + 1;
    while (pos < src.len) : (pos += 1) {
        const ch = src[pos];
        try state.parseChar(self.allocator, ch);
        // Anything inside a string or template is opaque.
        if (state.isString()) continue;

        switch (ch) {
            '(', '[', '{' => {
                if (depth >= open_brackets.len) {
                    return self.failWithError(.BRACKET_MISMATCH, "Bracket nesting too deep (max 256 levels)");
                }
                open_brackets[depth] = ch;
                depth += 1;
            },
            ')', ']', '}' => {
                if (depth > 0) {
                    const wanted: u8 = switch (open_brackets[depth - 1]) {
                        '(' => ')',
                        '[' => ']',
                        '{' => '}',
                        else => 0,
                    };
                    if (ch != wanted) {
                        return self.failWithError(.BRACKET_MISMATCH, "Mismatched bracket - expected different closing bracket");
                    }
                    depth -= 1;
                }
                if (depth == 0 and ch == closer) {
                    return .{ .src = src[skip + 1 .. pos], .end = pos };
                }
            },
            else => {},
        }
    }

    return self.failWithError(.NO_END_BRACKET, "The end of the string reached with no closing bracket found.");
}
2026-01-24 23:53:19 +05:30
// ========================================================================
// Indentation scanning
// ========================================================================
2026-01-24 23:53:19 +05:30
/// Measures the indentation that follows a line feed at the start of the
/// input, without consuming anything.
///
/// Returns null when the input does not start with '\n'. Otherwise returns
/// the indentation slice and the total number of bytes (line feed included)
/// covered by the match:
/// - leading tabs: `indent` is the run of tabs, and any spaces right after
///   the tabs are included in `total_len` only; the indent type becomes
///   `.tabs` unless it is already `.spaces`,
/// - leading spaces: `indent` is the run of spaces and the indent type
///   becomes `.spaces`,
/// - anything else: empty indent, `total_len` of 1.
fn scanIndentation(self: *Lexer) ?struct { indent: []const u8, total_len: usize } {
    const src = self.input;
    if (src.len == 0 or src[0] != '\n') return null;

    // A lone trailing line feed has no indentation.
    if (src.len == 1) return .{ .indent = "", .total_len = 1 };

    var pos: usize = 1;
    switch (src[1]) {
        '\t' => {
            // A file already committed to spaces keeps that type.
            if (self.indent_re_type != .spaces) self.indent_re_type = .tabs;
            while (pos < src.len and src[pos] == '\t') pos += 1;
            const tabs_end = pos;
            // Spaces trailing the tabs are skipped but not part of the indent.
            while (pos < src.len and src[pos] == ' ') pos += 1;
            return .{ .indent = src[1..tabs_end], .total_len = pos };
        },
        ' ' => {
            self.indent_re_type = .spaces;
            while (pos < src.len and src[pos] == ' ') pos += 1;
            return .{ .indent = src[1..pos], .total_len = pos };
        },
        else => return .{ .indent = "", .total_len = 1 },
    }
}
2026-01-24 23:53:19 +05:30
// ========================================================================
// Token parsing methods
// ========================================================================
/// Handles end of input. Returns false while input remains.
///
/// When lexing an interpolation, running out of input is an error
/// (`NO_END_BRACKET`). Otherwise one `outdent` token is emitted for each
/// leading non-zero entry of the indent stack, followed by an `eos` token,
/// and the lexer is marked as ended. Returns false on allocation failure.
fn eos(self: *Lexer) bool {
    if (self.input.len != 0) return false;
    if (self.interpolated) {
        return self.failWith(.NO_END_BRACKET, "End of line was reached with no closing bracket for interpolation.");
    }

    // Close every indentation level still open; stop at the first zero entry.
    for (self.indent_stack.items) |level| {
        if (level == 0) break;
        var closing = self.tok(.outdent, .none);
        self.tokEnd(&closing);
        self.tokens.append(self.allocator, closing) catch return false;
    }

    var final = self.tok(.eos, .none);
    self.tokEnd(&final);
    self.tokens.append(self.allocator, final) catch return false;
    self.ended = true;
    return true;
}
/// Skips a blank line: matches /^\n[ \t]*\n/.
/// Consumes the first line feed and the whitespace after it, leaving the
/// second line feed in the input, and advances the line counter by one.
fn blank(self: *Lexer) bool {
    const src = self.input;
    if (src.len < 2 or src[0] != '\n') return false;

    var pos: usize = 1;
    while (pos < src.len) : (pos += 1) {
        switch (src[pos]) {
            ' ', '\t' => {},
            else => break,
        }
    }

    // The whitespace run must be terminated by another line feed.
    if (pos >= src.len or src[pos] != '\n') return false;

    self.consume(pos); // Don't consume the second newline
    self.incrementLine(1);
    return true;
}
/// Lexes a line comment: matches /^\/\/(-)?([^\n]*)/.
///
/// `//` starts a buffered comment, `//-` an unbuffered one. The token value
/// is the rest of the line; `buffer` records which form was used and
/// `interpolation_allowed` is set to the same flag. After the token is
/// emitted, `pipelessText` is given a chance to lex an indented body.
/// Returns false when the input is not a comment or on allocation failure.
fn comment(self: *Lexer) bool {
    if (!mem.startsWith(u8, self.input, "//")) return false;

    var text_start: usize = 2;
    const buffer = !(text_start < self.input.len and self.input[text_start] == '-');
    if (!buffer) text_start += 1;

    const line_end = mem.indexOfScalarPos(u8, self.input, text_start, '\n') orelse self.input.len;
    const comment_text = self.input[text_start..line_end];
    self.consume(line_end);

    var token = self.tokWithString(.comment, comment_text);
    token.buffer = TokenValue.fromBool(buffer);
    self.interpolation_allowed = buffer;

    // Set the end location before appending: the list stores a copy, so a
    // later tokEnd on the local would never reach the stored token.
    self.incrementColumn(line_end);
    self.tokEnd(&token);
    self.tokens.append(self.allocator, token) catch return false;

    _ = self.pipelessText(null);
    return true;
}
/// Lexes a `#{...}` interpolation at the start of the input.
///
/// The token value is the text between the braces. Line and column counters
/// are advanced across the whole construct, including line feeds inside the
/// expression. Returns false when the input does not start with `#{`, when
/// the bracket expression is invalid (see `last_error`), or on allocation
/// failure.
fn interpolation(self: *Lexer) bool {
    // Match /^#\{/
    if (!mem.startsWith(u8, self.input, "#{")) return false;

    const match = self.bracketExpression(1) catch return false;
    self.consume(match.end + 1);

    var token = self.tokWithString(.interpolation, match.src);
    self.incrementColumn(2); // '#{'

    // Count newlines in the expression and the length of its last line.
    var lines: usize = 0;
    var last_line_len: usize = 0;
    for (match.src) |c| {
        if (c == '\n') {
            lines += 1;
            last_line_len = 0;
        } else {
            last_line_len += 1;
        }
    }
    self.incrementLine(lines);
    self.incrementColumn(last_line_len + 1); // + 1 for '}'

    // Finalize the span before appending; the list keeps a copy of the token.
    self.tokEnd(&token);
    self.tokens.append(self.allocator, token) catch return false;
    return true;
}
/// Lexes a tag name: matches /^(\w(?:[-:\w]*\w)?)/.
///
/// The name starts with a word character and may contain `-` and `:`, but
/// trailing `-` / `:` are left in the input. Returns false when the input
/// does not start with a word character or on allocation failure.
fn tag(self: *Lexer) bool {
    if (self.input.len == 0 or !isWordChar(self.input[0])) return false;

    var end: usize = 1;
    while (end < self.input.len) : (end += 1) {
        const c = self.input[end];
        if (!isWordChar(c) and c != '-' and c != ':') break;
    }

    // Ensure it doesn't end with - or : (the first character always stays).
    while (end > 1 and (self.input[end - 1] == '-' or self.input[end - 1] == ':')) {
        end -= 1;
    }

    const name = self.input[0..end];
    self.consume(end);

    var token = self.tokWithString(.tag, name);
    // Finalize the span before appending; the list keeps a copy of the token.
    self.incrementColumn(end);
    self.tokEnd(&token);
    self.tokens.append(self.allocator, token) catch return false;
    return true;
}
/// True for ASCII letters, ASCII digits and underscore (regex `\w`).
fn isWordChar(c: u8) bool {
    return switch (c) {
        'a'...'z', 'A'...'Z', '0'...'9', '_' => true,
        else => false,
    };
}
/// Lexes a filter: matches /^:([\w\-]+)/.
///
/// The token value is the filter name without the leading ':'. After the
/// token, attributes are lexed via `attrs`; unless `in_include` is set,
/// interpolation is disabled and `pipelessText` is tried for the body.
/// Returns false when the input is not a filter or on allocation failure.
fn filter(self: *Lexer, in_include: bool) bool {
    if (self.input.len < 2 or self.input[0] != ':') return false;

    var end: usize = 1;
    while (end < self.input.len) : (end += 1) {
        const c = self.input[end];
        if (!isWordChar(c) and c != '-') break;
    }
    if (end == 1) return false;

    const filter_name = self.input[1..end];
    self.consume(end);

    var token = self.tokWithString(.filter, filter_name);
    // Advance by everything consumed (':' plus the name) so the column stays
    // in sync with the input, and finalize the span before appending because
    // the list keeps a copy of the token.
    self.incrementColumn(end);
    self.tokEnd(&token);
    self.tokens.append(self.allocator, token) catch return false;

    _ = self.attrs();
    if (!in_include) {
        self.interpolation_allowed = false;
        _ = self.pipelessText(null);
    }
    return true;
}
2026-01-24 23:53:19 +05:30
/// Lexes a doctype line: matches /^doctype *([^\n]*)/.
///
/// The token value is the text after `doctype` and any spaces, up to the end
/// of the line; it is `.none` when that text is empty. Returns false when
/// the input does not start with `doctype` or on allocation failure.
fn doctype(self: *Lexer) bool {
    const prefix = "doctype";
    if (!mem.startsWith(u8, self.input, prefix)) return false;

    // Skip spaces after the keyword.
    var value_start: usize = prefix.len;
    while (value_start < self.input.len and self.input[value_start] == ' ') {
        value_start += 1;
    }

    const end = mem.indexOfScalarPos(u8, self.input, value_start, '\n') orelse self.input.len;
    const doctype_val = self.input[value_start..end];
    self.consume(end);

    var token = self.tokWithString(.doctype, if (doctype_val.len > 0) doctype_val else null);
    // Finalize the span before appending; the list keeps a copy of the token.
    self.incrementColumn(end);
    self.tokEnd(&token);
    self.tokens.append(self.allocator, token) catch return false;
    return true;
}
/// Lexes an id: matches /^#([\w-]+)/.
///
/// `#{` is left alone (it is an interpolation). A '#' followed by no valid id
/// character records `INVALID_ID` and returns false. The token value is the
/// id without the leading '#'. Also returns false on allocation failure.
fn id(self: *Lexer) bool {
    if (self.input.len < 2 or self.input[0] != '#') return false;
    // Check it's not #{
    if (self.input[1] == '{') return false;

    var end: usize = 1;
    while (end < self.input.len) : (end += 1) {
        const c = self.input[end];
        if (!isWordChar(c) and c != '-') break;
    }
    if (end == 1) {
        self.setError(.INVALID_ID, "Invalid ID");
        return false;
    }

    const id_val = self.input[1..end];
    self.consume(end);

    var token = self.tokWithString(.id, id_val);
    // Advance by everything consumed ('#' plus the id) so the column stays in
    // sync with the input, and finalize the span before appending because the
    // list keeps a copy of the token.
    self.incrementColumn(end);
    self.tokEnd(&token);
    self.tokens.append(self.allocator, token) catch return false;
    return true;
}
2026-01-24 23:53:19 +05:30
/// Lexes a class name: matches /^\.([_a-z0-9\-]*[_a-z][_a-z0-9\-]*)/i.
///
/// The name must contain at least one letter or underscore; a name made only
/// of digits and dashes records `INVALID_CLASS_NAME` and returns false. The
/// token value is the name without the leading '.'. Also returns false when
/// the input is not a class or on allocation failure.
fn className(self: *Lexer) bool {
    if (self.input.len < 2 or self.input[0] != '.') return false;

    var end: usize = 1;
    var has_letter = false;
    while (end < self.input.len) : (end += 1) {
        const c = self.input[end];
        if (!isWordChar(c) and c != '-') break;
        if ((c >= 'a' and c <= 'z') or (c >= 'A' and c <= 'Z') or c == '_') {
            has_letter = true;
        }
    }

    // A bare '.' is not a class at all; leave it for other rules.
    if (end == 1) return false;
    if (!has_letter) {
        self.setError(.INVALID_CLASS_NAME, "Class names must contain at least one letter or underscore.");
        return false;
    }

    const class_name = self.input[1..end];
    self.consume(end);

    var token = self.tokWithString(.class, class_name);
    // Advance by everything consumed ('.' plus the name) so the column stays
    // in sync with the input, and finalize the span before appending because
    // the list keeps a copy of the token.
    self.incrementColumn(end);
    self.tokEnd(&token);
    self.tokens.append(self.allocator, token) catch return false;
    return true;
}
2026-01-24 23:53:19 +05:30
/// When lexing an interpolation, a leading ']' terminates it: the bracket is
/// consumed and the lexer is marked as ended. Returns false otherwise.
fn endInterpolation(self: *Lexer) bool {
    if (!self.interpolated) return false;
    if (self.input.len == 0 or self.input[0] != ']') return false;

    self.consume(1);
    self.ended = true;
    return true;
}
2026-01-24 23:53:19 +05:30
/// Lexes text at the current position. Handles:
/// 1. "| text" / "|" / "| " - piped text (one optional space after '|' is
///    dropped, the rest of the line is the text),
/// 2. " text" - inline text after a tag (the leading space is dropped).
///
/// For the inline form two special cases produce no text: a line holding
/// only spaces/tabs is consumed silently, and a " /" self-closing pattern
/// only has its leading spaces consumed so the slash can be lexed next.
/// Text is handed to `addText`. Returns false for any other input.
fn text(self: *Lexer) bool {
    const src = self.input;
    if (src.len == 0) return false;

    switch (src[0]) {
        '|' => {
            // Skip optional space after |
            const body_start: usize = if (src.len > 1 and src[1] == ' ') 2 else 1;
            const line_end = mem.indexOfScalarPos(u8, src, body_start, '\n') orelse src.len;
            const piped = src[body_start..line_end];
            self.consume(line_end);
            self.addText(.text, piped, "", 0);
            return true;
        },
        ' ' => {
            const line_end = mem.indexOfScalarPos(u8, src, 1, '\n') orelse src.len;
            const rest = src[1..line_end];

            // Only whitespace until newline - consume it but don't create a
            // text token (let indent handling deal with the newline).
            var only_blank = true;
            for (rest) |c| {
                if (c == ' ' or c == '\t') continue;
                only_blank = false;
                break;
            }
            if (only_blank) {
                self.consume(line_end);
                self.incrementColumn(line_end);
                return true;
            }

            // "tag /" pattern - consume the spaces, let the slash handler
            // deal with '/'.
            var lead: usize = 0;
            while (lead < rest.len and rest[lead] == ' ') lead += 1;
            if (lead < rest.len and rest[lead] == '/') {
                const after_slash = lead + 1;
                if (after_slash >= rest.len or rest[after_slash] == ' ' or rest[after_slash] == '\n') {
                    self.consume(1 + lead);
                    self.incrementColumn(1 + lead);
                    return true;
                }
            }

            self.consume(line_end);
            self.addText(.text, rest, "", 0);
            return true;
        },
        else => return false,
    }
}
2026-01-24 23:53:19 +05:30
/// Lexes a literal HTML line: matches /^(<[^\n]*)/.
/// The whole line, including the leading '<', is handed to `addText` as
/// `text_html`. Returns false when the input does not start with '<'.
fn textHtml(self: *Lexer) bool {
    if (!mem.startsWith(u8, self.input, "<")) return false;

    const line_end = mem.indexOfScalar(u8, self.input, '\n') orelse self.input.len;
    const markup = self.input[0..line_end];
    self.consume(line_end);
    self.addText(.text_html, markup, "", 0);
    return true;
}
/// Lexes a block-text dot: a '.' that is followed by the end of input, a
/// line feed or a ':'. Emits a `dot` token and then lets `pipelessText` lex
/// the indented body. Returns false for any other input or on allocation
/// failure.
fn dot(self: *Lexer) bool {
    // Match /^\./
    if (self.input.len == 0 or self.input[0] != '.') return false;
    // Only a dot at end of input, before a newline or before a colon counts.
    if (self.input.len > 1 and self.input[1] != '\n' and self.input[1] != ':') return false;

    self.consume(1);
    var token = self.tok(.dot, .none);
    // Finalize the span before appending; the list keeps a copy of the token.
    self.incrementColumn(1);
    self.tokEnd(&token);
    self.tokens.append(self.allocator, token) catch return false;

    _ = self.pipelessText(null);
    return true;
}
2026-01-24 23:53:19 +05:30
/// Lexes `extends` / `extend`: matches /^extends?(?= |$|\n)/.
///
/// When the keyword is followed by a space, a line feed or the end of input,
/// an `extends` token is emitted and the path is lexed via `path`; a missing
/// path records `NO_EXTENDS_PATH`. When the keyword is followed by anything
/// else, `MALFORMED_EXTENDS` is recorded. Both error cases return true, so
/// callers must inspect `last_error`. Returns false when the input does not
/// start with the keyword or on allocation failure.
fn extendsToken(self: *Lexer) bool {
    // Try the longer spelling first so "extends" is never read as "extend" + 's'.
    const keyword_len: usize = if (mem.startsWith(u8, self.input, "extends"))
        7
    else if (mem.startsWith(u8, self.input, "extend"))
        6
    else
        return false;

    // 0 stands for "end of input".
    const after: u8 = if (self.input.len > keyword_len) self.input[keyword_len] else 0;
    if (after != 0 and after != ' ' and after != '\n') {
        // Keyword followed by something else (like "(") - malformed
        self.setError(.MALFORMED_EXTENDS, "malformed extends");
        return true;
    }

    self.consume(keyword_len);
    var token = self.tok(.extends, .none);
    // Finalize the span before appending; the list keeps a copy of the token.
    self.incrementColumn(keyword_len);
    self.tokEnd(&token);
    self.tokens.append(self.allocator, token) catch return false;

    if (!self.path()) {
        self.setError(.NO_EXTENDS_PATH, "missing path for extends");
    }
    return true;
}
/// Lexes a `prepend NAME` / `block prepend NAME` line via `blockHelper`.
fn prepend(self: *Lexer) bool {
return self.blockHelper("prepend", .prepend);
}
/// Lexes an `append NAME` / `block append NAME` line via `blockHelper`.
fn append(self: *Lexer) bool {
return self.blockHelper("append", .append);
}
/// Lexes a `block NAME` line (mode "replace") via `blockHelper`.
fn blockToken(self: *Lexer) bool {
return self.blockHelper("block", .replace);
}
2026-01-24 23:53:19 +05:30
/// How a named block combines with the block it overrides.
const BlockMode = enum { prepend, append, replace };

/// Shared implementation for `block`, `prepend` and `append` lines.
///
/// Accepts `block NAME`, `prepend NAME`, `append NAME`, `block prepend NAME`
/// and `block append NAME` depending on `mode`. The name runs to the end of
/// the line, minus a trailing `//` comment and trailing whitespace. Emits a
/// `block` token whose value is the name and whose `mode` is "prepend",
/// "append" or "replace". The whole line is consumed.
///
/// `keyword` is accepted for call-site readability only; the prefixes are
/// derived from `mode`. Returns false when the prefix does not match, when
/// the name is empty, or on allocation failure.
fn blockHelper(self: *Lexer, keyword: []const u8, mode: BlockMode) bool {
    _ = keyword;

    const short_prefix = switch (mode) {
        .prepend => "prepend ",
        .append => "append ",
        .replace => "block ",
    };
    const long_prefix = switch (mode) {
        .prepend => "block prepend ",
        .append => "block append ",
        .replace => "block ",
    };

    // The long form must be tried first: it also starts with "block ".
    const name_start: usize = if (mem.startsWith(u8, self.input, long_prefix))
        long_prefix.len
    else if (mem.startsWith(u8, self.input, short_prefix))
        short_prefix.len
    else
        return false;

    const end = mem.indexOfScalarPos(u8, self.input, name_start, '\n') orelse self.input.len;

    // Cut the name at a trailing "//" comment, if any.
    var name_end = end;
    if (mem.indexOf(u8, self.input[name_start..end], "//")) |comment_offset| {
        name_end = name_start + comment_offset;
    }
    // Trim whitespace
    while (name_end > name_start and isWhitespace(self.input[name_end - 1])) {
        name_end -= 1;
    }
    if (name_end <= name_start) return false;

    const name = self.input[name_start..name_end];
    self.consume(end);

    var token = self.tokWithString(.block, name);
    token.mode = TokenValue.fromString(switch (mode) {
        .prepend => "prepend",
        .append => "append",
        .replace => "replace",
    });
    // Finalize the span before appending; the list keeps a copy of the token.
    self.incrementColumn(end);
    self.tokEnd(&token);
    self.tokens.append(self.allocator, token) catch return false;
    return true;
}
2026-01-24 23:53:19 +05:30
/// Lexes a bare `block` keyword (the mixin block placeholder).
///
/// Matches `block` followed by the end of input, a line feed or a ':', or by
/// spaces/tabs that run to the end of the line (those are consumed too).
/// Emits a `mixin_block` token. Returns false for any other input or on
/// allocation failure.
fn mixinBlock(self: *Lexer) bool {
    const keyword = "block";
    if (!mem.startsWith(u8, self.input, keyword)) return false;

    var consume_len: usize = keyword.len;
    if (self.input.len > keyword.len) {
        switch (self.input[keyword.len]) {
            '\n', ':' => {},
            ' ', '\t' => {
                // Only acceptable when nothing but whitespace follows on this line.
                var i: usize = keyword.len;
                while (i < self.input.len and (self.input[i] == ' ' or self.input[i] == '\t')) {
                    i += 1;
                }
                if (i < self.input.len and self.input[i] != '\n') return false;
                consume_len = i;
            },
            else => return false,
        }
    }

    self.consume(consume_len);
    var token = self.tok(.mixin_block, .none);
    // Finalize the span before appending; the list keeps a copy of the token.
    self.incrementColumn(consume_len);
    self.tokEnd(&token);
    self.tokens.append(self.allocator, token) catch return false;
    return true;
}
/// Lexes the `yield` keyword when it is followed by the end of input, a line
/// feed or a ':'. Emits a `yield` token. Returns false for any other input
/// or on allocation failure.
fn yieldToken(self: *Lexer) bool {
    const keyword = "yield";
    if (!mem.startsWith(u8, self.input, keyword)) return false;
    if (self.input.len > keyword.len and
        self.input[keyword.len] != '\n' and
        self.input[keyword.len] != ':')
    {
        return false;
    }

    self.consume(keyword.len);
    var token = self.tok(.yield, .none);
    // Finalize the span before appending; the list keeps a copy of the token.
    self.incrementColumn(keyword.len);
    self.tokEnd(&token);
    self.tokens.append(self.allocator, token) catch return false;
    return true;
}
/// Lexes an `include` line.
///
/// `include` must be followed by a space, ':', a line feed or the end of
/// input; anything else records `MALFORMED_INCLUDE`. After the `include`
/// token, any number of `:filter`s are lexed, then the path; a missing path
/// records `NO_INCLUDE_PATH`. Both error cases return true, so callers must
/// inspect `last_error`. Returns false when the input does not start with
/// `include` or on allocation failure.
fn includeToken(self: *Lexer) bool {
    const keyword = "include";
    if (!mem.startsWith(u8, self.input, keyword)) return false;

    // 0 stands for "end of input".
    const after: u8 = if (self.input.len > keyword.len) self.input[keyword.len] else 0;
    switch (after) {
        0, ' ', ':', '\n' => {},
        else => {
            // "include" followed by something else (like "(") - malformed
            self.setError(.MALFORMED_INCLUDE, "malformed include");
            return true;
        },
    }

    self.consume(keyword.len);
    var token = self.tok(.include, .none);
    // Finalize the span before appending; the list keeps a copy of the token.
    self.incrementColumn(keyword.len);
    self.tokEnd(&token);
    self.tokens.append(self.allocator, token) catch return false;

    // Parse filters
    while (self.filter(true)) {}

    if (!self.path()) {
        self.setError(.NO_INCLUDE_PATH, "missing path for include");
    }
    return true;
}
/// Lexes a path argument: matches /^ ([^\n]+)/.
///
/// The input must start with a space. Leading and trailing spaces are not
/// part of the token value, but the whole line is consumed. Returns false
/// when there is no leading space, when the path is empty after trimming
/// (nothing is consumed in that case), or on allocation failure.
fn path(self: *Lexer) bool {
    if (self.input.len == 0 or self.input[0] != ' ') return false;

    // Skip leading spaces
    var start: usize = 1;
    while (start < self.input.len and self.input[start] == ' ') {
        start += 1;
    }

    const end = mem.indexOfScalarPos(u8, self.input, start, '\n') orelse self.input.len;

    // Trim trailing spaces
    var path_end = end;
    while (path_end > start and self.input[path_end - 1] == ' ') {
        path_end -= 1;
    }
    if (path_end <= start) return false;

    const path_val = self.input[start..path_end];
    self.consume(end);

    var token = self.tokWithString(.path, path_val);
    // Finalize the span before appending; the list keeps a copy of the token.
    self.incrementColumn(end);
    self.tokEnd(&token);
    self.tokens.append(self.allocator, token) catch return false;
    return true;
}
2026-01-24 23:53:19 +05:30
/// Lexes `case <expression>` (matches /^case +([^\n]+)/).
///
/// Returns false, consuming nothing, when the input does not start with
/// `case` followed by a space, a newline, or end of input.
/// Otherwise returns true: either a `.case` token carrying the expression
/// was appended, or an error was recorded and the caller must stop.
/// The error is `NO_CASE_EXPRESSION` when the expression is missing, or
/// whatever `validateExpressionBrackets` reported.
/// Also returns false if appending the token fails.
fn caseToken(self: *Lexer) bool {
    const keyword = "case";
    if (!mem.startsWith(u8, self.input, keyword)) return false;

    // Word boundary: only a space, newline or end of input may follow.
    if (self.input.len > keyword.len) {
        const next = self.input[keyword.len];
        if (next != ' ' and next != '\n') return false;
    }

    // Skip the spaces separating the keyword from its expression.
    var expr_start: usize = keyword.len;
    while (expr_start < self.input.len and self.input[expr_start] == ' ') {
        expr_start += 1;
    }

    // Bare "case", or "case" followed only by spaces.
    if (expr_start >= self.input.len or self.input[expr_start] == '\n') {
        self.consume(expr_start);
        self.incrementColumn(expr_start);
        self.setError(.NO_CASE_EXPRESSION, "missing expression for case");
        // Return true so the advance chain stops on the recorded error,
        // matching the other lexers in this file.
        return true;
    }

    // expr_start points at a non-newline byte, so the expression is non-empty.
    const end = mem.indexOfScalarPos(u8, self.input, expr_start, '\n') orelse self.input.len;
    const expr = self.input[expr_start..end];

    // Validate brackets are balanced in the expression
    if (!self.validateExpressionBrackets(expr)) {
        self.consume(end);
        self.incrementColumn(end);
        return true; // Error already set
    }

    self.consume(end);
    var token = self.tokWithString(.case, expr);
    self.tokens.append(self.allocator, token) catch return false;
    self.incrementColumn(end);
    self.tokEnd(&token);
    return true;
}
/// Validates that `()`, `[]` and `{}` in `expr` are balanced and properly
/// nested. Brackets inside '...', "..." or `...` string literals are ignored.
///
/// Returns true when balanced. Otherwise records `BRACKET_MISMATCH` (stray
/// or wrong closing bracket) or `NO_END_BRACKET` (unclosed opener) and
/// returns false. Also returns false, without recording an error, when the
/// bracket stack cannot be grown.
fn validateExpressionBrackets(self: *Lexer, expr: []const u8) bool {
    var bracket_stack = std.ArrayListUnmanaged(u8){};
    defer bracket_stack.deinit(self.allocator);

    var quote: u8 = 0; // active string delimiter, 0 while outside a string
    var skip_next = false; // previous byte was an unescaped backslash in a string

    for (expr) |c| {
        if (quote != 0) {
            // Escapes are tracked as state instead of peeking at the previous
            // byte, so an escaped backslash ("a\\") no longer hides the
            // closing quote.
            if (skip_next) {
                skip_next = false;
            } else if (c == '\\') {
                skip_next = true;
            } else if (c == quote) {
                quote = 0;
            }
            continue;
        }

        switch (c) {
            '"', '\'', '`' => quote = c,
            '(', '[', '{' => bracket_stack.append(self.allocator, c) catch return false,
            ')', ']', '}' => {
                if (bracket_stack.items.len == 0) {
                    self.setError(.BRACKET_MISMATCH, "Unexpected closing bracket in expression");
                    return false;
                }
                const last_open = bracket_stack.items[bracket_stack.items.len - 1];
                const expected_close: u8 = switch (last_open) {
                    '(' => ')',
                    '[' => ']',
                    '{' => '}',
                    else => 0,
                };
                if (c != expected_close) {
                    self.setError(.BRACKET_MISMATCH, "Mismatched bracket in expression");
                    return false;
                }
                _ = bracket_stack.pop();
            },
            else => {},
        }
    }

    if (bracket_stack.items.len > 0) {
        self.setError(.NO_END_BRACKET, "Unclosed bracket in expression");
        return false;
    }
    return true;
}
2026-01-24 23:53:19 +05:30
/// Lexes `when <expression>` (matches /^when +([^:\n]+)/, but tolerates
/// colons inside string literals and inside `{...}`).
///
/// Returns false, consuming nothing, when the input does not start with
/// `when` followed by a space, a newline, or end of input.
/// Otherwise returns true: either a `.when` token carrying the expression
/// was appended, or `NO_WHEN_EXPRESSION` was recorded and the caller must
/// stop. Also returns false if appending the token fails.
fn when(self: *Lexer) bool {
    const keyword = "when";
    if (!mem.startsWith(u8, self.input, keyword)) return false;

    // Check if followed by word boundary (space, newline, or end)
    if (self.input.len > keyword.len) {
        const next = self.input[keyword.len];
        if (next != ' ' and next != '\n') return false;
    }

    // Skip the spaces separating the keyword from its expression.
    var i: usize = keyword.len;
    while (i < self.input.len and self.input[i] == ' ') {
        i += 1;
    }

    // Bare "when", or "when" followed only by spaces.
    if (i >= self.input.len or self.input[i] == '\n') {
        self.consume(i);
        self.incrementColumn(i);
        self.setError(.NO_WHEN_EXPRESSION, "missing expression for when");
        // Return true so the advance chain stops on the recorded error,
        // matching the other lexers in this file.
        return true;
    }

    // Scan to the terminating colon or newline. A colon only terminates the
    // expression outside string literals and outside braces.
    var end = i;
    var quote: u8 = 0; // active string delimiter, 0 while outside a string
    var escape_next = false;
    var brace_depth: usize = 0;
    while (end < self.input.len and self.input[end] != '\n') : (end += 1) {
        const c = self.input[end];
        if (escape_next) {
            escape_next = false;
            continue;
        }
        if (c == '\\') {
            escape_next = true;
            continue;
        }
        if (quote != 0) {
            if (c == quote) quote = 0;
            continue;
        }
        switch (c) {
            '\'', '"', '`' => quote = c,
            // Track braces for object literals like {tim: 'g'}
            '{' => brace_depth += 1,
            '}' => {
                if (brace_depth > 0) brace_depth -= 1;
            },
            // `break` leaves the loop without advancing, so `end` stays on ':'.
            ':' => if (brace_depth == 0) break,
            else => {},
        }
    }

    // Reached when the first byte after the spaces is already a colon.
    if (end <= i) {
        self.setError(.NO_WHEN_EXPRESSION, "missing expression for when");
        return true;
    }

    const expr = self.input[i..end];
    self.consume(end);
    var token = self.tokWithString(.when, expr);
    self.tokens.append(self.allocator, token) catch return false;
    self.incrementColumn(end);
    self.tokEnd(&token);
    return true;
}
/// Lexes the `default` keyword of a case block.
///
/// `default` may be followed by optional spaces and then end of input, a
/// newline, or `:`. Returns false, consuming nothing, when another byte
/// directly follows the keyword (e.g. `defaults`).
/// Returns true after appending the `.default` token, and also after
/// recording `DEFAULT_WITH_EXPRESSION` for input such as `default foo`.
/// Also returns false if appending the token fails.
fn defaultToken(self: *Lexer) bool {
    const keyword = "default";
    if (!mem.startsWith(u8, self.input, keyword)) return false;

    // Index of the first byte after the keyword and any trailing spaces.
    var rest: usize = keyword.len;
    while (rest < self.input.len and self.input[rest] == ' ') {
        rest += 1;
    }

    const terminated = rest >= self.input.len or
        self.input[rest] == '\n' or
        self.input[rest] == ':';

    if (!terminated) {
        // No separating space: this is some other word, not the keyword.
        if (rest == keyword.len) return false;
        // "default foo" should error
        self.consume(rest);
        self.incrementColumn(rest);
        self.setError(.DEFAULT_WITH_EXPRESSION, "`default` cannot have an expression");
        return true; // Return true to stop advance chain, error is set
    }

    // Consume the trailing spaces together with the keyword. Leaving them in
    // the input would put stray spaces in front of the newline or colon.
    self.consume(rest);
    var token = self.tok(.default, .none);
    self.tokens.append(self.allocator, token) catch return false;
    self.incrementColumn(keyword.len);
    self.tokEnd(&token);
    // The token ends at the keyword; the column still moves past the spaces.
    if (rest > keyword.len) {
        self.incrementColumn(rest - keyword.len);
    }
    return true;
}
/// Lexes a mixin call: `+name`, `+name(args)`, or the interpolated form
/// `+#{expression}(args)`. Matches /^\+(\s*)(([-\w]+)|(#\{))/.
///
/// The `.call` token's value is the mixin name, or the `#{...}` text for an
/// interpolated call. `token.args` holds the text between the parentheses
/// when an argument list follows, and `.none` otherwise. A parenthesised
/// group that looks like `key=` is treated as attributes and left in the
/// input.
/// Returns false when the input is not a call, when the `#{` expression
/// cannot be bracket-matched, or when appending the token fails.
fn call(self: *Lexer) bool {
    if (self.input.len < 2 or self.input[0] != '+') return false;

    // Skip whitespace between '+' and the name
    var i: usize = 1;
    while (i < self.input.len and (self.input[i] == ' ' or self.input[i] == '\t')) {
        i += 1;
    }

    // Check for interpolated call #{
    if (i + 1 < self.input.len and self.input[i] == '#' and self.input[i + 1] == '{') {
        const match = self.bracketExpression(i + 1) catch return false;
        const increment = match.end + 1;
        self.consume(increment);
        var token = self.tok(.call, .none);
        // The value is the `#{expression}` text, braces included. It is
        // sliced out of original_input at the offset where this call began.
        const base = self.original_input.len - self.input.len - increment;
        token.val = TokenValue.fromString(self.original_input[base + i .. base + match.end + 1]);
        self.incrementColumn(increment);
        token.args = .none;

        // Check for args
        if (self.input.len > 0 and self.input[0] == '(') {
            if (self.bracketExpression(0)) |args_match| {
                self.incrementColumn(1); // opening '('
                self.consume(args_match.end + 1);
                token.args = TokenValue.fromString(args_match.src);
                self.advanceOverCallArgs(args_match.src);
            } else |_| {}
        }

        self.tokens.append(self.allocator, token) catch return false;
        self.tokEnd(&token);
        return true;
    }

    // Simple call: the name is a run of word characters and dashes.
    var end = i;
    while (end < self.input.len and (isWordChar(self.input[end]) or self.input[end] == '-')) {
        end += 1;
    }
    if (end == i) return false;

    const name = self.input[i..end];
    self.consume(end);
    var token = self.tokWithString(.call, name);
    self.incrementColumn(end);
    token.args = .none;

    // Check for args (not attributes), allowing spaces before '('.
    var j: usize = 0;
    while (j < self.input.len and self.input[j] == ' ') {
        j += 1;
    }
    if (j < self.input.len and self.input[j] == '(') {
        if (self.bracketExpression(j)) |args_match| {
            const src = args_match.src;

            // Attributes look like /^\s*[-\w]+ *=/: a non-empty key, then '='.
            var k: usize = 0;
            while (k < src.len and (src[k] == ' ' or src[k] == '\t')) {
                k += 1;
            }
            var key_end = k;
            while (key_end < src.len and (isWordChar(src[key_end]) or src[key_end] == '-')) {
                key_end += 1;
            }
            var eq_pos = key_end;
            while (eq_pos < src.len and src[eq_pos] == ' ') {
                eq_pos += 1;
            }
            const is_attributes = key_end > k and eq_pos < src.len and src[eq_pos] == '=';

            if (!is_attributes) {
                self.incrementColumn(j + 1); // leading spaces and '('
                // `end` is measured from the start of the input (see its use
                // above and in `mixin`), so it already includes the `j`
                // leading spaces.
                self.consume(args_match.end + 1);
                token.args = TokenValue.fromString(src);
                self.advanceOverCallArgs(src);
            }
        } else |_| {}
    }

    self.tokens.append(self.allocator, token) catch return false;
    self.tokEnd(&token);
    return true;
}

/// Advances the line/column counters over a consumed call argument list:
/// every byte of `args_src`, then the closing `)`.
fn advanceOverCallArgs(self: *Lexer, args_src: []const u8) void {
    for (args_src) |c| {
        if (c == '\n') {
            self.incrementLine(1);
        } else {
            self.incrementColumn(1);
        }
    }
    self.incrementColumn(1); // closing ')'
}
/// Lexes a mixin declaration: `mixin name` or `mixin name(args)`.
/// Matches /^mixin +([-\w]+)(?: *\((.*)\))? */.
///
/// The `.mixin` token's value is the name. `token.args` holds the text
/// between the parentheses, or `.none` when there is no parameter list.
/// Returns false when the input is not a mixin declaration, when the
/// parameter list cannot be bracket-matched, or when appending fails.
fn mixin(self: *Lexer) bool {
    const keyword = "mixin ";
    if (!mem.startsWith(u8, self.input, keyword)) return false;

    var i: usize = keyword.len;
    while (i < self.input.len and self.input[i] == ' ') {
        i += 1;
    }

    // Get mixin name: a run of word characters and dashes.
    var name_end = i;
    while (name_end < self.input.len and (isWordChar(self.input[name_end]) or self.input[name_end] == '-')) {
        name_end += 1;
    }
    if (name_end == i) return false;
    const name = self.input[i..name_end];

    // Skip spaces after the name
    var end = name_end;
    while (end < self.input.len and self.input[end] == ' ') {
        end += 1;
    }

    var args: TokenValue = .none;
    if (end < self.input.len and self.input[end] == '(') {
        const bracket_result = self.bracketExpression(end) catch return false;
        args = TokenValue.fromString(bracket_result.src);
        end = bracket_result.end + 1;
        // The pattern ends in ` *`: spaces after the parameter list belong
        // to the declaration too and must not be left in the input.
        while (end < self.input.len and self.input[end] == ' ') {
            end += 1;
        }
    }

    self.consume(end);
    var token = self.tokWithString(.mixin, name);
    token.args = args;
    self.tokens.append(self.allocator, token) catch return false;
    self.incrementColumn(end);
    self.tokEnd(&token);
    return true;
}
/// Lexes `if`, `unless`, `else if` and `else` lines.
/// Matches /^(if|unless|else if|else)\b([^\n]*)/.
///
/// The rest of the line, trimmed of spaces and tabs, becomes the token value
/// (null when empty). `unless` is emitted as an `.if` token with the
/// expression text stored unchanged.
/// NOTE(review): no negation is applied here - confirm that `unless` is
/// negated downstream.
/// An `else` with a condition records `ELSE_CONDITION` and returns true
/// without appending a token.
fn conditional(self: *Lexer) bool {
    const Keyword = struct { text: []const u8, kind: TokenType };
    // Order matters: the first prefix that matches wins, so "else if" must be
    // tried before "else".
    const keywords = [_]Keyword{
        .{ .text = "else if", .kind = .else_if },
        .{ .text = "if", .kind = .@"if" },
        .{ .text = "unless", .kind = .@"if" },
        .{ .text = "else", .kind = .@"else" },
    };

    const matched: Keyword = for (keywords) |candidate| {
        if (mem.startsWith(u8, self.input, candidate.text)) break candidate;
    } else return false;

    // Word boundary: the keyword must not run into another word character.
    const kw_len = matched.text.len;
    if (self.input.len > kw_len and isWordChar(self.input[kw_len])) return false;

    // The expression is the rest of the line, trimmed.
    const line_end = mem.indexOfScalarPos(u8, self.input, kw_len, '\n') orelse self.input.len;
    const js = mem.trim(u8, self.input[kw_len..line_end], " \t");

    self.consume(line_end);
    var token = self.tokWithString(matched.kind, if (js.len > 0) js else null);

    if (matched.kind == .@"else" and js.len > 0) {
        self.setError(.ELSE_CONDITION, "`else` cannot have a condition, perhaps you meant `else if`");
        return true; // Return true to stop advance chain, error is set
    }

    self.tokens.append(self.allocator, token) catch return false;
    self.incrementColumn(line_end);
    self.tokEnd(&token);
    return true;
}
2026-01-24 23:53:19 +05:30
/// Lexes `while <expression>` (matches /^while +([^\n]+)/).
///
/// Returns false, consuming nothing, when the input does not start with
/// `while` followed by a space, a newline, or end of input.
/// Otherwise returns true: either a `.while` token carrying the expression
/// was appended, or `NO_WHILE_EXPRESSION` was recorded and the caller must
/// stop. Also returns false if appending the token fails.
fn whileToken(self: *Lexer) bool {
    const keyword = "while";
    if (!mem.startsWith(u8, self.input, keyword)) return false;

    // Check if followed by word boundary
    if (self.input.len > keyword.len) {
        const next = self.input[keyword.len];
        if (next != ' ' and next != '\n') return false;
    }

    // Skip the spaces separating the keyword from its expression.
    var i: usize = keyword.len;
    while (i < self.input.len and self.input[i] == ' ') {
        i += 1;
    }

    // Bare "while", or "while" followed only by spaces.
    if (i >= self.input.len or self.input[i] == '\n') {
        self.consume(i);
        self.incrementColumn(i);
        self.setError(.NO_WHILE_EXPRESSION, "missing expression for while");
        // Return true so the advance chain stops on the recorded error,
        // matching the other lexers in this file.
        return true;
    }

    // `i` points at a non-newline byte, so the expression is non-empty.
    const end = mem.indexOfScalarPos(u8, self.input, i, '\n') orelse self.input.len;
    const expr = self.input[i..end];

    self.consume(end);
    var token = self.tokWithString(.@"while", expr);
    self.tokens.append(self.allocator, token) catch return false;
    self.incrementColumn(end);
    self.tokEnd(&token);
    return true;
}
2026-01-24 23:53:19 +05:30
/// Lexes `each value[, key] in expression` (also spelled `for ...`).
///
/// The `.each` token's value is the value identifier, `token.key` the
/// optional key identifier, and `token.code` the expression after `in`.
/// Defers to `eachOf` when no identifier follows the keyword or when the
/// identifier(s) are followed by `of `.
/// Returns false when the input does not start with `each ` / `for `, or
/// when appending the token fails. Returns true after appending the token,
/// and also after recording `MALFORMED_EACH`, so the caller stops on the
/// error.
fn each(self: *Lexer) bool {
    const prefix_len: usize = if (mem.startsWith(u8, self.input, "each "))
        5
    else if (mem.startsWith(u8, self.input, "for "))
        4
    else
        return false;

    // Skip spaces
    var i = prefix_len;
    while (i < self.input.len and self.input[i] == ' ') {
        i += 1;
    }

    // Get first identifier
    if (i >= self.input.len or !isIdentStart(self.input[i])) {
        return self.eachOf();
    }
    var ident_end = i + 1;
    while (ident_end < self.input.len and isIdentChar(self.input[ident_end])) {
        ident_end += 1;
    }
    const val_name = self.input[i..ident_end];
    i = ident_end;
    while (i < self.input.len and self.input[i] == ' ') {
        i += 1;
    }

    // Check for , key
    var key_name: TokenValue = .none;
    if (i < self.input.len and self.input[i] == ',') {
        i += 1;
        while (i < self.input.len and self.input[i] == ' ') {
            i += 1;
        }
        if (i < self.input.len and isIdentStart(self.input[i])) {
            var key_end = i + 1;
            while (key_end < self.input.len and isIdentChar(self.input[key_end])) {
                key_end += 1;
            }
            key_name = TokenValue.fromString(self.input[i..key_end]);
            i = key_end;
        }
    }
    while (i < self.input.len and self.input[i] == ' ') {
        i += 1;
    }

    // Check for 'in' or 'of'
    const rest = self.input[i..];
    if (mem.startsWith(u8, rest, "of ")) {
        return self.eachOf();
    }
    if (!mem.startsWith(u8, rest, "in ")) {
        self.setError(.MALFORMED_EACH, "Malformed each statement");
        // Return true so the advance chain stops on the recorded error
        // instead of handing the line to the lexers that follow.
        return true;
    }
    i += 3; // skip "in "
    while (i < self.input.len and self.input[i] == ' ') {
        i += 1;
    }

    // Get expression
    const end = mem.indexOfScalarPos(u8, self.input, i, '\n') orelse self.input.len;
    if (end <= i) {
        self.setError(.MALFORMED_EACH, "missing expression for each");
        return true;
    }

    const expr = self.input[i..end];
    self.consume(end);
    var token = self.tokWithString(.each, val_name);
    token.key = key_name;
    token.code = TokenValue.fromString(expr);
    self.tokens.append(self.allocator, token) catch return false;
    self.incrementColumn(end);
    self.tokEnd(&token);
    return true;
}
2026-01-24 23:53:19 +05:30
/// Reports whether `c` may start an identifier: an ASCII letter, `_` or `$`.
fn isIdentStart(c: u8) bool {
    return switch (c) {
        'a'...'z', 'A'...'Z', '_', '$' => true,
        else => false,
    };
}
2026-01-24 23:53:19 +05:30
/// Reports whether `c` may continue an identifier: anything accepted by
/// `isIdentStart`, or an ASCII digit.
fn isIdentChar(c: u8) bool {
    return switch (c) {
        '0'...'9' => true,
        else => isIdentStart(c),
    };
}
2026-01-24 23:53:19 +05:30
/// Lexes `each <binding> of <expression>` (also spelled `for ...`).
///
/// The `.each_of` token's value is the text between the keyword and the
/// first " of " on the line; `token.code` is the expression after it.
/// Returns false, consuming nothing, when the line has no " of " separator
/// ahead of any string literal, when the expression is empty, or when the
/// input does not start with `each ` / `for `. Also returns false if
/// appending the token fails.
fn eachOf(self: *Lexer) bool {
    const prefix_len: usize = if (mem.startsWith(u8, self.input, "each "))
        5
    else if (mem.startsWith(u8, self.input, "for "))
        4
    else
        return false;

    // Find " of " on the current line.
    const separator = " of ";
    var of_pos: ?usize = null;
    var j = prefix_len;
    while (j + separator.len <= self.input.len) : (j += 1) {
        if (mem.startsWith(u8, self.input[j..], separator)) {
            of_pos = j;
            break;
        }
        switch (self.input[j]) {
            // The binding never spans lines.
            '\n' => break,
            // A binding cannot contain a quote. Once one is seen we are
            // inside an expression, so a later " of " (e.g. in
            // `each v in ['a of b']`) is not the separator.
            '"', '\'', '`' => break,
            else => {},
        }
    }
    const sep_at = of_pos orelse return false;

    const value = self.input[prefix_len..sep_at];

    // Skip " of " and any further spaces
    var i = sep_at + separator.len;
    while (i < self.input.len and self.input[i] == ' ') {
        i += 1;
    }

    const end = mem.indexOfScalarPos(u8, self.input, i, '\n') orelse self.input.len;
    if (end <= i) return false;

    const expr = self.input[i..end];
    self.consume(end);
    var token = self.tokWithString(.each_of, value);
    token.code = TokenValue.fromString(expr);
    self.tokens.append(self.allocator, token) catch return false;
    self.incrementColumn(end);
    self.tokEnd(&token);
    return true;
}
2026-01-24 23:53:19 +05:30
/// Lexes a line of inline code introduced by `-`, `=` or `!=`.
///
/// The `.code` token's value is the rest of the line after the marker and
/// any spaces/tabs. `must_escape` and `buffer` are set from the marker:
/// `-` is unbuffered, `=` is buffered and escaped, `!=` is buffered and
/// unescaped.
/// A `- each ...` / `- for ...` line whose keyword is followed by an
/// identifier start records `MALFORMED_EACH` and returns true.
/// Returns false when the input does not start with a marker or when
/// appending the token fails.
fn code(self: *Lexer) bool {
    if (self.input.len == 0) return false;

    const Flags = struct { len: usize, must_escape: bool, buffer: bool };
    const flags: Flags = switch (self.input[0]) {
        '-' => .{ .len = 1, .must_escape = false, .buffer = false },
        '=' => .{ .len = 1, .must_escape = true, .buffer = true },
        '!' => if (self.input.len >= 2 and self.input[1] == '=')
            Flags{ .len = 2, .must_escape = false, .buffer = true }
        else
            return false,
        else => return false,
    };

    // Skip spaces/tabs between the marker and the code.
    var code_start = flags.len;
    while (code_start < self.input.len and (self.input[code_start] == ' ' or self.input[code_start] == '\t')) {
        code_start += 1;
    }

    // Check for old-style "- each" or "- for" prefixed syntax
    if (self.input[0] == '-') {
        const rest = self.input[code_start..];
        const keyword_len: ?usize = if (mem.startsWith(u8, rest, "each "))
            5
        else if (mem.startsWith(u8, rest, "for "))
            4
        else
            null;

        if (keyword_len) |kw_len| {
            var j: usize = kw_len;
            while (j < rest.len and (rest[j] == ' ' or rest[j] == '\t')) {
                j += 1;
            }
            // An identifier after the keyword: "- each var in expr".
            if (j < rest.len and (std.ascii.isAlphabetic(rest[j]) or rest[j] == '_' or rest[j] == '$')) {
                self.setError(.MALFORMED_EACH, "Pug each and for should no longer be prefixed with a dash (\"-\"). They are pug keywords and not part of JavaScript.");
                return true;
            }
        }
    }

    var line_end = code_start;
    while (line_end < self.input.len and self.input[line_end] != '\n') {
        line_end += 1;
    }

    const code_val = self.input[code_start..line_end];
    self.consume(line_end);
    var code_tok = self.tokWithString(.code, code_val);
    code_tok.must_escape = TokenValue.fromBool(flags.must_escape);
    code_tok.buffer = TokenValue.fromBool(flags.buffer);
    self.tokens.append(self.allocator, code_tok) catch return false;
    self.incrementColumn(line_end);
    self.tokEnd(&code_tok);
    return true;
}
/// Lexes a lone `-` that opens a block of unbuffered code.
///
/// The dash must be followed by end of input, a newline, or `:`. After the
/// `.blockcode` token is appended, interpolation is switched off and
/// `pipelessText` is run; its result is ignored.
/// Returns false when the input is not a lone dash or when appending fails.
fn blockCode(self: *Lexer) bool {
    if (!mem.startsWith(u8, self.input, "-")) return false;

    // Must be followed by end of line
    if (self.input.len > 1) {
        switch (self.input[1]) {
            '\n', ':' => {},
            else => return false,
        }
    }

    self.consume(1);
    var block_tok = self.tok(.blockcode, .none);
    self.tokens.append(self.allocator, block_tok) catch return false;
    self.incrementColumn(1);
    self.tokEnd(&block_tok);

    self.interpolation_allowed = false;
    _ = self.pipelessText(null);
    return true;
}
2026-01-24 23:53:19 +05:30
/// Lexes a parenthesised attribute list: `.start_attributes`, one
/// `.attribute` token per attribute (via `parseAttributes`), then
/// `.end_attributes`.
///
/// Returns false when the input does not start with `(`, when the closing
/// parenthesis cannot be found by `bracketExpression`, or when appending a
/// token fails. If `parseAttributes` recorded an error, returns true without
/// emitting `.end_attributes` so the caller stops on that error.
fn attrs(self: *Lexer) bool {
    if (!mem.startsWith(u8, self.input, "(")) return false;

    var open_tok = self.tok(.start_attributes, .none);
    const range = self.bracketExpression(0) catch return false;
    // Text strictly between the opening and closing parentheses.
    const inner = self.input[1..range.end];

    self.incrementColumn(1);
    self.tokens.append(self.allocator, open_tok) catch return false;
    self.tokEnd(&open_tok);
    self.consume(range.end + 1);

    self.parseAttributes(inner);

    // Only close the list when parseAttributes did not record an error.
    if (self.last_error == null) {
        var close_tok = self.tok(.end_attributes, .none);
        self.incrementColumn(1);
        self.tokens.append(self.allocator, close_tok) catch return false;
        self.tokEnd(&close_tok);
    }
    return true;
}
/// Splits the text between an attribute list's parentheses into
/// `.attribute` tokens, advancing the line/column counters byte by byte.
///
/// Each attribute has a key (quoted with ' or ", or bare up to whitespace,
/// `!`, `=` or `,`) and an optional `=value` / `!=value`. A value ends at
/// the first whitespace or comma outside any string or nesting, as reported
/// by `CharParserState`. Attributes without `=` get the boolean value true.
/// `must_escape` is false only for `!=`.
/// A newline inside a '...' or "..." string in a value records
/// `SYNTAX_ERROR` and stops. The function also stops silently if appending
/// a token fails.
fn parseAttributes(self: *Lexer, str: []const u8) void {
    var pos: usize = 0;
    while (pos < str.len) {
        // Skip whitespace before the attribute (may span lines).
        while (pos < str.len and isWhitespace(str[pos])) : (pos += 1) {
            self.stepOverAttrByte(str[pos]);
        }
        if (pos >= str.len) break;

        var attr_token = self.tok(.attribute, .none);

        // Key: quoted or bare.
        const key: []const u8 = key_blk: {
            if (str[pos] == '"' or str[pos] == '\'') {
                const quote = str[pos];
                self.incrementColumn(1);
                pos += 1;
                const quoted_start = pos;
                while (pos < str.len and str[pos] != quote) : (pos += 1) {
                    self.stepOverAttrByte(str[pos]);
                }
                const quoted_key = str[quoted_start..pos];
                // Step over the closing quote when there is one.
                if (pos < str.len) {
                    self.incrementColumn(1);
                    pos += 1;
                }
                break :key_blk quoted_key;
            } else {
                const bare_start = pos;
                while (pos < str.len and !isWhitespace(str[pos]) and str[pos] != '!' and str[pos] != '=' and str[pos] != ',') : (pos += 1) {
                    self.stepOverAttrByte(str[pos]);
                }
                break :key_blk str[bare_start..pos];
            }
        };
        attr_token.name = TokenValue.fromString(key);

        // Spaces/tabs between the key and a possible operator.
        while (pos < str.len and (str[pos] == ' ' or str[pos] == '\t')) : (pos += 1) {
            self.incrementColumn(1);
        }

        // A '!' in front of '=' turns escaping off.
        var must_escape = true;
        if (pos < str.len and str[pos] == '!') {
            must_escape = false;
            self.incrementColumn(1);
            pos += 1;
        }

        if (pos < str.len and str[pos] == '=') {
            self.incrementColumn(1);
            pos += 1;

            // Skip whitespace (including newlines) before the value.
            while (pos < str.len and isWhitespace(str[pos])) : (pos += 1) {
                self.stepOverAttrByte(str[pos]);
            }

            var state: CharParserState = .{};
            defer state.deinit(self.allocator);

            const val_start = pos;
            var has_content = false; // set once a non-whitespace byte is seen
            while (pos < str.len) : (pos += 1) {
                const char = str[pos];
                state.parseChar(self.allocator, char) catch break;

                if (!isWhitespace(char)) has_content = true;

                // The value ends at whitespace or a comma outside any string
                // or nesting. `break` leaves `pos` on the terminator.
                if (!state.isNesting() and !state.isString() and has_content and
                    (isWhitespace(char) or char == ','))
                {
                    break;
                }

                // Newlines are only legal inside backtick template literals.
                if (char == '\n' and state.isString()) {
                    if (state.getStringChar()) |qc| {
                        if (qc == '\'' or qc == '"') {
                            self.setError(.SYNTAX_ERROR, "Invalid newline in string literal");
                            return;
                        }
                    }
                }

                self.stepOverAttrByte(char);
            }

            attr_token.val = TokenValue.fromString(str[val_start..pos]);
            attr_token.must_escape = TokenValue.fromBool(must_escape);
        } else {
            // Boolean attribute
            attr_token.val = TokenValue.fromBool(true);
            attr_token.must_escape = TokenValue.fromBool(true);
        }

        self.tokens.append(self.allocator, attr_token) catch return;
        self.tokEnd(&attr_token);

        // Skip whitespace and commas separating this attribute from the next.
        while (pos < str.len and (isWhitespace(str[pos]) or str[pos] == ',')) : (pos += 1) {
            self.stepOverAttrByte(str[pos]);
        }
    }
}

/// Advances the position counters over one attribute-list byte: a newline
/// starts a new line, any other byte moves one column.
fn stepOverAttrByte(self: *Lexer, c: u8) void {
    if (c == '\n') {
        self.incrementLine(1);
    } else {
        self.incrementColumn(1);
    }
}
2026-01-24 23:53:19 +05:30
/// Lexes `&attributes(expression)`.
///
/// The `.@"&attributes"` token's value is the text between the parentheses
/// as returned by `bracketExpression`.
/// Returns false when the input does not start with `&attributes`, when a
/// word character follows the keyword, when `bracketExpression` fails, or
/// when appending fails.
/// NOTE(review): the keyword has already been consumed when
/// `bracketExpression` fails - confirm callers tolerate that.
fn attributesBlock(self: *Lexer) bool {
    const keyword = "&attributes";
    if (!mem.startsWith(u8, self.input, keyword)) return false;
    if (self.input.len > keyword.len and isWordChar(self.input[keyword.len])) return false;

    self.consume(keyword.len);
    var attrs_tok = self.tok(.@"&attributes", .none);
    self.incrementColumn(keyword.len);

    const args = self.bracketExpression(0) catch return false;
    const args_len = args.end + 1; // through the closing bracket
    self.consume(args_len);
    attrs_tok.val = TokenValue.fromString(args.src);
    self.incrementColumn(args_len);

    self.tokens.append(self.allocator, attrs_tok) catch return false;
    self.tokEnd(&attrs_tok);
    return true;
}
2026-01-24 23:53:19 +05:30
/// Lexes a line break plus the indentation of the next line, and turns the
/// change in depth into `.indent`, `.outdent` or `.newline` tokens.
///
/// `indent_stack.items[0]` is the current depth. Deeper lines push a level
/// and emit `.indent`, shallower lines pop levels and emit one `.outdent`
/// per level, and same-depth lines emit `.newline`.
/// A line followed directly by another newline is treated as blank: a
/// newline token is built but not appended to the token list.
/// Returns false when `scanIndentation` finds nothing or when appending
/// fails. Returns true after emitting tokens, and also after recording
/// `INCONSISTENT_INDENTATION`, so the caller stops on the error.
fn indent(self: *Lexer) bool {
    const captures = self.scanIndentation() orelse return false;
    const indents = captures.indent.len;

    self.incrementLine(1);
    self.consume(captures.total_len);

    // Blank line
    if (self.input.len > 0 and self.input[0] == '\n') {
        self.interpolation_allowed = true;
        var newline_token = self.tok(.newline, .none);
        self.tokEnd(&newline_token);
        return true;
    }

    if (indents < self.indent_stack.items[0]) {
        // Outdent: pop every level deeper than the new indentation.
        var outdent_count: usize = 0;
        while (self.indent_stack.items[0] > indents) {
            // The new depth must land on an existing level, not between two.
            if (self.indent_stack.items.len > 1 and self.indent_stack.items[1] < indents) {
                self.setError(.INCONSISTENT_INDENTATION, "Inconsistent indentation");
                // Return true so the advance chain stops on the recorded
                // error, matching the other lexers in this file.
                return true;
            }
            outdent_count += 1;
            _ = self.indent_stack.orderedRemove(0);
        }
        while (outdent_count > 0) : (outdent_count -= 1) {
            // Each outdent token starts at column 1 and ends at the new depth.
            self.colno = 1;
            var outdent_token = self.tok(.outdent, .none);
            self.colno = self.indent_stack.items[0] + 1;
            self.tokens.append(self.allocator, outdent_token) catch return false;
            self.tokEnd(&outdent_token);
        }
    } else if (indents > 0 and indents != self.indent_stack.items[0]) {
        // Indent
        var indent_token = self.tok(.indent, .none);
        self.colno = 1 + indents;
        self.tokens.append(self.allocator, indent_token) catch return false;
        self.tokEnd(&indent_token);
        self.indent_stack.insert(self.allocator, 0, indents) catch return false;
    } else {
        // Newline
        var newline_token = self.tok(.newline, .none);
        self.colno = 1 + @min(self.indent_stack.items[0], indents);
        self.tokens.append(self.allocator, newline_token) catch return false;
        self.tokEnd(&newline_token);
    }

    self.interpolation_allowed = true;
    return true;
}
2026-01-24 23:53:19 +05:30
/// Lexes an indented block of raw text (the body after `.`, a filter, or
/// block code) into `.start_pipeless_text`, one `.text` token per line
/// separated by `.newline` tokens, then `.end_pipeless_text`.
///
/// `forced_indents`, when set, overrides the indentation measured by
/// `scanIndentation`. The function calls itself with it when a later line
/// is indented less than the first line but still deeper than the enclosing
/// level.
/// Returns false when there is no block deeper than
/// `indent_stack.items[0]` or when an allocation fails.
fn pipelessText(self: *Lexer, forced_indents: ?usize) bool {
    while (self.blank()) {}

    const captures = self.scanIndentation() orelse return false;
    const indents = forced_indents orelse captures.indent.len;
    if (indents <= self.indent_stack.items[0]) return false;

    var start_token = self.tok(.start_pipeless_text, .none);
    self.tokEnd(&start_token);
    self.tokens.append(self.allocator, start_token) catch return false;

    var lines: std.ArrayListUnmanaged([]const u8) = .{};
    var line_matched: std.ArrayListUnmanaged(bool) = .{};
    defer lines.deinit(self.allocator);
    defer line_matched.deinit(self.allocator);

    // Every line of the block is prefixed by the '\n' that ends the
    // previous line; `cursor` always rests on that newline.
    var cursor: usize = 0;
    while (cursor < self.input.len and self.input[cursor] == '\n') {
        const line_start = cursor + 1;
        const line_end = mem.indexOfScalarPos(u8, self.input, line_start, '\n') orelse self.input.len;
        const line = self.input[line_start..line_end];

        // Count leading spaces/tabs.
        var line_indent: usize = 0;
        while (line_indent < line.len and (line[line_indent] == ' ' or line[line_indent] == '\t')) {
            line_indent += 1;
        }

        const is_match = line_indent >= indents;
        line_matched.append(self.allocator, is_match) catch return false;

        // A line made up only of spaces/tabs (or empty) always belongs to
        // the block.
        const is_blank = line_indent == line.len;

        if (is_match or is_blank) {
            // Consume the line along with its '\n' prefix.
            cursor = line_end;
            // Whitespace-only lines are kept verbatim; other lines lose the
            // block indentation.
            const text_content = if (is_blank)
                line
            else if (line.len > indents)
                line[indents..]
            else
                "";
            lines.append(self.allocator, text_content) catch return false;
        } else if (line_indent > self.indent_stack.items[0]) {
            // Indented less than the first line but still inside the block:
            // drop the start token and retry with the shallower indentation.
            _ = self.tokens.pop();
            return self.pipelessText(line_indent);
        } else {
            break;
        }
    }

    self.consume(cursor);

    // Remove trailing empty lines when input is exhausted
    while (self.input.len == 0 and lines.items.len > 0 and lines.items[lines.items.len - 1].len == 0) {
        _ = lines.pop();
    }

    for (lines.items, 0..) |line_text, idx| {
        self.incrementLine(1);
        if (idx != 0) {
            var newline_token = self.tok(.newline, .none);
            self.tokens.append(self.allocator, newline_token) catch return false;
            self.tokEnd(&newline_token);
        }
        if (idx < line_matched.items.len and line_matched.items[idx]) {
            self.incrementColumn(indents);
        }

        if (line_text.len == 0) {
            // addText skips empty values, so blank lines get an explicit
            // empty text token to be preserved.
            var empty_token = self.tok(.text, .none);
            empty_token.val = TokenValue.fromString("");
            self.tokens.append(self.allocator, empty_token) catch return false;
            self.tokEnd(&empty_token);
        } else {
            self.addText(.text, line_text, "", 0);
        }
    }

    var end_token = self.tok(.end_pipeless_text, .none);
    self.tokEnd(&end_token);
    self.tokens.append(self.allocator, end_token) catch return false;
    return true;
}
/// Lexes a single `/` into a `.slash` token.
/// Returns false when the input does not start with `/` or when appending
/// the token fails.
fn slash(self: *Lexer) bool {
    if (!mem.startsWith(u8, self.input, "/")) return false;

    self.consume(1);
    var slash_tok = self.tok(.slash, .none);
    self.tokens.append(self.allocator, slash_tok) catch return false;
    self.incrementColumn(1);
    self.tokEnd(&slash_tok);
    return true;
}
2026-01-24 23:53:19 +05:30
/// Lexes `: ` (a colon followed by one or more spaces) into a `.colon`
/// token; the spaces are consumed with it.
/// Returns false when the input does not start with a colon and a space, or
/// when appending the token fails.
fn colon(self: *Lexer) bool {
    if (!mem.startsWith(u8, self.input, ": ")) return false;

    // Width of the colon plus every space that follows it.
    var width: usize = 2;
    while (width < self.input.len and self.input[width] == ' ') : (width += 1) {}

    self.consume(width);
    var colon_tok = self.tok(.colon, .none);
    self.tokens.append(self.allocator, colon_tok) catch return false;
    self.incrementColumn(width);
    self.tokEnd(&colon_tok);
    return true;
}
2026-01-24 23:53:19 +05:30
/// Records an `UNEXPECTED_TEXT` error ("unexpected text") through `setError`.
fn fail(self: *Lexer) void {
self.setError(.UNEXPECTED_TEXT, "unexpected text");
}
2026-01-24 23:53:19 +05:30
/// Appends a text token holding `value`, after checking that every tag
/// interpolation `#[...]` inside it is closed and correctly nested.
///
/// - `token_type`: type of the token to emit.
/// - `value`: the text; it becomes the token value.
/// - `prefix`: only takes part in the emptiness check (nothing is emitted
///   when `value` and `prefix` are both empty). It is not added to the
///   token value.
/// - `escaped`: extra columns to advance on top of `value.len`.
///
/// When `interpolation_allowed` is set, a mismatched bracket records
/// `BRACKET_MISMATCH`, an unclosed `#[` records `NO_END_BRACKET`, and in
/// both cases no token is emitted. Nothing is emitted either if an
/// allocation fails.
fn addText(self: *Lexer, token_type: TokenType, value: []const u8, prefix: []const u8, escaped: usize) void {
    if (value.len + prefix.len == 0) return;

    // Check for unclosed or mismatched tag interpolations #[...]
    // Inside #[...] is full Pug syntax, so all bracket types are tracked.
    if (self.interpolation_allowed) {
        var i: usize = 0;
        while (i + 1 < value.len) {
            // Skip escaped \#[
            if (value[i] == '\\' and i + 2 < value.len and value[i + 1] == '#' and value[i + 2] == '[') {
                i += 3;
                continue;
            }
            if (value[i] != '#' or value[i + 1] != '[') {
                i += 1;
                continue;
            }

            // Found the start of a tag interpolation; look for its ']'.
            var j = i + 2;
            var quote: u8 = 0; // active string delimiter, 0 while outside a string
            var skip_next = false; // previous byte was an unescaped backslash in a string
            var bracket_stack = std.ArrayListUnmanaged(u8){};
            defer bracket_stack.deinit(self.allocator);
            bracket_stack.append(self.allocator, '[') catch return;

            while (j < value.len and bracket_stack.items.len > 0) : (j += 1) {
                const c = value[j];
                if (quote != 0) {
                    // Escapes are tracked as state instead of peeking at the
                    // previous byte, so an escaped backslash ("a\\") no
                    // longer hides the closing quote.
                    if (skip_next) {
                        skip_next = false;
                    } else if (c == '\\') {
                        skip_next = true;
                    } else if (c == quote) {
                        quote = 0;
                    }
                    continue;
                }
                switch (c) {
                    '"', '\'', '`' => quote = c,
                    '[', '(', '{' => bracket_stack.append(self.allocator, c) catch return,
                    ']', ')', '}' => {
                        // The loop condition guarantees a non-empty stack.
                        const last_open = bracket_stack.items[bracket_stack.items.len - 1];
                        const expected_close: u8 = switch (last_open) {
                            '[' => ']',
                            '(' => ')',
                            '{' => '}',
                            else => 0,
                        };
                        if (c != expected_close) {
                            // Mismatched bracket type
                            self.setError(.BRACKET_MISMATCH, "Mismatched bracket in tag interpolation");
                            return;
                        }
                        _ = bracket_stack.pop();
                    },
                    else => {},
                }
            }

            if (bracket_stack.items.len > 0) {
                // Unclosed interpolation
                self.setError(.NO_END_BRACKET, "Unclosed tag interpolation - missing ]");
                return;
            }
            // Resume scanning right after the closing ']'.
            i = j;
        }
    }

    var token = self.tokWithString(token_type, value);
    self.incrementColumn(value.len + escaped);
    self.tokens.append(self.allocator, token) catch return;
    self.tokEnd(&token);
}
2026-01-24 23:53:19 +05:30
// ========================================================================
// Main advance and getTokens
// ========================================================================
/// Runs the token matchers in fixed priority order and stops at the first one
/// that reports success.
///
/// Returns `true` as soon as a matcher returns `true`. When every matcher
/// declines, `fail` is called and `false` is returned.
fn advance(self: *Lexer) bool {
    if (self.blank()) return true;
    if (self.eos()) return true;
    if (self.endInterpolation()) return true;
    if (self.yieldToken()) return true;
    if (self.doctype()) return true;
    if (self.interpolation()) return true;
    if (self.caseToken()) return true;
    if (self.when()) return true;
    if (self.defaultToken()) return true;
    if (self.extendsToken()) return true;
    if (self.append()) return true;
    if (self.prepend()) return true;
    if (self.blockToken()) return true;
    if (self.mixinBlock()) return true;
    if (self.includeToken()) return true;
    if (self.mixin()) return true;
    if (self.call()) return true;
    if (self.conditional()) return true;
    if (self.eachOf()) return true;
    if (self.each()) return true;
    if (self.whileToken()) return true;
    if (self.tag()) return true;
    if (self.filter(false)) return true;
    if (self.blockCode()) return true;
    if (self.code()) return true;
    if (self.id()) return true;
    if (self.dot()) return true;
    if (self.className()) return true;
    if (self.attrs()) return true;
    if (self.attributesBlock()) return true;
    if (self.indent()) return true;
    if (self.text()) return true;
    if (self.textHtml()) return true;
    if (self.comment()) return true;
    if (self.slash()) return true;
    if (self.colon()) return true;

    // No matcher accepted the input: record the failure and report no progress.
    self.fail();
    return false;
}
2026-01-24 23:53:19 +05:30
/// Drives the lexer until it has ended or can make no further progress, then
/// returns the accumulated tokens.
///
/// The returned slice is `self.tokens.items`, so it is backed by the lexer's
/// own token list. If an error has been recorded in `self.last_error` after
/// any step, it is printed via `std.debug.print` and `error.LexerError` is
/// returned.
pub fn getTokens(self: *Lexer) ![]Token {
    while (!self.ended) {
        const made_progress = self.advance();

        // The error slot is inspected after every step, whatever `advance` returned.
        const failure = self.last_error orelse {
            if (made_progress) continue;
            break;
        };

        std.debug.print("Lexer error at {d}:{d}: {s}\n", .{ failure.line, failure.column, failure.message });
        return error.LexerError;
    }

    return self.tokens.items;
}
};
2026-01-24 23:53:19 +05:30
// ============================================================================
// Options
// ============================================================================
2026-01-24 23:53:19 +05:30
/// Options accepted by `Lexer.init` and `lex`.
/// Every field has a default, so `.{}` is a valid value.
pub const LexerOptions = struct {
/// Optional file name; `null` when not provided.
/// NOTE(review): presumably used for error reporting - confirm in `Lexer.init`.
filename: ?[]const u8 = null,
/// Defaults to `false`.
/// NOTE(review): presumably marks input lexed from inside a `#[...]` tag interpolation - confirm.
interpolated: bool = false,
/// Defaults to 1.
/// NOTE(review): presumably the line number assigned to the first input line - confirm.
starting_line: usize = 1,
/// Defaults to 1.
/// NOTE(review): presumably the column number assigned to the first input character - confirm.
starting_column: usize = 1,
};
2026-01-24 23:53:19 +05:30
// ============================================================================
// Public API
// ============================================================================
/// Lexes `str` and returns the resulting tokens together with the
/// heap-allocated lexer that owns them.
///
/// IMPORTANT: the caller must keep the returned lexer alive while using the
/// tokens: the token slice is the lexer's own token list, and token string
/// values are slices into the lexer's input buffer. Release everything with
/// `freeLexResult(allocator, result.lexer)`.
///
/// On failure (allocation, `Lexer.init`, or a lexing error) everything
/// allocated here is released before the error is returned.
///
/// For simpler usage, use `Lexer.init()` and `Lexer.getTokens()` directly.
pub fn lex(allocator: Allocator, str: []const u8, options: LexerOptions) !struct { tokens: []Token, lexer: *Lexer } {
    const lexer = try allocator.create(Lexer);
    // Give the heap slot back if initialization or lexing fails below.
    errdefer allocator.destroy(lexer);

    lexer.* = try Lexer.init(allocator, str, options);
    // From here on the lexer owns resources; errdefers run in reverse order,
    // so this `deinit` happens before the `destroy` above.
    errdefer lexer.deinit();

    const tokens = try lexer.getTokens();
    return .{ .tokens = tokens, .lexer = lexer };
}
2026-01-24 23:53:19 +05:30
/// Releases a lexer obtained from `lex()`: deinitializes it, then frees the
/// heap allocation that held it. `allocator` must be the one passed to `lex()`.
pub fn freeLexResult(allocator: Allocator, lexer: *Lexer) void {
    // Scheduled first, runs last: the memory is freed only after `deinit`.
    defer allocator.destroy(lexer);
    lexer.deinit();
}
2026-01-24 23:53:19 +05:30
// ============================================================================
// Tests
// ============================================================================
2026-01-24 23:53:19 +05:30
// The `.none` variant reports itself as none and yields no payload.
test "TokenValue - none" {
    const testing = std.testing;

    const empty: TokenValue = .none;

    try testing.expect(empty.isNone());
    try testing.expect(empty.getString() == null);
    try testing.expect(empty.getBool() == null);
}
2026-01-24 23:53:19 +05:30
// A string value exposes its text and is neither none nor a boolean.
test "TokenValue - string" {
    const testing = std.testing;

    const text_value = TokenValue.fromString("hello");

    try testing.expect(!text_value.isNone());
    try testing.expectEqualStrings("hello", text_value.getString().?);
    try testing.expect(text_value.getBool() == null);
}
2026-01-24 23:53:19 +05:30
// Boolean values round-trip through `fromBool`/`getBool` and carry no string.
test "TokenValue - boolean" {
    const testing = std.testing;

    const yes = TokenValue.fromBool(true);
    const no = TokenValue.fromBool(false);

    try testing.expect(!yes.isNone());
    try testing.expect(yes.getBool().?);
    try testing.expect(yes.getString() == null);
    try testing.expect(!no.getBool().?);
}
2026-01-24 23:53:19 +05:30
// A bare tag name produces a leading `tag` token holding that name.
test "basic tag lexing" {
    const testing = std.testing;

    var lx = try Lexer.init(testing.allocator, "div", .{});
    defer lx.deinit();

    const toks = try lx.getTokens();
    try testing.expect(toks.len >= 2);

    const first = &toks[0];
    try testing.expectEqual(TokenType.tag, first.type);
    try testing.expectEqualStrings("div", first.getVal().?);
}
2026-01-24 23:53:19 +05:30
// `div#main` lexes to a `tag` token followed by an `id` token valued "main".
test "tag with id" {
    const testing = std.testing;

    var lx = try Lexer.init(testing.allocator, "div#main", .{});
    defer lx.deinit();

    const toks = try lx.getTokens();
    try testing.expect(toks.len >= 3);

    try testing.expectEqual(TokenType.tag, toks[0].type);

    const id_token = &toks[1];
    try testing.expectEqual(TokenType.id, id_token.type);
    try testing.expectEqualStrings("main", id_token.getVal().?);
}
2026-01-24 23:53:19 +05:30
// `div.container` lexes to a `tag` token followed by a `class` token valued "container".
test "tag with class" {
    const testing = std.testing;

    var lx = try Lexer.init(testing.allocator, "div.container", .{});
    defer lx.deinit();

    const toks = try lx.getTokens();
    try testing.expect(toks.len >= 3);

    try testing.expectEqual(TokenType.tag, toks[0].type);

    const class_token = &toks[1];
    try testing.expectEqual(TokenType.class, class_token.type);
    try testing.expectEqualStrings("container", class_token.getVal().?);
}
2026-01-24 23:53:19 +05:30
// `doctype html` produces a leading `doctype` token whose value is "html".
test "doctype" {
    const testing = std.testing;

    var lx = try Lexer.init(testing.allocator, "doctype html", .{});
    defer lx.deinit();

    const toks = try lx.getTokens();
    try testing.expect(toks.len >= 2);

    const first = &toks[0];
    try testing.expectEqual(TokenType.doctype, first.type);
    try testing.expectEqualStrings("html", first.getVal().?);
}
2026-01-24 23:53:19 +05:30
// A `//` comment is lexed as a buffered `comment` token.
test "comment with buffer" {
    const testing = std.testing;

    var lx = try Lexer.init(testing.allocator, "// this is a comment", .{});
    defer lx.deinit();

    const toks = try lx.getTokens();
    try testing.expect(toks.len >= 2);

    const first = &toks[0];
    try testing.expectEqual(TokenType.comment, first.type);
    try testing.expect(first.isBuffered() == true);
}
2026-01-24 23:53:19 +05:30
// A `//-` comment is lexed as an unbuffered `comment` token.
test "comment without buffer" {
    const testing = std.testing;

    var lx = try Lexer.init(testing.allocator, "//- this is a silent comment", .{});
    defer lx.deinit();

    const toks = try lx.getTokens();
    try testing.expect(toks.len >= 2);

    const first = &toks[0];
    try testing.expectEqual(TokenType.comment, first.type);
    try testing.expect(first.isBuffered() == false);
}
2026-01-24 23:53:19 +05:30
// `= expr` is lexed as a buffered `code` token that should be escaped.
test "code with escape" {
    const testing = std.testing;

    var lx = try Lexer.init(testing.allocator, "= foo", .{});
    defer lx.deinit();

    const toks = try lx.getTokens();
    try testing.expect(toks.len >= 2);

    const first = &toks[0];
    try testing.expectEqual(TokenType.code, first.type);
    try testing.expect(first.shouldEscape() == true);
    try testing.expect(first.isBuffered() == true);
}
2026-01-24 23:53:19 +05:30
// `!= expr` is lexed as a buffered `code` token that should not be escaped.
test "code without escape" {
    const testing = std.testing;

    var lx = try Lexer.init(testing.allocator, "!= foo", .{});
    defer lx.deinit();

    const toks = try lx.getTokens();
    try testing.expect(toks.len >= 2);

    const first = &toks[0];
    try testing.expectEqual(TokenType.code, first.type);
    try testing.expect(first.shouldEscape() == false);
    try testing.expect(first.isBuffered() == true);
}
2026-01-24 23:53:19 +05:30
// A value-less attribute is lexed as an `attribute` token named after the
// attribute and carrying the boolean value `true`.
test "boolean attribute" {
    const testing = std.testing;

    var lx = try Lexer.init(testing.allocator, "input(disabled)", .{});
    defer lx.deinit();

    const toks = try lx.getTokens();

    // Pick the first attribute token, if there is one.
    const attr: ?Token = for (toks) |candidate| {
        if (candidate.type == .attribute) break candidate;
    } else null;

    if (attr) |found| {
        try testing.expectEqualStrings("disabled", found.getName().?);
        // Boolean attribute should have boolean true value
        try testing.expect(found.val.getBool().? == true);
    }

    try testing.expect(attr != null);
}