diff --git a/CLAUDE.md b/CLAUDE.md
index fba20d3..4f40325 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -213,10 +213,10 @@ button(disabled=true) // disabled="disabled"
### Text & Interpolation
```pug
-p Hello #{name} // escaped interpolation
-p Hello !{rawHtml} // unescaped interpolation
-p= variable // buffered code (escaped)
-p!= rawVariable // buffered code (unescaped)
+p Hello #{name} // escaped interpolation (SAFE - default)
+p Hello !{rawHtml} // unescaped interpolation (UNSAFE - trusted content only)
+p= variable // buffered code (escaped, SAFE)
+p!= rawVariable // buffered code (unescaped, UNSAFE)
| Piped text line
p.
Multi-line
@@ -227,6 +227,8 @@ p.
h1.header #{title} // renders
```
+**Security Note**: By default, `#{}` and `=` escape HTML special characters (`<`, `>`, `&`, `"`, `'`) as entities to prevent XSS attacks. Only use `!{}` or `!=` for content you fully trust (e.g., pre-sanitized HTML from your own code). Never use unescaped output for user-provided data.
+
### Tag Interpolation
```pug
p This is #[em emphasized] text
diff --git a/build.zig.zon b/build.zig.zon
index 456a76c..2df70e5 100644
--- a/build.zig.zon
+++ b/build.zig.zon
@@ -1,6 +1,6 @@
.{
.name = .pugz,
- .version = "0.2.0",
+ .version = "0.2.1",
.fingerprint = 0x822db0790e17621d, // Changing this has security and trust implications.
.minimum_zig_version = "0.15.2",
.dependencies = .{},
diff --git a/src/build_templates.zig b/src/build_templates.zig
index 12c56dc..76f65e3 100644
--- a/src/build_templates.zig
+++ b/src/build_templates.zig
@@ -36,7 +36,9 @@
//! ```
const std = @import("std");
-const Lexer = @import("lexer.zig").Lexer;
+const lexer_mod = @import("lexer.zig");
+const Lexer = lexer_mod.Lexer;
+const Diagnostic = lexer_mod.Diagnostic;
const Parser = @import("parser.zig").Parser;
const ast = @import("ast.zig");
@@ -316,6 +318,17 @@ fn generateSingleFile(
try file.writeAll(out.items);
}
+/// Reports `diag` through `std.log.err` in compiler-style form.
+/// The first record is `file_path:line:column: message`; the source snippet
+/// and the hint each get their own record and are skipped when absent.
+fn logDiagnostic(file_path: []const u8, diag: Diagnostic) void {
+    std.log.err("{s}:{d}:{d}: {s}", .{ file_path, diag.line, diag.column, diag.message });
+    if (diag.source_line) |snippet| std.log.err(" | {s}", .{snippet});
+    if (diag.suggestion) |advice| std.log.err(" = hint: {s}", .{advice});
+}
+
/// Compiles a single .pug template into a Zig function.
/// Handles three cases:
/// - Empty templates: return ""
@@ -332,13 +345,21 @@ fn compileTemplate(
var lexer = Lexer.init(allocator, source);
defer lexer.deinit();
const tokens = lexer.tokenize() catch |err| {
- std.log.err("Tokenize error in '{s}': {}", .{ name, err });
+ if (lexer.getDiagnostic()) |diag| {
+ logDiagnostic(name, diag);
+ } else {
+ std.log.err("Tokenize error in '{s}': {}", .{ name, err });
+ }
return err;
};
- var parser = Parser.init(allocator, tokens);
+ var parser = Parser.initWithSource(allocator, tokens, source);
const doc = parser.parse() catch |err| {
- std.log.err("Parse error in '{s}': {}", .{ name, err });
+ if (parser.getDiagnostic()) |diag| {
+ logDiagnostic(name, diag);
+ } else {
+ std.log.err("Parse error in '{s}': {}", .{ name, err });
+ }
return err;
};
@@ -487,6 +508,86 @@ fn nodeHasDynamic(node: ast.Node) bool {
};
}
+/// Checks if a mixin body references `attributes` (for &attributes pass-through).
+/// Used to avoid emitting unused mixin_attrs struct in generated code.
+///
+/// Walks `nodes` recursively and returns true at the first hit. A hit is an
+/// element with spread attributes, or an attribute value, inline-text
+/// interpolation, buffered-code expression or text interpolation for which
+/// `exprReferencesAttributes` is true. Returns false for an empty slice.
+///
+/// NOTE(review): only the expressions listed above are inspected. If the AST
+/// also stores expressions elsewhere (e.g. conditions or loop collections),
+/// a reference to `attributes` there would not be detected - confirm against ast.zig.
+fn mixinUsesAttributes(nodes: []const ast.Node) bool {
+ for (nodes) |node| {
+ switch (node) {
+ .element => |e| {
+ // Check spread_attributes field
+ if (e.spread_attributes != null) return true;
+
+ // Check attribute values for 'attributes' reference
+ for (e.attributes) |attr| {
+ if (attr.value) |val| {
+ if (exprReferencesAttributes(val)) return true;
+ }
+ }
+
+ // Check inline text for interpolated attributes reference
+ if (e.inline_text) |segs| {
+ if (textSegmentsReferenceAttributes(segs)) return true;
+ }
+
+ // Check buffered code
+ if (e.buffered_code) |bc| {
+ if (exprReferencesAttributes(bc.expression)) return true;
+ }
+
+ // Recurse into children
+ if (mixinUsesAttributes(e.children)) return true;
+ },
+ .text => |t| {
+ if (textSegmentsReferenceAttributes(t.segments)) return true;
+ },
+ // Control-flow nodes: only their child node lists are searched.
+ .conditional => |c| {
+ for (c.branches) |br| {
+ if (mixinUsesAttributes(br.children)) return true;
+ }
+ },
+ .each => |e| {
+ if (mixinUsesAttributes(e.children)) return true;
+ if (mixinUsesAttributes(e.else_children)) return true;
+ },
+ .case => |c| {
+ for (c.whens) |when| {
+ if (mixinUsesAttributes(when.children)) return true;
+ }
+ if (mixinUsesAttributes(c.default_children)) return true;
+ },
+ .block => |b| {
+ if (mixinUsesAttributes(b.children)) return true;
+ },
+ // Every other node kind is treated as not referencing `attributes`.
+ else => {},
+ }
+ }
+ return false;
+}
+
+/// Reports whether `expr` is the bare identifier `attributes` or a member
+/// access rooted at it (e.g. "attributes.class").
+/// Only the start of the expression is examined, so a longer identifier such
+/// as "attributesFoo" does not match.
+fn exprReferencesAttributes(expr: []const u8) bool {
+    const attrs_ident = "attributes";
+    if (std.mem.eql(u8, expr, attrs_ident)) return true;
+    return std.mem.startsWith(u8, expr, attrs_ident ++ ".");
+}
+
+/// Reports whether any escaped or unescaped interpolation in `segs` holds an
+/// expression that `exprReferencesAttributes` accepts. Other segment kinds
+/// are skipped.
+fn textSegmentsReferenceAttributes(segs: []const ast.TextSegment) bool {
+    for (segs) |segment| {
+        const expr_text = switch (segment) {
+            .interp_escaped, .interp_unescaped => |expr| expr,
+            else => continue,
+        };
+        if (exprReferencesAttributes(expr_text)) return true;
+    }
+    return false;
+}
+
/// Zig reserved keywords - field names matching these must be escaped with @"..."
/// when used in generated code (e.g., @"type" instead of type)
const zig_keywords = std.StaticStringMap(void).initComptime(.{
@@ -575,7 +676,6 @@ const Compiler = struct {
uses_data: bool, // True if template accesses the data parameter 'd'
mixin_depth: usize, // Nesting level for generating unique mixin variable names
current_attrs_var: ?[]const u8, // Variable name for current mixin's &attributes
- used_attrs_var: bool, // True if current mixin accessed its attributes
fn init(
allocator: std.mem.Allocator,
@@ -597,7 +697,6 @@ const Compiler = struct {
.uses_data = false,
.mixin_depth = 0,
.current_attrs_var = null,
- .used_attrs_var = false,
};
}
@@ -689,13 +788,21 @@ const Compiler = struct {
var lexer = Lexer.init(self.allocator, source);
const tokens = lexer.tokenize() catch |err| {
- std.log.err("Tokenize error in included template '{s}': {}", .{ path, err });
+ if (lexer.getDiagnostic()) |diag| {
+ logDiagnostic(path, diag);
+ } else {
+ std.log.err("Tokenize error in included template '{s}': {}", .{ path, err });
+ }
return err;
};
- var parser = Parser.init(self.allocator, tokens);
+ var parser = Parser.initWithSource(self.allocator, tokens, source);
return parser.parse() catch |err| {
- std.log.err("Parse error in included template '{s}': {}", .{ path, err });
+ if (parser.getDiagnostic()) |diag| {
+ logDiagnostic(path, diag);
+ } else {
+ std.log.err("Parse error in included template '{s}': {}", .{ path, err });
+ }
return err;
};
}
@@ -1195,7 +1302,6 @@ const Compiler = struct {
// Special case: attributes.X should use current mixin's attributes variable
if (std.mem.eql(u8, base, "attributes")) {
if (self.current_attrs_var) |attrs_var| {
- self.used_attrs_var = true;
return std.fmt.bufPrint(buf, "{s}.{s}", .{ attrs_var, rest }) catch expr;
}
}
@@ -1215,7 +1321,6 @@ const Compiler = struct {
// Special case: 'attributes' alone should use current mixin's attributes variable
if (std.mem.eql(u8, expr, "attributes")) {
if (self.current_attrs_var) |attrs_var| {
- self.used_attrs_var = true;
return attrs_var;
}
}
@@ -1573,71 +1678,67 @@ const Compiler = struct {
try self.writer.writeAll("};\n");
}
- // Handle mixin call attributes: +mixin(args)(class="foo", data-id="bar")
- // Create an 'attributes' struct with optional fields that the mixin body can access
- // Use unique name based on mixin depth to avoid shadowing in nested mixin calls
- self.mixin_depth += 1;
- const current_depth = self.mixin_depth;
+ // Check if mixin body actually uses &attributes before emitting the struct
+ const uses_attributes = mixinUsesAttributes(mixin_def.children);
// Save previous attrs var and restore after mixin body
const prev_attrs_var = self.current_attrs_var;
- const prev_used_attrs = self.used_attrs_var;
- self.used_attrs_var = false;
+ defer self.current_attrs_var = prev_attrs_var;
- // Generate unique attribute variable name for this mixin depth
- var attr_var_buf: [32]u8 = undefined;
- const attr_var_name = std.fmt.bufPrint(&attr_var_buf, "mixin_attrs_{d}", .{current_depth}) catch "mixin_attrs";
+ // Backing storage for the attributes variable name. It is declared in the
+ // enclosing scope because current_attrs_var and mixin_params keep a slice
+ // into it while the mixin body is emitted, after the `if` block has ended.
+ var attr_var_buf: [32]u8 = undefined;
+ // Only emit attributes struct if the mixin actually uses it
+ if (uses_attributes) {
+ // Use unique name based on mixin depth to avoid shadowing in nested mixin calls
+ self.mixin_depth += 1;
+ const current_depth = self.mixin_depth;
- // Set current attrs var for buildAccessor to use
- self.current_attrs_var = attr_var_name;
+ const attr_var_name = std.fmt.bufPrint(&attr_var_buf, "mixin_attrs_{d}", .{current_depth}) catch "mixin_attrs";
- try self.mixin_params.append(self.allocator, attr_var_name);
- try self.writeIndent();
- try self.writer.print("const {s}: struct {{\n", .{attr_var_name});
- self.depth += 1;
- // Define fields as optional with defaults
- try self.writeIndent();
- try self.writer.writeAll("class: []const u8 = \"\",\n");
- try self.writeIndent();
- try self.writer.writeAll("id: []const u8 = \"\",\n");
- try self.writeIndent();
- try self.writer.writeAll("style: []const u8 = \"\",\n");
- self.depth -= 1;
- try self.writeIndent();
- try self.writer.writeAll("} = .{\n");
- self.depth += 1;
- for (call.attributes) |attr| {
- // Only emit known attributes (class, id, style for now)
- if (std.mem.eql(u8, attr.name, "class") or
- std.mem.eql(u8, attr.name, "id") or
- std.mem.eql(u8, attr.name, "style"))
- {
- try self.writeIndent();
- try self.writer.print(".{s} = ", .{attr.name});
- if (attr.value) |val| {
- // Check if it's a string literal
- if (val.len >= 2 and (val[0] == '"' or val[0] == '\'')) {
- try self.writer.print("{s},\n", .{val});
+ self.current_attrs_var = attr_var_name;
+ try self.mixin_params.append(self.allocator, attr_var_name);
+
+ try self.writeIndent();
+ try self.writer.print("const {s}: struct {{\n", .{attr_var_name});
+ self.depth += 1;
+ try self.writeIndent();
+ try self.writer.writeAll("class: []const u8 = \"\",\n");
+ try self.writeIndent();
+ try self.writer.writeAll("id: []const u8 = \"\",\n");
+ try self.writeIndent();
+ try self.writer.writeAll("style: []const u8 = \"\",\n");
+ self.depth -= 1;
+ try self.writeIndent();
+ try self.writer.writeAll("} = .{\n");
+ self.depth += 1;
+
+ for (call.attributes) |attr| {
+ if (std.mem.eql(u8, attr.name, "class") or
+ std.mem.eql(u8, attr.name, "id") or
+ std.mem.eql(u8, attr.name, "style"))
+ {
+ try self.writeIndent();
+ try self.writer.print(".{s} = ", .{attr.name});
+ if (attr.value) |val| {
+ if (val.len >= 2 and (val[0] == '"' or val[0] == '\'')) {
+ try self.writer.print("{s},\n", .{val});
+ } else {
+ var accessor_buf: [512]u8 = undefined;
+ const accessor = self.buildAccessor(val, &accessor_buf);
+ try self.writer.print("{s},\n", .{accessor});
+ }
} else {
- // It's a variable reference
- var accessor_buf: [512]u8 = undefined;
- const accessor = self.buildAccessor(val, &accessor_buf);
- try self.writer.print("{s},\n", .{accessor});
+ try self.writer.writeAll("\"\",\n");
}
- } else {
- try self.writer.writeAll("\"\",\n");
}
}
+
+ self.depth -= 1;
+ try self.writeIndent();
+ try self.writer.writeAll("};\n");
}
- self.depth -= 1;
- try self.writeIndent();
- try self.writer.writeAll("};\n");
// Emit mixin body
- // Note: block content (call.block_children) is handled by mixin_block nodes
- // For now, we'll inline the mixin body directly
for (mixin_def.children) |child| {
- // Handle mixin_block specially - replace with call's block_children
if (child == .mixin_block) {
for (call.block_children) |block_child| {
try self.emitNode(block_child);
@@ -1647,22 +1748,15 @@ const Compiler = struct {
}
}
- // Suppress unused variable warning if attributes wasn't used
- if (!self.used_attrs_var) {
- try self.writeIndent();
- try self.writer.print("_ = {s};\n", .{attr_var_name});
- }
-
// Close scope block
try self.flush();
self.depth -= 1;
try self.writeIndent();
try self.writer.writeAll("}\n");
- // Restore previous state
- self.current_attrs_var = prev_attrs_var;
- self.used_attrs_var = prev_used_attrs;
- self.mixin_depth -= 1;
+ if (uses_attributes) {
+ self.mixin_depth -= 1;
+ }
}
/// Attempts to load a mixin from the mixins/ subdirectory.
diff --git a/src/diagnostic.zig b/src/diagnostic.zig
new file mode 100644
index 0000000..02bc75d
--- /dev/null
+++ b/src/diagnostic.zig
@@ -0,0 +1,253 @@
+//! Diagnostic - Rich error reporting for Pug template parsing.
+//!
+//! Provides structured error information including:
+//! - Line and column numbers
+//! - Source code snippet showing the error location
+//! - Descriptive error messages
+//! - Optional fix suggestions
+//!
+//! ## Usage
+//! ```zig
+//! var lexer = Lexer.init(allocator, source);
+//! const tokens = lexer.tokenize() catch |err| {
+//! if (lexer.getDiagnostic()) |diag| {
+//! std.debug.print("{}\n", .{diag});
+//! }
+//! return err;
+//! };
+//! ```
+
+const std = @import("std");
+
+/// How serious a diagnostic is.
+pub const Severity = enum {
+    @"error",
+    warning,
+    hint,
+
+    /// Returns the label used when rendering; it is the tag's own name
+    /// ("error", "warning" or "hint").
+    pub fn toString(self: Severity) []const u8 {
+        return @tagName(self);
+    }
+};
+
+/// A diagnostic message with rich context about an error or warning.
+///
+/// All slices are stored exactly as given; `simple` and `withContext` make no
+/// copies, so the referenced memory must outlive the diagnostic.
+pub const Diagnostic = struct {
+ /// Severity level (error, warning, hint)
+ severity: Severity = .@"error",
+ /// 1-based line number where the error occurred
+ line: u32,
+ /// 1-based column number where the error occurred
+ column: u32,
+ /// Length of the problematic span (0 if unknown; rendered as a single caret)
+ length: u32 = 0,
+ /// Human-readable error message
+ message: []const u8,
+ /// Source line containing the error (for snippet display)
+ source_line: ?[]const u8 = null,
+ /// Optional suggestion for fixing the error
+ suggestion: ?[]const u8 = null,
+ /// Optional error code for programmatic handling
+ code: ?[]const u8 = null,
+
+ /// Formats the diagnostic for display.
+ /// `fmt` and `options` are ignored. No file name is printed because the
+ /// diagnostic does not carry one; the snippet block is omitted when
+ /// `source_line` is null and the hint line when `suggestion` is null.
+ /// Output format:
+ /// ```
+ /// error[E001]: Unterminated interpolation
+ /// --> line 5:12
+ /// |
+ /// 5 | p Hello #{name
+ /// | ^
+ /// = hint: Add closing }
+ /// ```
+ pub fn format(
+ self: Diagnostic,
+ comptime fmt: []const u8,
+ options: std.fmt.FormatOptions,
+ writer: anytype,
+ ) !void {
+ _ = fmt;
+ _ = options;
+
+ // Header: error[CODE]: message
+ try writer.print("{s}", .{self.severity.toString()});
+ if (self.code) |code| {
+ try writer.print("[{s}]", .{code});
+ }
+ try writer.print(": {s}\n", .{self.message});
+
+ // Location: --> line N:M (line and column only, no file name)
+ try writer.print(" --> line {d}:{d}\n", .{ self.line, self.column });
+
+ // Source snippet with caret pointer
+ if (self.source_line) |src| {
+ // Gutter padding is sized to the width of the line number.
+ const line_num_width = digitCount(self.line);
+
+ // Empty line with gutter
+ try writer.writeByteNTimes(' ', line_num_width + 1);
+ try writer.writeAll("|\n");
+
+ // Source line
+ try writer.print("{d} | {s}\n", .{ self.line, src });
+
+ // Caret line pointing to error
+ try writer.writeByteNTimes(' ', line_num_width + 1);
+ try writer.writeAll("| ");
+
+ // Spaces before caret (account for tabs)
+ // One padding unit per source byte before `column`; the loop also
+ // stops at the end of `src`, so a column past the line end places
+ // the caret right after the last byte.
+ var col: u32 = 1;
+ for (src) |c| {
+ if (col >= self.column) break;
+ if (c == '\t') {
+ try writer.writeAll(" "); // 4-space tab
+ } else {
+ try writer.writeByte(' ');
+ }
+ col += 1;
+ }
+
+ // Carets for the error span
+ const caret_count = if (self.length > 0) self.length else 1;
+ try writer.writeByteNTimes('^', caret_count);
+ try writer.writeByte('\n');
+ }
+
+ // Suggestion hint
+ if (self.suggestion) |hint| {
+ try writer.print(" = hint: {s}\n", .{hint});
+ }
+ }
+
+ /// Creates a simple diagnostic without source context.
+ /// Severity defaults to error; snippet, suggestion and code stay null.
+ pub fn simple(line: u32, column: u32, message: []const u8) Diagnostic {
+ return .{
+ .line = line,
+ .column = column,
+ .message = message,
+ };
+ }
+
+ /// Creates a diagnostic with full context.
+ /// Severity defaults to error, `length` to 0 and `code` to null.
+ pub fn withContext(
+ line: u32,
+ column: u32,
+ message: []const u8,
+ source_line: []const u8,
+ suggestion: ?[]const u8,
+ ) Diagnostic {
+ return .{
+ .line = line,
+ .column = column,
+ .message = message,
+ .source_line = source_line,
+ .suggestion = suggestion,
+ };
+ }
+};
+
+/// Counts the decimal digits needed to print `n` (used for gutter alignment).
+/// Zero counts as one digit.
+fn digitCount(n: u32) usize {
+    var digits: usize = 1;
+    var rest = n / 10;
+    while (rest != 0) : (rest /= 10) {
+        digits += 1;
+    }
+    return digits;
+}
+
+/// Extracts the full line of `source` that contains byte offset `position`.
+///
+/// The returned slice borrows from `source` and excludes the terminating '\n'.
+/// When `position` is on a '\n' byte, the line that newline terminates is
+/// returned. `position == source.len` (end of input) yields the final line,
+/// so errors detected at EOF still get a snippet. Returns null only when
+/// `position` lies beyond the end of `source`.
+pub fn extractSourceLine(source: []const u8, position: usize) ?[]const u8 {
+    if (position > source.len) return null;
+
+    // The line starts just after the previous newline, or at the start of the text.
+    const line_start = if (std.mem.lastIndexOfScalar(u8, source[0..position], '\n')) |newline|
+        newline + 1
+    else
+        0;
+
+    // The line ends at the next newline at or after `position`, or at the end of the text.
+    const line_end = std.mem.indexOfScalarPos(u8, source, position, '\n') orelse source.len;
+
+    return source[line_start..line_end];
+}
+
+/// Converts a byte offset in `source` into a 1-based line and column.
+/// Every byte before `position` advances the column, and a '\n' starts the
+/// next line at column 1. Offsets past the end are treated as end of text.
+pub fn positionToLineCol(source: []const u8, position: usize) struct { line: u32, column: u32 } {
+    var line: u32 = 1;
+    var column: u32 = 1;
+
+    const scanned = source[0..@min(position, source.len)];
+    for (scanned) |byte| {
+        if (byte == '\n') {
+            line += 1;
+            column = 1;
+        } else {
+            column += 1;
+        }
+    }
+
+    return .{ .line = line, .column = column };
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Tests
+// ─────────────────────────────────────────────────────────────────────────────
+
+// Renders a fully populated diagnostic into a fixed buffer and checks that
+// each part (code, message, location, snippet, hint) shows up in the output.
+test "Diagnostic formatting" {
+ const diag = Diagnostic{
+ .line = 5,
+ .column = 12,
+ .message = "Unterminated interpolation",
+ .source_line = "p Hello #{name",
+ .suggestion = "Add closing }",
+ .code = "E001",
+ };
+
+ var buf: [512]u8 = undefined;
+ var fbs = std.io.fixedBufferStream(&buf);
+ // format() is called directly rather than through a "{}" format string.
+ try diag.format("", .{}, fbs.writer());
+
+ // Only substrings are asserted, so exact spacing and caret placement are not pinned.
+ const output = fbs.getWritten();
+ try std.testing.expect(std.mem.indexOf(u8, output, "error[E001]") != null);
+ try std.testing.expect(std.mem.indexOf(u8, output, "Unterminated interpolation") != null);
+ try std.testing.expect(std.mem.indexOf(u8, output, "line 5:12") != null);
+ try std.testing.expect(std.mem.indexOf(u8, output, "p Hello #{name") != null);
+ try std.testing.expect(std.mem.indexOf(u8, output, "hint: Add closing }") != null);
+}
+
+// Covers a mid-line offset plus the boundaries: first byte, a newline byte,
+// the unterminated last line, and an offset beyond the text.
+test "extractSourceLine" {
+    const source = "line one\nline two\nline three";
+
+    // Position in middle of "line two"
+    const line = extractSourceLine(source, 12);
+    try std.testing.expect(line != null);
+    try std.testing.expectEqualStrings("line two", line.?);
+
+    // First byte of the text
+    try std.testing.expectEqualStrings("line one", extractSourceLine(source, 0).?);
+
+    // Offset 8 is the '\n' ending "line one"; it belongs to that line
+    try std.testing.expectEqualStrings("line one", extractSourceLine(source, 8).?);
+
+    // Last line has no trailing newline
+    try std.testing.expectEqualStrings("line three", extractSourceLine(source, 20).?);
+
+    // Offsets beyond the text have no line
+    try std.testing.expect(extractSourceLine(source, 100) == null);
+}
+
+// Checks the 1-based line/column mapping at the start of the text, inside a
+// later line, and on the first byte after a newline.
+test "positionToLineCol" {
+ const source = "ab\ncde\nfghij";
+
+ // Position 0 = line 1, col 1
+ var pos = positionToLineCol(source, 0);
+ try std.testing.expectEqual(@as(u32, 1), pos.line);
+ try std.testing.expectEqual(@as(u32, 1), pos.column);
+
+ // Position 4 = line 2, col 2 (the 'd' in "cde")
+ pos = positionToLineCol(source, 4);
+ try std.testing.expectEqual(@as(u32, 2), pos.line);
+ try std.testing.expectEqual(@as(u32, 2), pos.column);
+
+ // Position 7 = line 3, col 1 (the 'f' in "fghij")
+ pos = positionToLineCol(source, 7);
+ try std.testing.expectEqual(@as(u32, 3), pos.line);
+ try std.testing.expectEqual(@as(u32, 1), pos.column);
+}
diff --git a/src/lexer.zig b/src/lexer.zig
index d97f2b1..9f08f2c 100644
--- a/src/lexer.zig
+++ b/src/lexer.zig
@@ -3,8 +3,23 @@
//! The lexer handles indentation-based nesting (emitting indent/dedent tokens),
//! Pug-specific syntax (tags, classes, IDs, attributes), and text content
//! including interpolation markers.
+//!
+//! ## Error Diagnostics
+//! When tokenization fails, call `getDiagnostic()` to get rich error info:
+//! ```zig
+//! var lexer = Lexer.init(allocator, source);
+//! const tokens = lexer.tokenize() catch |err| {
+//! if (lexer.getDiagnostic()) |diag| {
+//! std.debug.print("{}\n", .{diag});
+//! }
+//! return err;
+//! };
+//! ```
const std = @import("std");
+const diagnostic = @import("diagnostic.zig");
+
+pub const Diagnostic = diagnostic.Diagnostic;
/// All possible token types produced by the lexer.
pub const TokenType = enum {
@@ -136,6 +151,8 @@ pub const Lexer = struct {
in_raw_block: bool,
raw_block_indent: usize,
raw_block_started: bool,
+ /// Last error diagnostic (populated on error)
+ last_diagnostic: ?Diagnostic,
/// Creates a new lexer for the given source.
/// Does not allocate; allocations happen during tokenize().
@@ -153,9 +170,27 @@ pub const Lexer = struct {
.in_raw_block = false,
.raw_block_indent = 0,
.raw_block_started = false,
+ .last_diagnostic = null,
};
}
+ /// Returns the last error diagnostic, if any.
+ /// Call this after tokenize() returns an error to get detailed error info.
+ /// The result is null until a diagnostic has been recorded, and again once
+ /// the lexer state has been reset. Its `source_line` is a slice of the
+ /// lexer's source text (or an empty string), so keep that text alive while
+ /// the diagnostic is in use.
+ pub fn getDiagnostic(self: *const Lexer) ?Diagnostic {
+ return self.last_diagnostic;
+ }
+
+    /// Records a diagnostic at the lexer's current line and column.
+    ///
+    /// `message` and `suggestion` are stored as given. The snippet is the
+    /// source line containing `pos`; when no line can be extracted it is left
+    /// null so consumers skip the snippet instead of printing an empty one.
+    /// The snippet is a slice of `source`, not a copy.
+    fn setDiagnostic(self: *Lexer, message: []const u8, suggestion: ?[]const u8) void {
+        self.last_diagnostic = .{
+            .line = @intCast(self.line),
+            .column = @intCast(self.column),
+            .message = message,
+            .source_line = diagnostic.extractSourceLine(self.source, self.pos),
+            .suggestion = suggestion,
+        };
+    }
+
/// Releases all allocated memory (tokens and indent stack).
/// Call this when done with the lexer, typically via defer.
pub fn deinit(self: *Lexer) void {
@@ -201,6 +236,7 @@ pub const Lexer = struct {
self.column = 1;
self.at_line_start = true;
self.current_indent = 0;
+ self.last_diagnostic = null;
}
/// Appends a token to the output list.
@@ -858,7 +894,10 @@ pub const Lexer = struct {
brace_depth += 1;
} else if (c == '}') {
if (brace_depth == 0) {
- // Unmatched closing brace - shouldn't happen if called correctly
+ self.setDiagnostic(
+ "Unmatched closing brace '}'",
+ "Remove the extra '}' or add a matching '{'",
+ );
return LexerError.UnmatchedBrace;
}
brace_depth -= 1;
@@ -872,6 +911,10 @@ pub const Lexer = struct {
// Check for unterminated object literal
if (brace_depth > 0) {
+ self.setDiagnostic(
+ "Unterminated object literal - missing closing '}'",
+ "Add a closing '}' to complete the object",
+ );
return LexerError.UnterminatedString;
}
@@ -889,6 +932,10 @@ pub const Lexer = struct {
bracket_depth += 1;
} else if (c == ']') {
if (bracket_depth == 0) {
+ self.setDiagnostic(
+ "Unmatched closing bracket ']'",
+ "Remove the extra ']' or add a matching '['",
+ );
return LexerError.UnmatchedBrace;
}
bracket_depth -= 1;
@@ -901,6 +948,10 @@ pub const Lexer = struct {
}
if (bracket_depth > 0) {
+ self.setDiagnostic(
+ "Unterminated array literal - missing closing ']'",
+ "Add a closing ']' to complete the array",
+ );
return LexerError.UnterminatedString;
}
diff --git a/src/parser.zig b/src/parser.zig
index eb494cd..355a786 100644
--- a/src/parser.zig
+++ b/src/parser.zig
@@ -6,10 +6,23 @@
//! - Element construction (tag, classes, id, attributes)
//! - Control flow (if/else, each, while)
//! - Mixins, includes, and template inheritance
+//!
+//! ## Error Diagnostics
+//! When parsing fails, call `getDiagnostic()` to get rich error info:
+//! ```zig
+//! var parser = Parser.initWithSource(allocator, tokens, source); // keeps source for error snippets
+//! const doc = parser.parse() catch |err| {
+//! if (parser.getDiagnostic()) |diag| {
+//! std.debug.print("{}\n", .{diag});
+//! }
+//! return err;
+//! };
+//! ```
const std = @import("std");
const lexer = @import("lexer.zig");
const ast = @import("ast.zig");
+const diagnostic = @import("diagnostic.zig");
const Token = lexer.Token;
const TokenType = lexer.TokenType;
@@ -17,6 +30,8 @@ const Node = ast.Node;
const Attribute = ast.Attribute;
const TextSegment = ast.TextSegment;
+pub const Diagnostic = diagnostic.Diagnostic;
+
/// Errors that can occur during parsing.
pub const ParserError = error{
UnexpectedToken,
@@ -42,6 +57,10 @@ pub const Parser = struct {
tokens: []const Token,
pos: usize,
allocator: std.mem.Allocator,
+ /// Original source text (for error snippets)
+ source: ?[]const u8,
+ /// Last error diagnostic (populated on error)
+ last_diagnostic: ?Diagnostic,
/// Creates a new parser for the given tokens.
pub fn init(allocator: std.mem.Allocator, tokens: []const Token) Parser {
@@ -49,6 +68,53 @@ pub const Parser = struct {
.tokens = tokens,
.pos = 0,
.allocator = allocator,
+ .source = null,
+ .last_diagnostic = null,
+ };
+ }
+
+    /// Creates a parser that also keeps the original `source` text so
+    /// diagnostics can include better error messages. `source` is stored as a
+    /// slice, not copied.
+    pub fn initWithSource(allocator: std.mem.Allocator, tokens: []const Token, source: []const u8) Parser {
+        var result = Parser.init(allocator, tokens);
+        result.source = source;
+        return result;
+    }
+
+ /// Returns the last error diagnostic, if any.
+ /// Call this after parse() returns an error to get detailed error info.
+ /// The result is null when no diagnostic has been recorded, so callers
+ /// should keep a fallback message. Any `source_line` it carries is a slice
+ /// of the source text given to `initWithSource`.
+ pub fn getDiagnostic(self: *const Parser) ?Diagnostic {
+ return self.last_diagnostic;
+ }
+
+    /// Sets a diagnostic error with context from the current token.
+    ///
+    /// Uses the token at `pos`, or the last token when `pos` is past the end.
+    /// With an empty token list there is no token position to report, so the
+    /// diagnostic points at line 1, column 1.
+    fn setDiagnostic(self: *Parser, message: []const u8, suggestion: ?[]const u8) void {
+        if (self.tokens.len == 0) {
+            // Guard: `tokens.len - 1` would underflow on an empty list.
+            self.last_diagnostic = .{
+                .line = 1,
+                .column = 1,
+                .message = message,
+                .source_line = self.diagnosticSourceLine(1),
+                .suggestion = suggestion,
+            };
+            return;
+        }
+        const token_index = @min(self.pos, self.tokens.len - 1);
+        self.setDiagnosticAtToken(self.tokens[token_index], message, suggestion);
+    }
+
+    /// Returns line `line_number` of the stored source without its '\n', or
+    /// null when no source was supplied or the line does not exist.
+    /// NOTE(review): assumes token line numbers are 1-based, like
+    /// `Diagnostic.line` - confirm against the lexer.
+    fn diagnosticSourceLine(self: *const Parser, line_number: usize) ?[]const u8 {
+        const src_text = self.source orelse return null;
+        var line_iter = std.mem.splitScalar(u8, src_text, '\n');
+        var current_line: usize = 1;
+        while (line_iter.next()) |line_text| : (current_line += 1) {
+            if (current_line == line_number) return line_text;
+        }
+        return null;
+    }
+
+    /// Sets a diagnostic error for a specific token.
+    /// The snippet is the source line the token is on, when source text is
+    /// available; it is a slice of that text, not a copy.
+    fn setDiagnosticAtToken(self: *Parser, token: Token, message: []const u8, suggestion: ?[]const u8) void {
+        self.last_diagnostic = .{
+            .line = @intCast(token.line),
+            .column = @intCast(token.column),
+            .message = message,
+            .source_line = self.diagnosticSourceLine(@intCast(token.line)),
+            .suggestion = suggestion,
};
}
@@ -562,6 +628,10 @@ pub const Parser = struct {
value_name = before_in;
}
} else {
+ self.setDiagnostic(
+ "Missing collection in 'each' loop - expected 'in' keyword",
+ "Use syntax: each item in collection",
+ );
return ParserError.MissingCollection;
}
} else if (self.check(.tag)) {
@@ -584,6 +654,10 @@ pub const Parser = struct {
// Parse collection expression
collection = try self.parseRestOfLine();
} else {
+ self.setDiagnostic(
+ "Missing iterator variable in 'each' loop",
+ "Use syntax: each item in collection",
+ );
return ParserError.MissingIterator;
}
@@ -774,6 +848,10 @@ pub const Parser = struct {
if (self.check(.tag)) {
name = self.advance().value;
} else {
+ self.setDiagnostic(
+ "Missing mixin name after 'mixin' keyword",
+ "Use syntax: mixin name(params)",
+ );
return ParserError.MissingMixinName;
}
@@ -973,6 +1051,10 @@ pub const Parser = struct {
// No name - this is a mixin block placeholder
return .{ .mixin_block = {} };
} else {
+ self.setDiagnostic(
+ "Missing block name after 'block' keyword",
+ "Use syntax: block name",
+ );
return ParserError.MissingBlockName;
}
@@ -1005,6 +1087,10 @@ pub const Parser = struct {
} else if (self.check(.text)) {
name = std.mem.trim(u8, self.advance().value, " \t");
} else {
+ self.setDiagnostic(
+ "Missing block name after 'append' or 'prepend'",
+ "Use syntax: append blockname or prepend blockname",
+ );
return ParserError.MissingBlockName;
}
diff --git a/src/root.zig b/src/root.zig
index 5ccd8b7..1aa3d39 100644
--- a/src/root.zig
+++ b/src/root.zig
@@ -34,6 +34,7 @@ pub const parser = @import("parser.zig");
pub const codegen = @import("codegen.zig");
pub const runtime = @import("runtime.zig");
pub const view_engine = @import("view_engine.zig");
+pub const diagnostic = @import("diagnostic.zig");
// Re-export main types for convenience
pub const Lexer = lexer.Lexer;