diff --git a/CLAUDE.md b/CLAUDE.md index fba20d3..4f40325 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -213,10 +213,10 @@ button(disabled=true) // disabled="disabled" ### Text & Interpolation ```pug -p Hello #{name} // escaped interpolation -p Hello !{rawHtml} // unescaped interpolation -p= variable // buffered code (escaped) -p!= rawVariable // buffered code (unescaped) +p Hello #{name} // escaped interpolation (SAFE - default) +p Hello !{rawHtml} // unescaped interpolation (UNSAFE - trusted content only) +p= variable // buffered code (escaped, SAFE) +p!= rawVariable // buffered code (unescaped, UNSAFE) | Piped text line p. Multi-line @@ -227,6 +227,8 @@ p. h1.header #{title} // renders

Title Value

``` +**Security Note**: By default, `#{}` and `=` escape HTML entities (`<`, `>`, `&`, `"`, `'`) to prevent XSS attacks. Only use `!{}` or `!=` for content you fully trust (e.g., pre-sanitized HTML from your own code). Never use unescaped output for user-provided data. + ### Tag Interpolation ```pug p This is #[em emphasized] text diff --git a/build.zig.zon b/build.zig.zon index 456a76c..2df70e5 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -1,6 +1,6 @@ .{ .name = .pugz, - .version = "0.2.0", + .version = "0.2.1", .fingerprint = 0x822db0790e17621d, // Changing this has security and trust implications. .minimum_zig_version = "0.15.2", .dependencies = .{}, diff --git a/src/build_templates.zig b/src/build_templates.zig index 12c56dc..76f65e3 100644 --- a/src/build_templates.zig +++ b/src/build_templates.zig @@ -36,7 +36,9 @@ //! ``` const std = @import("std"); -const Lexer = @import("lexer.zig").Lexer; +const lexer_mod = @import("lexer.zig"); +const Lexer = lexer_mod.Lexer; +const Diagnostic = lexer_mod.Diagnostic; const Parser = @import("parser.zig").Parser; const ast = @import("ast.zig"); @@ -316,6 +318,17 @@ fn generateSingleFile( try file.writeAll(out.items); } +/// Logs a diagnostic error with file location in compiler-style format. +fn logDiagnostic(file_path: []const u8, diag: Diagnostic) void { + std.log.err("{s}:{d}:{d}: {s}", .{ file_path, diag.line, diag.column, diag.message }); + if (diag.source_line) |src_line| { + std.log.err(" | {s}", .{src_line}); + } + if (diag.suggestion) |hint| { + std.log.err(" = hint: {s}", .{hint}); + } +} + /// Compiles a single .pug template into a Zig function. 
/// Handles three cases: /// - Empty templates: return "" @@ -332,13 +345,21 @@ fn compileTemplate( var lexer = Lexer.init(allocator, source); defer lexer.deinit(); const tokens = lexer.tokenize() catch |err| { - std.log.err("Tokenize error in '{s}': {}", .{ name, err }); + if (lexer.getDiagnostic()) |diag| { + logDiagnostic(name, diag); + } else { + std.log.err("Tokenize error in '{s}': {}", .{ name, err }); + } return err; }; - var parser = Parser.init(allocator, tokens); + var parser = Parser.initWithSource(allocator, tokens, source); const doc = parser.parse() catch |err| { - std.log.err("Parse error in '{s}': {}", .{ name, err }); + if (parser.getDiagnostic()) |diag| { + logDiagnostic(name, diag); + } else { + std.log.err("Parse error in '{s}': {}", .{ name, err }); + } return err; }; @@ -487,6 +508,86 @@ fn nodeHasDynamic(node: ast.Node) bool { }; } +/// Checks if a mixin body references `attributes` (for &attributes pass-through). +/// Used to avoid emitting unused mixin_attrs struct in generated code. 
+fn mixinUsesAttributes(nodes: []const ast.Node) bool {
+    for (nodes) |node| {
+        // Evaluate each node kind to a single yes/no answer, then bail out on the first hit.
+        const references_attrs: bool = switch (node) {
+            .element => |elem| elem_blk: {
+                // `&attributes` spread on the element itself
+                if (elem.spread_attributes != null) break :elem_blk true;
+
+                // Attribute values such as class=attributes.class
+                for (elem.attributes) |attr| {
+                    if (attr.value) |value| {
+                        if (exprReferencesAttributes(value)) break :elem_blk true;
+                    }
+                }
+
+                // Interpolations inside the element's inline text
+                if (elem.inline_text) |segments| {
+                    if (textSegmentsReferenceAttributes(segments)) break :elem_blk true;
+                }
+
+                // Buffered code (`tag= expr`)
+                if (elem.buffered_code) |buffered| {
+                    if (exprReferencesAttributes(buffered.expression)) break :elem_blk true;
+                }
+
+                // Nested elements
+                break :elem_blk mixinUsesAttributes(elem.children);
+            },
+            .text => |txt| textSegmentsReferenceAttributes(txt.segments),
+            .conditional => |cond| cond_blk: {
+                for (cond.branches) |branch| {
+                    if (mixinUsesAttributes(branch.children)) break :cond_blk true;
+                }
+                break :cond_blk false;
+            },
+            .each => |loop| mixinUsesAttributes(loop.children) or
+                mixinUsesAttributes(loop.else_children),
+            .case => |case_node| case_blk: {
+                for (case_node.whens) |when| {
+                    if (mixinUsesAttributes(when.children)) break :case_blk true;
+                }
+                break :case_blk mixinUsesAttributes(case_node.default_children);
+            },
+            .block => |block_node| mixinUsesAttributes(block_node.children),
+            // Every other node kind is treated as not referencing `attributes`.
+            else => false,
+        };
+        if (references_attrs) return true;
+    }
+    return false;
+}
+
+/// Reports whether `expr` is the bare identifier `attributes` or a field access on it
+/// (e.g. "attributes.class"). Identifiers that merely start with the same letters,
+/// such as "attributesFoo", do not count.
+fn exprReferencesAttributes(expr: []const u8) bool {
+    const keyword = "attributes";
+    if (!std.mem.startsWith(u8, expr, keyword)) return false;
+    // After the prefix there is either nothing at all, or a '.' starting a field access.
+    return expr.len == keyword.len or expr[keyword.len] == '.';
+}
+
+/// Checks if text segments contain interpolations referencing 'attributes'.
+fn textSegmentsReferenceAttributes(segs: []const ast.TextSegment) bool { + for (segs) |seg| { + switch (seg) { + .interp_escaped, .interp_unescaped => |expr| { + if (exprReferencesAttributes(expr)) return true; + }, + else => {}, + } + } + return false; +} + /// Zig reserved keywords - field names matching these must be escaped with @"..." /// when used in generated code (e.g., @"type" instead of type) const zig_keywords = std.StaticStringMap(void).initComptime(.{ @@ -575,7 +676,6 @@ const Compiler = struct { uses_data: bool, // True if template accesses the data parameter 'd' mixin_depth: usize, // Nesting level for generating unique mixin variable names current_attrs_var: ?[]const u8, // Variable name for current mixin's &attributes - used_attrs_var: bool, // True if current mixin accessed its attributes fn init( allocator: std.mem.Allocator, @@ -597,7 +697,6 @@ const Compiler = struct { .uses_data = false, .mixin_depth = 0, .current_attrs_var = null, - .used_attrs_var = false, }; } @@ -689,13 +788,21 @@ const Compiler = struct { var lexer = Lexer.init(self.allocator, source); const tokens = lexer.tokenize() catch |err| { - std.log.err("Tokenize error in included template '{s}': {}", .{ path, err }); + if (lexer.getDiagnostic()) |diag| { + logDiagnostic(path, diag); + } else { + std.log.err("Tokenize error in included template '{s}': {}", .{ path, err }); + } return err; }; - var parser = Parser.init(self.allocator, tokens); + var parser = Parser.initWithSource(self.allocator, tokens, source); return parser.parse() catch |err| { - std.log.err("Parse error in included template '{s}': {}", .{ path, err }); + if (parser.getDiagnostic()) |diag| { + logDiagnostic(path, diag); + } else { + std.log.err("Parse error in included template '{s}': {}", .{ path, err }); + } return err; }; } @@ -1195,7 +1302,6 @@ const Compiler = struct { // Special case: attributes.X should use current mixin's attributes variable if (std.mem.eql(u8, base, "attributes")) { if 
(self.current_attrs_var) |attrs_var| {
-                    self.used_attrs_var = true;
                     return std.fmt.bufPrint(buf, "{s}.{s}", .{ attrs_var, rest }) catch expr;
                 }
             }
@@ -1215,7 +1321,6 @@ const Compiler = struct {
         // Special case: 'attributes' alone should use current mixin's attributes variable
         if (std.mem.eql(u8, expr, "attributes")) {
             if (self.current_attrs_var) |attrs_var| {
-                self.used_attrs_var = true;
                 return attrs_var;
             }
         }
@@ -1573,71 +1678,71 @@ const Compiler = struct {
             try self.writer.writeAll("};\n");
         }
 
-        // Handle mixin call attributes: +mixin(args)(class="foo", data-id="bar")
-        // Create an 'attributes' struct with optional fields that the mixin body can access
-        // Use unique name based on mixin depth to avoid shadowing in nested mixin calls
-        self.mixin_depth += 1;
-        const current_depth = self.mixin_depth;
+        // Check if mixin body actually uses &attributes before emitting the struct
+        const uses_attributes = mixinUsesAttributes(mixin_def.children);
 
         // Save previous attrs var and restore after mixin body
         const prev_attrs_var = self.current_attrs_var;
-        const prev_used_attrs = self.used_attrs_var;
-        self.used_attrs_var = false;
+        defer self.current_attrs_var = prev_attrs_var;
 
-        // Generate unique attribute variable name for this mixin depth
-        var attr_var_buf: [32]u8 = undefined;
-        const attr_var_name = std.fmt.bufPrint(&attr_var_buf, "mixin_attrs_{d}", .{current_depth}) catch "mixin_attrs";
+        // Backing storage for the generated variable name. It must live at function
+        // scope: `current_attrs_var` keeps a slice into it while the mixin body is
+        // emitted after the `if` block below has closed.
+        var attr_var_buf: [32]u8 = undefined;
+
+        // Only emit attributes struct if the mixin actually uses it
+        if (uses_attributes) {
+            // Use unique name based on mixin depth to avoid shadowing in nested mixin calls
+            self.mixin_depth += 1;
+            const current_depth = self.mixin_depth;
 
-        // Set current attrs var for buildAccessor to use
-        self.current_attrs_var = attr_var_name;
+            const attr_var_name = std.fmt.bufPrint(&attr_var_buf, "mixin_attrs_{d}", .{current_depth}) catch "mixin_attrs";
 
-        try self.mixin_params.append(self.allocator, attr_var_name);
-        try self.writeIndent();
-        try
self.writer.print("const {s}: struct {{\n", .{attr_var_name}); - self.depth += 1; - // Define fields as optional with defaults - try self.writeIndent(); - try self.writer.writeAll("class: []const u8 = \"\",\n"); - try self.writeIndent(); - try self.writer.writeAll("id: []const u8 = \"\",\n"); - try self.writeIndent(); - try self.writer.writeAll("style: []const u8 = \"\",\n"); - self.depth -= 1; - try self.writeIndent(); - try self.writer.writeAll("} = .{\n"); - self.depth += 1; - for (call.attributes) |attr| { - // Only emit known attributes (class, id, style for now) - if (std.mem.eql(u8, attr.name, "class") or - std.mem.eql(u8, attr.name, "id") or - std.mem.eql(u8, attr.name, "style")) - { - try self.writeIndent(); - try self.writer.print(".{s} = ", .{attr.name}); - if (attr.value) |val| { - // Check if it's a string literal - if (val.len >= 2 and (val[0] == '"' or val[0] == '\'')) { - try self.writer.print("{s},\n", .{val}); + self.current_attrs_var = attr_var_name; + try self.mixin_params.append(self.allocator, attr_var_name); + + try self.writeIndent(); + try self.writer.print("const {s}: struct {{\n", .{attr_var_name}); + self.depth += 1; + try self.writeIndent(); + try self.writer.writeAll("class: []const u8 = \"\",\n"); + try self.writeIndent(); + try self.writer.writeAll("id: []const u8 = \"\",\n"); + try self.writeIndent(); + try self.writer.writeAll("style: []const u8 = \"\",\n"); + self.depth -= 1; + try self.writeIndent(); + try self.writer.writeAll("} = .{\n"); + self.depth += 1; + + for (call.attributes) |attr| { + if (std.mem.eql(u8, attr.name, "class") or + std.mem.eql(u8, attr.name, "id") or + std.mem.eql(u8, attr.name, "style")) + { + try self.writeIndent(); + try self.writer.print(".{s} = ", .{attr.name}); + if (attr.value) |val| { + if (val.len >= 2 and (val[0] == '"' or val[0] == '\'')) { + try self.writer.print("{s},\n", .{val}); + } else { + var accessor_buf: [512]u8 = undefined; + const accessor = self.buildAccessor(val, &accessor_buf); + 
try self.writer.print("{s},\n", .{accessor}); + } } else { - // It's a variable reference - var accessor_buf: [512]u8 = undefined; - const accessor = self.buildAccessor(val, &accessor_buf); - try self.writer.print("{s},\n", .{accessor}); + try self.writer.writeAll("\"\",\n"); } - } else { - try self.writer.writeAll("\"\",\n"); } } + + self.depth -= 1; + try self.writeIndent(); + try self.writer.writeAll("};\n"); } - self.depth -= 1; - try self.writeIndent(); - try self.writer.writeAll("};\n"); // Emit mixin body - // Note: block content (call.block_children) is handled by mixin_block nodes - // For now, we'll inline the mixin body directly for (mixin_def.children) |child| { - // Handle mixin_block specially - replace with call's block_children if (child == .mixin_block) { for (call.block_children) |block_child| { try self.emitNode(block_child); @@ -1647,22 +1748,15 @@ const Compiler = struct { } } - // Suppress unused variable warning if attributes wasn't used - if (!self.used_attrs_var) { - try self.writeIndent(); - try self.writer.print("_ = {s};\n", .{attr_var_name}); - } - // Close scope block try self.flush(); self.depth -= 1; try self.writeIndent(); try self.writer.writeAll("}\n"); - // Restore previous state - self.current_attrs_var = prev_attrs_var; - self.used_attrs_var = prev_used_attrs; - self.mixin_depth -= 1; + if (uses_attributes) { + self.mixin_depth -= 1; + } } /// Attempts to load a mixin from the mixins/ subdirectory. diff --git a/src/diagnostic.zig b/src/diagnostic.zig new file mode 100644 index 0000000..02bc75d --- /dev/null +++ b/src/diagnostic.zig @@ -0,0 +1,253 @@ +//! Diagnostic - Rich error reporting for Pug template parsing. +//! +//! Provides structured error information including: +//! - Line and column numbers +//! - Source code snippet showing the error location +//! - Descriptive error messages +//! - Optional fix suggestions +//! +//! ## Usage +//! ```zig +//! var lexer = Lexer.init(allocator, source); +//! 
const tokens = lexer.tokenize() catch |err| { +//! if (lexer.getDiagnostic()) |diag| { +//! std.debug.print("{}\n", .{diag}); +//! } +//! return err; +//! }; +//! ``` + +const std = @import("std"); + +/// Severity level for diagnostics. +pub const Severity = enum { + @"error", + warning, + hint, + + pub fn toString(self: Severity) []const u8 { + return switch (self) { + .@"error" => "error", + .warning => "warning", + .hint => "hint", + }; + } +}; + +/// A diagnostic message with rich context about an error or warning. +pub const Diagnostic = struct { + /// Severity level (error, warning, hint) + severity: Severity = .@"error", + /// 1-based line number where the error occurred + line: u32, + /// 1-based column number where the error occurred + column: u32, + /// Length of the problematic span (0 if unknown) + length: u32 = 0, + /// Human-readable error message + message: []const u8, + /// Source line containing the error (for snippet display) + source_line: ?[]const u8 = null, + /// Optional suggestion for fixing the error + suggestion: ?[]const u8 = null, + /// Optional error code for programmatic handling + code: ?[]const u8 = null, + + /// Formats the diagnostic for display. 
+    /// Output format (the snippet block is printed only when `source_line` is set,
+    /// the hint line only when `suggestion` is set, and `[CODE]` only when `code` is set):
+    /// ```
+    /// error[E001]: Unterminated string
+    ///  --> line 5:12
+    ///   |
+    /// 5 | p Hello #{name
+    ///   |            ^
+    ///  = hint: Add closing }
+    /// ```
+    /// The caret run is `length` characters wide, or a single `^` when `length` is 0.
+    /// No file name is printed; callers that know the path must add it themselves.
+    pub fn format(
+        self: Diagnostic,
+        comptime fmt: []const u8,
+        options: std.fmt.FormatOptions,
+        writer: anytype,
+    ) !void {
+        // NOTE(review): this is the three-argument `format` protocol (fmt, options, writer).
+        // build.zig.zon pins minimum_zig_version 0.15.2, where std.fmt is understood to
+        // expect `format(self, writer: *std.Io.Writer)` invoked through `{f}` - confirm
+        // that this method still compiles and is reachable from `std.debug.print`.
+        _ = fmt;
+        _ = options;
+
+        // Header: error[CODE]: message
+        try writer.print("{s}", .{self.severity.toString()});
+        if (self.code) |code| {
+            try writer.print("[{s}]", .{code});
+        }
+        try writer.print(": {s}\n", .{self.message});
+
+        // Location: --> line <line>:<column>
+        try writer.print(" --> line {d}:{d}\n", .{ self.line, self.column });
+
+        // Source snippet with caret pointer
+        if (self.source_line) |src| {
+            const line_num_width = digitCount(self.line);
+
+            // Empty line with gutter
+            try writer.writeByteNTimes(' ', line_num_width + 1);
+            try writer.writeAll("|\n");
+
+            // Source line
+            try writer.print("{d} | {s}\n", .{ self.line, src });
+
+            // Caret line pointing to error
+            try writer.writeByteNTimes(' ', line_num_width + 1);
+            try writer.writeAll("| ");
+
+            // Spaces before caret: one pad unit per source character left of `column`,
+            // with a tab widened to four spaces.
+            var col: u32 = 1;
+            for (src) |c| {
+                if (col >= self.column) break;
+                if (c == '\t') {
+                    try writer.writeAll("    "); // 4-space tab
+                } else {
+                    try writer.writeByte(' ');
+                }
+                col += 1;
+            }
+
+            // Carets for the error span
+            const caret_count = if (self.length > 0) self.length else 1;
+            try writer.writeByteNTimes('^', caret_count);
+            try writer.writeByte('\n');
+        }
+
+        // Suggestion hint
+        if (self.suggestion) |hint| {
+            try writer.print(" = hint: {s}\n", .{hint});
+        }
+    }
+
+    /// Creates a simple diagnostic without source context.
+    /// Severity defaults to error; `length`, `source_line`, `suggestion` and `code` stay unset.
+    pub fn simple(line: u32, column: u32, message: []const u8) Diagnostic {
+        return .{
+            .line = line,
+            .column = column,
+            .message = message,
+        };
+    }
+
+    /// Creates a diagnostic with full context.
+    pub fn withContext(
+        line: u32,
+        column: u32,
+        message: []const u8,
+        source_line: []const u8,
+        suggestion: ?[]const u8,
+    ) Diagnostic {
+        return .{
+            .line = line,
+            .column = column,
+            .message = message,
+            .source_line = source_line,
+            .suggestion = suggestion,
+        };
+    }
+};
+
+/// Returns the number of decimal digits in `n` (used to size the line-number gutter).
+/// Zero counts as one digit.
+fn digitCount(n: u32) usize {
+    if (n == 0) return 1;
+    var count: usize = 0;
+    var val = n;
+    while (val > 0) : (val /= 10) {
+        count += 1;
+    }
+    return count;
+}
+
+/// Extracts the line of `source` that contains byte offset `position`.
+///
+/// The returned slice borrows from `source` and excludes the terminating '\n'.
+/// `position == source.len` is accepted and yields the final line, so errors
+/// detected at end of input still get a snippet. Returns null only when
+/// `position` lies beyond the end of `source`.
+pub fn extractSourceLine(source: []const u8, position: usize) ?[]const u8 {
+    if (position > source.len) return null;
+
+    // Line start: one past the last newline before `position`, or the start of the text.
+    const line_start = if (std.mem.lastIndexOfScalar(u8, source[0..position], '\n')) |newline|
+        newline + 1
+    else
+        0;
+
+    // Line end: the first newline at or after `position`, or the end of the text.
+    const line_end = std.mem.indexOfScalarPos(u8, source, position, '\n') orelse source.len;
+
+    return source[line_start..line_end];
+}
+
+/// Calculates line and column from a byte position in source.
+pub fn positionToLineCol(source: []const u8, position: usize) struct { line: u32, column: u32 } { + var line: u32 = 1; + var col: u32 = 1; + var i: usize = 0; + + while (i < position and i < source.len) : (i += 1) { + if (source[i] == '\n') { + line += 1; + col = 1; + } else { + col += 1; + } + } + + return .{ .line = line, .column = col }; +} + +// ───────────────────────────────────────────────────────────────────────────── +// Tests +// ───────────────────────────────────────────────────────────────────────────── + +test "Diagnostic formatting" { + const diag = Diagnostic{ + .line = 5, + .column = 12, + .message = "Unterminated interpolation", + .source_line = "p Hello #{name", + .suggestion = "Add closing }", + .code = "E001", + }; + + var buf: [512]u8 = undefined; + var fbs = std.io.fixedBufferStream(&buf); + try diag.format("", .{}, fbs.writer()); + + const output = fbs.getWritten(); + try std.testing.expect(std.mem.indexOf(u8, output, "error[E001]") != null); + try std.testing.expect(std.mem.indexOf(u8, output, "Unterminated interpolation") != null); + try std.testing.expect(std.mem.indexOf(u8, output, "line 5:12") != null); + try std.testing.expect(std.mem.indexOf(u8, output, "p Hello #{name") != null); + try std.testing.expect(std.mem.indexOf(u8, output, "hint: Add closing }") != null); +} + +test "extractSourceLine" { + const source = "line one\nline two\nline three"; + + // Position in middle of "line two" + const line = extractSourceLine(source, 12); + try std.testing.expect(line != null); + try std.testing.expectEqualStrings("line two", line.?); +} + +test "positionToLineCol" { + const source = "ab\ncde\nfghij"; + + // Position 0 = line 1, col 1 + var pos = positionToLineCol(source, 0); + try std.testing.expectEqual(@as(u32, 1), pos.line); + try std.testing.expectEqual(@as(u32, 1), pos.column); + + // Position 4 = line 2, col 2 (the 'd' in "cde") + pos = positionToLineCol(source, 4); + try std.testing.expectEqual(@as(u32, 2), pos.line); + try 
std.testing.expectEqual(@as(u32, 2), pos.column); + + // Position 7 = line 3, col 1 (the 'f' in "fghij") + pos = positionToLineCol(source, 7); + try std.testing.expectEqual(@as(u32, 3), pos.line); + try std.testing.expectEqual(@as(u32, 1), pos.column); +} diff --git a/src/lexer.zig b/src/lexer.zig index d97f2b1..9f08f2c 100644 --- a/src/lexer.zig +++ b/src/lexer.zig @@ -3,8 +3,23 @@ //! The lexer handles indentation-based nesting (emitting indent/dedent tokens), //! Pug-specific syntax (tags, classes, IDs, attributes), and text content //! including interpolation markers. +//! +//! ## Error Diagnostics +//! When tokenization fails, call `getDiagnostic()` to get rich error info: +//! ```zig +//! var lexer = Lexer.init(allocator, source); +//! const tokens = lexer.tokenize() catch |err| { +//! if (lexer.getDiagnostic()) |diag| { +//! std.debug.print("{}\n", .{diag}); +//! } +//! return err; +//! }; +//! ``` const std = @import("std"); +const diagnostic = @import("diagnostic.zig"); + +pub const Diagnostic = diagnostic.Diagnostic; /// All possible token types produced by the lexer. pub const TokenType = enum { @@ -136,6 +151,8 @@ pub const Lexer = struct { in_raw_block: bool, raw_block_indent: usize, raw_block_started: bool, + /// Last error diagnostic (populated on error) + last_diagnostic: ?Diagnostic, /// Creates a new lexer for the given source. /// Does not allocate; allocations happen during tokenize(). @@ -153,9 +170,27 @@ pub const Lexer = struct { .in_raw_block = false, .raw_block_indent = 0, .raw_block_started = false, + .last_diagnostic = null, }; } + /// Returns the last error diagnostic, if any. + /// Call this after tokenize() returns an error to get detailed error info. + pub fn getDiagnostic(self: *const Lexer) ?Diagnostic { + return self.last_diagnostic; + } + + /// Sets a diagnostic error with full context. 
+ fn setDiagnostic(self: *Lexer, message: []const u8, suggestion: ?[]const u8) void { + self.last_diagnostic = Diagnostic.withContext( + @intCast(self.line), + @intCast(self.column), + message, + diagnostic.extractSourceLine(self.source, self.pos) orelse "", + suggestion, + ); + } + /// Releases all allocated memory (tokens and indent stack). /// Call this when done with the lexer, typically via defer. pub fn deinit(self: *Lexer) void { @@ -201,6 +236,7 @@ pub const Lexer = struct { self.column = 1; self.at_line_start = true; self.current_indent = 0; + self.last_diagnostic = null; } /// Appends a token to the output list. @@ -858,7 +894,10 @@ pub const Lexer = struct { brace_depth += 1; } else if (c == '}') { if (brace_depth == 0) { - // Unmatched closing brace - shouldn't happen if called correctly + self.setDiagnostic( + "Unmatched closing brace '}'", + "Remove the extra '}' or add a matching '{'", + ); return LexerError.UnmatchedBrace; } brace_depth -= 1; @@ -872,6 +911,10 @@ pub const Lexer = struct { // Check for unterminated object literal if (brace_depth > 0) { + self.setDiagnostic( + "Unterminated object literal - missing closing '}'", + "Add a closing '}' to complete the object", + ); return LexerError.UnterminatedString; } @@ -889,6 +932,10 @@ pub const Lexer = struct { bracket_depth += 1; } else if (c == ']') { if (bracket_depth == 0) { + self.setDiagnostic( + "Unmatched closing bracket ']'", + "Remove the extra ']' or add a matching '['", + ); return LexerError.UnmatchedBrace; } bracket_depth -= 1; @@ -901,6 +948,10 @@ pub const Lexer = struct { } if (bracket_depth > 0) { + self.setDiagnostic( + "Unterminated array literal - missing closing ']'", + "Add a closing ']' to complete the array", + ); return LexerError.UnterminatedString; } diff --git a/src/parser.zig b/src/parser.zig index eb494cd..355a786 100644 --- a/src/parser.zig +++ b/src/parser.zig @@ -6,10 +6,23 @@ //! - Element construction (tag, classes, id, attributes) //! 
- Control flow (if/else, each, while) //! - Mixins, includes, and template inheritance +//! +//! ## Error Diagnostics +//! When parsing fails, call `getDiagnostic()` to get rich error info: +//! ```zig +//! var parser = Parser.init(allocator, tokens); +//! const doc = parser.parse() catch |err| { +//! if (parser.getDiagnostic()) |diag| { +//! std.debug.print("{}\n", .{diag}); +//! } +//! return err; +//! }; +//! ``` const std = @import("std"); const lexer = @import("lexer.zig"); const ast = @import("ast.zig"); +const diagnostic = @import("diagnostic.zig"); const Token = lexer.Token; const TokenType = lexer.TokenType; @@ -17,6 +30,8 @@ const Node = ast.Node; const Attribute = ast.Attribute; const TextSegment = ast.TextSegment; +pub const Diagnostic = diagnostic.Diagnostic; + /// Errors that can occur during parsing. pub const ParserError = error{ UnexpectedToken, @@ -42,6 +57,10 @@ pub const Parser = struct { tokens: []const Token, pos: usize, allocator: std.mem.Allocator, + /// Original source text (for error snippets) + source: ?[]const u8, + /// Last error diagnostic (populated on error) + last_diagnostic: ?Diagnostic, /// Creates a new parser for the given tokens. pub fn init(allocator: std.mem.Allocator, tokens: []const Token) Parser { @@ -49,6 +68,53 @@ pub const Parser = struct { .tokens = tokens, .pos = 0, .allocator = allocator, + .source = null, + .last_diagnostic = null, + }; + } + + /// Creates a parser with source text for better error messages. + pub fn initWithSource(allocator: std.mem.Allocator, tokens: []const Token, source: []const u8) Parser { + return .{ + .tokens = tokens, + .pos = 0, + .allocator = allocator, + .source = source, + .last_diagnostic = null, + }; + } + + /// Returns the last error diagnostic, if any. + /// Call this after parse() returns an error to get detailed error info. 
+    pub fn getDiagnostic(self: *const Parser) ?Diagnostic {
+        return self.last_diagnostic;
+    }
+
+    /// Sets a diagnostic error with context from the current token.
+    /// When the parser has run past the last token, the last token is used instead.
+    /// With no tokens at all, the diagnostic points at line 1, column 1.
+    fn setDiagnostic(self: *Parser, message: []const u8, suggestion: ?[]const u8) void {
+        if (self.tokens.len == 0) {
+            // Nothing to anchor on; indexing `tokens[len - 1]` here would underflow.
+            self.last_diagnostic = .{
+                .line = 1,
+                .column = 1,
+                .message = message,
+                .source_line = if (self.source) |src| sourceLineAt(src, 1) else null,
+                .suggestion = suggestion,
+            };
+            return;
+        }
+        const token = self.tokens[@min(self.pos, self.tokens.len - 1)];
+        self.setDiagnosticAtToken(token, message, suggestion);
+    }
+
+    /// Returns the text of the 1-based `line` of `source` without its newline,
+    /// or null when `line` is 0 or `source` has fewer lines.
+    fn sourceLineAt(source: []const u8, line: u32) ?[]const u8 {
+        var lines = std.mem.splitScalar(u8, source, '\n');
+        var current: u32 = 1;
+        while (lines.next()) |text| : (current += 1) {
+            if (current == line) return text;
+        }
+        return null;
+    }
+
+    /// Sets a diagnostic error for a specific token.
+    /// The snippet is looked up by the token's line number, so it is only
+    /// attached when the parser was created with `initWithSource`.
+    fn setDiagnosticAtToken(self: *Parser, token: Token, message: []const u8, suggestion: ?[]const u8) void {
+        const line: u32 = @intCast(token.line);
+        self.last_diagnostic = .{
+            .line = line,
+            .column = @intCast(token.column),
+            .message = message,
+            .source_line = if (self.source) |src| sourceLineAt(src, line) else null,
+            .suggestion = suggestion,
         };
     }
 
@@ -562,6 +628,10 @@ pub const Parser = struct {
                     value_name = before_in;
                 }
             } else {
+                self.setDiagnostic(
+                    "Missing collection in 'each' loop - expected 'in' keyword",
+                    "Use syntax: each item in collection",
+                );
                 return ParserError.MissingCollection;
             }
         } else if (self.check(.tag)) {
@@ -584,6 +654,10 @@ pub const Parser = struct {
             // Parse collection expression
             collection = try self.parseRestOfLine();
         } else {
+            self.setDiagnostic(
+                "Missing iterator variable in 'each' loop",
+                "Use syntax: each item in collection",
+            );
             return ParserError.MissingIterator;
         }
 
@@ -774,6 +848,10 @@ pub const Parser = struct {
         if (self.check(.tag)) {
             name = self.advance().value;
         } else {
+            self.setDiagnostic(
+                "Missing mixin name after 'mixin' keyword",
+                "Use syntax: mixin name(params)",
+            );
             return ParserError.MissingMixinName;
         }
 
@@ -973,6 +1051,10 @@ pub const Parser = struct {
             // No name - this is a mixin block placeholder
             return .{ .mixin_block = {} };
         } else {
+            self.setDiagnostic(
+
"Missing block name after 'block' keyword", + "Use syntax: block name", + ); return ParserError.MissingBlockName; } @@ -1005,6 +1087,10 @@ pub const Parser = struct { } else if (self.check(.text)) { name = std.mem.trim(u8, self.advance().value, " \t"); } else { + self.setDiagnostic( + "Missing block name after 'append' or 'prepend'", + "Use syntax: append blockname or prepend blockname", + ); return ParserError.MissingBlockName; } diff --git a/src/root.zig b/src/root.zig index 5ccd8b7..1aa3d39 100644 --- a/src/root.zig +++ b/src/root.zig @@ -34,6 +34,7 @@ pub const parser = @import("parser.zig"); pub const codegen = @import("codegen.zig"); pub const runtime = @import("runtime.zig"); pub const view_engine = @import("view_engine.zig"); +pub const diagnostic = @import("diagnostic.zig"); // Re-export main types for convenience pub const Lexer = lexer.Lexer;