refactor: consolidate shared utilities to runtime.zig

- Move isHtmlEntity to runtime.zig (was duplicated in codegen.zig and template.zig)
- Move appendTextEscaped to runtime.zig (was in template.zig)
- Add isXhtmlDoctype helper to runtime.zig for doctype detection
- Update template.zig's isSelfClosing to use codegen.void_elements instead of a local tag list
- Update codegen.zig and zig_codegen.zig to use shared functions
- Update CLAUDE.md with shared utilities documentation

This establishes runtime.zig as the single source of truth for shared utilities across all three rendering modes (codegen, template, zig_codegen).
2026-01-29 22:27:57 +05:30
parent c7d53e56a9
commit b53aa16010
6 changed files with 132 additions and 162 deletions
--- a/build.zig.zon
+++ b/build.zig.zon
@@ -1,6 +1,6 @@
 .{
    .name = .pugz,
-    .version = "0.3.6",
+    .version = "0.3.7",
    .fingerprint = 0x822db0790e17621d, // Changing this has security and trust implications.
    .minimum_zig_version = "0.15.2",
    .dependencies = .{},
--- a/docs/CLAUDE.md
+++ b/docs/CLAUDE.md
@@ -44,13 +44,28 @@ Source → Lexer → Tokens → StripComments → Parser → AST → Linker →

 **codegen.zig**, **template.zig**, and **zig_codegen.zig** all consume the AST from the parser. When fixing bugs related to AST structure (like attribute handling, class merging, etc.), prefer fixing in **parser.zig** so all three rendering paths benefit from the fix automatically. Only fix in the individual codegen modules if the behavior should differ between rendering modes.

+### Shared Utilities in runtime.zig
+
+The `runtime.zig` module is the single source of truth for shared utilities used across all rendering modes:
+
+- **`isHtmlEntity(str)`** - Checks whether the string starts with a valid HTML entity (`&name;`, `&#digits;`, `&#xhex;`)
+- **`appendTextEscaped(allocator, output, str)`** - Escapes `<`, `>`, and `&` in text content (not quotes) while preserving existing entities
+- **`isXhtmlDoctype(val)`** - Checks if doctype is XHTML (xml, strict, transitional, frameset, 1.1, basic, mobile)
+- **`escapeChar(c)`** - O(1) lookup table for HTML character escaping
+- **`appendEscaped(allocator, output, str)`** - Escapes all HTML special chars including quotes
+- **`doctypes`** - StaticStringMap of doctype names to DOCTYPE strings
+- **`whitespace_sensitive_tags`** - Tags where whitespace matters (pre, textarea, script, style, code)
+
+The `codegen.zig` module provides:
+- **`void_elements`** - StaticStringMap of HTML5 void/self-closing elements (br, img, input, etc.)
+
 ### Core Modules

 | Module | File | Purpose |
 |--------|------|---------|
 | **Lexer** | `src/lexer.zig` | Tokenizes Pug source into tokens |
 | **Parser** | `src/parser.zig` | Builds AST from tokens |
-| **Runtime** | `src/runtime.zig` | Shared utilities (HTML escaping, etc.) |
+| **Runtime** | `src/runtime.zig` | Shared utilities (HTML escaping, entity detection, doctype helpers) |
 | **Error** | `src/error.zig` | Error formatting with source context |
 | **Walk** | `src/walk.zig` | AST traversal with visitor pattern |
 | **Strip Comments** | `src/strip_comments.zig` | Token filtering for comments |
--- a/src/codegen.zig
+++ b/src/codegen.zig
@@ -18,6 +18,8 @@ const runtime = @import("runtime.zig");
 pub const escapeChar = runtime.escapeChar;
 pub const doctypes = runtime.doctypes;
 pub const whitespace_sensitive_tags = runtime.whitespace_sensitive_tags;
+pub const isHtmlEntity = runtime.isHtmlEntity;
+pub const isXhtmlDoctype = runtime.isXhtmlDoctype;

 // Import error types
 const pug_error = @import("error.zig");
@@ -157,6 +159,7 @@ pub const Compiler = struct {
    fn writeTextEscaped(self: *Compiler, str: []const u8) CompilerError!void {
        // For text content - escapes < > & (NOT quotes)
        // Preserves existing HTML entities like &#8217; or &amp;
+        // Uses shared isHtmlEntity from runtime.zig
        var i: usize = 0;
        while (i < str.len) {
            const c = str[i];
@@ -165,7 +168,7 @@ pub const Compiler = struct {
                '>' => try self.write("&gt;"),
                '&' => {
                    // Check if this is already an HTML entity
-                    if (isHtmlEntity(str[i..])) {
+                    if (runtime.isHtmlEntity(str[i..])) {
                        // Pass through the entity as-is
                        try self.writeChar(c);
                    } else {
@@ -178,66 +181,6 @@ pub const Compiler = struct {
        }
    }

-    fn isHtmlEntity(str: []const u8) bool {
-        // Check if str starts with a valid HTML entity: &name; or &#digits; or &#xhex;
-        if (str.len < 3 or str[0] != '&') return false;
-
-        var i: usize = 1;
-
-        // Numeric entity: &#digits; or &#xhex;
-        if (str[i] == '#') {
-            i += 1;
-            if (i >= str.len) return false;
-
-            // Hex entity: &#x...;
-            if (str[i] == 'x' or str[i] == 'X') {
-                i += 1;
-                if (i >= str.len) return false;
-                // Need at least one hex digit
-                var has_hex = false;
-                while (i < str.len and i < 10) : (i += 1) {
-                    const ch = str[i];
-                    if (ch == ';') return has_hex;
-                    if ((ch >= '0' and ch <= '9') or
-                        (ch >= 'a' and ch <= 'f') or
-                        (ch >= 'A' and ch <= 'F'))
-                    {
-                        has_hex = true;
-                    } else {
-                        return false;
-                    }
-                }
-                return false;
-            }
-
-            // Decimal entity: &#digits;
-            var has_digit = false;
-            while (i < str.len and i < 10) : (i += 1) {
-                const ch = str[i];
-                if (ch == ';') return has_digit;
-                if (ch >= '0' and ch <= '9') {
-                    has_digit = true;
-                } else {
-                    return false;
-                }
-            }
-            return false;
-        }
-
-        // Named entity: &name;
-        var has_alpha = false;
-        while (i < str.len and i < 32) : (i += 1) {
-            const ch = str[i];
-            if (ch == ';') return has_alpha;
-            if ((ch >= 'a' and ch <= 'z') or (ch >= 'A' and ch <= 'Z') or (ch >= '0' and ch <= '9')) {
-                has_alpha = true;
-            } else {
-                return false;
-            }
-        }
-        return false;
-    }
-
    fn prettyIndent(self: *Compiler) CompilerError!void {
        if (self.options.pretty and !self.escape_pretty) {
            try self.writeChar('\n');
--- a/src/runtime.zig
+++ b/src/runtime.zig
@@ -282,6 +282,107 @@ pub inline fn escapeChar(c: u8) ?[]const u8 {
    return escape_table[c];
 }

+// ============================================================================
+// HTML Entity Detection and Text Escaping - shared across codegen modules
+// ============================================================================
+
+/// Check if string starts with a valid HTML entity: &name; or &#digits; or &#xhex;
+/// Used to preserve existing entities during text escaping.
+/// Shared across codegen.zig and template.zig.
+pub fn isHtmlEntity(str: []const u8) bool {
+    if (str.len < 3 or str[0] != '&') return false;
+
+    var i: usize = 1;
+
+    // Numeric entity: &#digits; or &#xhex;
+    if (str[i] == '#') {
+        i += 1;
+        if (i >= str.len) return false;
+
+        // Hex entity: &#x...;
+        if (str[i] == 'x' or str[i] == 'X') {
+            i += 1;
+            if (i >= str.len) return false;
+            var has_hex = false;
+            while (i < str.len and i < 10) : (i += 1) {
+                const ch = str[i];
+                if (ch == ';') return has_hex;
+                if ((ch >= '0' and ch <= '9') or
+                    (ch >= 'a' and ch <= 'f') or
+                    (ch >= 'A' and ch <= 'F'))
+                {
+                    has_hex = true;
+                } else {
+                    return false;
+                }
+            }
+            return false;
+        }
+
+        // Decimal entity: &#digits;
+        var has_digit = false;
+        while (i < str.len and i < 10) : (i += 1) {
+            const ch = str[i];
+            if (ch == ';') return has_digit;
+            if (ch >= '0' and ch <= '9') {
+                has_digit = true;
+            } else {
+                return false;
+            }
+        }
+        return false;
+    }
+
+    // Named entity: &name;
+    var has_alpha = false;
+    while (i < str.len and i < 32) : (i += 1) {
+        const ch = str[i];
+        if (ch == ';') return has_alpha;
+        if ((ch >= 'a' and ch <= 'z') or (ch >= 'A' and ch <= 'Z') or (ch >= '0' and ch <= '9')) {
+            has_alpha = true;
+        } else {
+            return false;
+        }
+    }
+    return false;
+}
+
+/// Escape for text content - escapes < > & (NOT quotes)
+/// Preserves existing HTML entities like &#8217; or &amp;
+/// Called by template.zig's appendTextEscaped wrapper; codegen.zig's writeTextEscaped applies the same rules through its own writer.
+pub fn appendTextEscaped(allocator: Allocator, output: *ArrayListUnmanaged(u8), str: []const u8) Allocator.Error!void {
+    var i: usize = 0;
+    while (i < str.len) {
+        const c = str[i];
+        switch (c) {
+            '<' => try output.appendSlice(allocator, "&lt;"),
+            '>' => try output.appendSlice(allocator, "&gt;"),
+            '&' => {
+                if (isHtmlEntity(str[i..])) {
+                    try output.append(allocator, c);
+                } else {
+                    try output.appendSlice(allocator, "&amp;");
+                }
+            },
+            else => try output.append(allocator, c),
+        }
+        i += 1;
+    }
+}
+
+/// Check if a doctype value corresponds to XHTML (non-terse mode).
+/// Returns true for XHTML doctypes, false for HTML5.
+/// Shared across codegen.zig, template.zig, and zig_codegen.zig.
+pub fn isXhtmlDoctype(val: []const u8) bool {
+    return mem.eql(u8, val, "xml") or
+        mem.eql(u8, val, "strict") or
+        mem.eql(u8, val, "transitional") or
+        mem.eql(u8, val, "frameset") or
+        mem.eql(u8, val, "1.1") or
+        mem.eql(u8, val, "basic") or
+        mem.eql(u8, val, "mobile");
+}
+
 /// Attribute entry for attrs function
 pub const AttrEntry = struct {
    key: []const u8,
--- a/src/template.zig
+++ b/src/template.zig
@@ -10,6 +10,7 @@ const pug = @import("pug.zig");
 const parser = @import("parser.zig");
 const Node = parser.Node;
 const runtime = @import("runtime.zig");
+const codegen = @import("codegen.zig");
 const mixin_mod = @import("mixin.zig");
 pub const MixinRegistry = mixin_mod.MixinRegistry;

@@ -194,14 +195,7 @@ fn detectDoctype(node: *Node, ctx: *RenderContext) void {
    if (node.type == .Doctype) {
        if (node.val) |val| {
            // XHTML doctypes use non-terse mode
-            if (std.mem.eql(u8, val, "xml") or
-                std.mem.eql(u8, val, "strict") or
-                std.mem.eql(u8, val, "transitional") or
-                std.mem.eql(u8, val, "frameset") or
-                std.mem.eql(u8, val, "1.1") or
-                std.mem.eql(u8, val, "basic") or
-                std.mem.eql(u8, val, "mobile"))
-            {
+            if (runtime.isXhtmlDoctype(val)) {
                ctx.terse = false;
            }
        }
@@ -826,7 +820,7 @@ fn processInterpolation(allocator: Allocator, output: *std.ArrayListUnmanaged(u8
                '<' => try output.appendSlice(allocator, "&lt;"),
                '>' => try output.appendSlice(allocator, "&gt;"),
                '&' => {
-                    if (isHtmlEntity(text[i..])) {
+                    if (runtime.isHtmlEntity(text[i..])) {
                        try output.append(allocator, c);
                    } else {
                        try output.appendSlice(allocator, "&amp;");
@@ -872,92 +866,15 @@ fn getFieldValue(data: anytype, name: []const u8) ?[]const u8 {

 /// Escape for text content - escapes < > & (NOT quotes)
 /// Preserves existing HTML entities like &#8217;
+/// Uses shared appendTextEscaped from runtime.zig.
 fn appendTextEscaped(allocator: Allocator, output: *std.ArrayListUnmanaged(u8), str: []const u8) Allocator.Error!void {
-    var i: usize = 0;
-    while (i < str.len) {
-        const c = str[i];
-        switch (c) {
-            '<' => try output.appendSlice(allocator, "&lt;"),
-            '>' => try output.appendSlice(allocator, "&gt;"),
-            '&' => {
-                if (isHtmlEntity(str[i..])) {
-                    try output.append(allocator, c);
-                } else {
-                    try output.appendSlice(allocator, "&amp;");
-                }
-            },
-            else => try output.append(allocator, c),
-        }
-        i += 1;
-    }
-}
-
-/// Check if string starts with a valid HTML entity
-fn isHtmlEntity(str: []const u8) bool {
-    if (str.len < 3 or str[0] != '&') return false;
-
-    var i: usize = 1;
-
-    // Numeric entity: &#digits; or &#xhex;
-    if (str[i] == '#') {
-        i += 1;
-        if (i >= str.len) return false;
-
-        if (str[i] == 'x' or str[i] == 'X') {
-            i += 1;
-            if (i >= str.len) return false;
-            var has_hex = false;
-            while (i < str.len and i < 10) : (i += 1) {
-                const ch = str[i];
-                if (ch == ';') return has_hex;
-                if ((ch >= '0' and ch <= '9') or
-                    (ch >= 'a' and ch <= 'f') or
-                    (ch >= 'A' and ch <= 'F'))
-                {
-                    has_hex = true;
-                } else {
-                    return false;
-                }
-            }
-            return false;
-        }
-
-        var has_digit = false;
-        while (i < str.len and i < 10) : (i += 1) {
-            const ch = str[i];
-            if (ch == ';') return has_digit;
-            if (ch >= '0' and ch <= '9') {
-                has_digit = true;
-            } else {
-                return false;
-            }
-        }
-        return false;
-    }
-
-    // Named entity: &name;
-    var has_alpha = false;
-    while (i < str.len and i < 32) : (i += 1) {
-        const ch = str[i];
-        if (ch == ';') return has_alpha;
-        if ((ch >= 'a' and ch <= 'z') or (ch >= 'A' and ch <= 'Z') or (ch >= '0' and ch <= '9')) {
-            has_alpha = true;
-        } else {
-            return false;
-        }
-    }
-    return false;
+    try runtime.appendTextEscaped(allocator, output, str);
 }

+/// Check if tag is a void (self-closing) HTML element.
+/// Uses shared void_elements from codegen.zig.
 fn isSelfClosing(name: []const u8) bool {
-    const self_closing_tags = [_][]const u8{
-        "area", "base", "br",    "col",    "embed", "hr",  "img", "input",
-        "link", "meta", "param", "source", "track", "wbr",
-    };
-    for (self_closing_tags) |tag| {
-        if (std.mem.eql(u8, name, tag)) return true;
-    }
-    return false;
+    return codegen.void_elements.has(name);
 }

 // ============================================================================
--- a/src/tpl_compiler/zig_codegen.zig
+++ b/src/tpl_compiler/zig_codegen.zig
@@ -603,14 +603,8 @@ pub const Codegen = struct {
        if (node.type == .Doctype) {
            if (node.val) |val| {
                // XHTML doctypes use non-terse mode
-                if (std.mem.eql(u8, val, "xml") or
-                    std.mem.eql(u8, val, "strict") or
-                    std.mem.eql(u8, val, "transitional") or
-                    std.mem.eql(u8, val, "frameset") or
-                    std.mem.eql(u8, val, "1.1") or
-                    std.mem.eql(u8, val, "basic") or
-                    std.mem.eql(u8, val, "mobile"))
-                {
+                // Uses shared isXhtmlDoctype from runtime.zig
+                if (runtime.isXhtmlDoctype(val)) {
                    self.terse = false;
                }
            }