refactor: consolidate shared utilities to runtime.zig

- Move isHtmlEntity to runtime.zig (was duplicated in codegen.zig and template.zig)
- Move appendTextEscaped to runtime.zig (was in template.zig)
- Add isXhtmlDoctype helper to runtime.zig for doctype detection
- Update template.zig to use codegen.void_elements instead of local isSelfClosing
- Update codegen.zig and zig_codegen.zig to use shared functions
- Update CLAUDE.md with shared utilities documentation

This establishes runtime.zig as the single source of truth for shared
utilities across all three rendering modes (codegen, template, zig_codegen).
This commit is contained in:
2026-01-29 22:27:57 +05:30
parent c7d53e56a9
commit b53aa16010
6 changed files with 132 additions and 162 deletions

View File

@@ -18,6 +18,8 @@ const runtime = @import("runtime.zig");
// Public aliases of declarations defined in runtime.zig, re-exported from this file.
pub const escapeChar = runtime.escapeChar;
pub const doctypes = runtime.doctypes;
pub const whitespace_sensitive_tags = runtime.whitespace_sensitive_tags;
// Entity detection and XHTML doctype detection are implemented in runtime.zig.
pub const isHtmlEntity = runtime.isHtmlEntity;
pub const isXhtmlDoctype = runtime.isXhtmlDoctype;
// Import error types
const pug_error = @import("error.zig");
@@ -157,6 +159,7 @@ pub const Compiler = struct {
fn writeTextEscaped(self: *Compiler, str: []const u8) CompilerError!void {
// For text content - escapes < > & (NOT quotes)
// Preserves existing HTML entities like &#8217; or &amp;
// Uses shared isHtmlEntity from runtime.zig
var i: usize = 0;
while (i < str.len) {
const c = str[i];
@@ -165,7 +168,7 @@ pub const Compiler = struct {
'>' => try self.write("&gt;"),
'&' => {
// Check if this is already an HTML entity
if (isHtmlEntity(str[i..])) {
if (runtime.isHtmlEntity(str[i..])) {
// Pass through the entity as-is
try self.writeChar(c);
} else {
@@ -178,66 +181,6 @@ pub const Compiler = struct {
}
}
/// Report whether `str` begins with something shaped like an HTML entity:
/// `&name;`, `&#digits;` or `&#xhex;`.
///
/// Only the shape is checked; names are not looked up in the entity table.
/// Numeric forms must have their `;` within the first 10 bytes. Named forms
/// must have it within the first 33 bytes, which is exactly enough for the
/// longest HTML name, `&CounterClockwiseContourIntegral;` (the previous bound
/// of 32 rejected that entity, so it was escaped a second time).
fn isHtmlEntity(str: []const u8) bool {
    const numeric_scan_end: usize = 10;
    const named_scan_end: usize = 33;

    if (str.len < 3 or str[0] != '&') return false;

    // Named entity: &name;
    if (str[1] != '#') {
        var at: usize = 1;
        while (at < str.len and at < named_scan_end) : (at += 1) {
            switch (str[at]) {
                // A terminator only counts once at least one name byte was seen.
                ';' => return at > 1,
                'a'...'z', 'A'...'Z', '0'...'9' => {},
                else => return false,
            }
        }
        return false;
    }

    // Numeric entity: &#digits; or &#xhex; (str.len >= 3, so str[2] exists).
    const is_hex = str[2] == 'x' or str[2] == 'X';
    const first: usize = if (is_hex) 3 else 2;
    var pos = first;
    while (pos < str.len and pos < numeric_scan_end) : (pos += 1) {
        const ch = str[pos];
        if (ch == ';') return pos > first;
        const accepted = switch (ch) {
            '0'...'9' => true,
            // Letters a-f are digits only in the hexadecimal form.
            'a'...'f', 'A'...'F' => is_hex,
            else => false,
        };
        if (!accepted) return false;
    }
    return false;
}
fn prettyIndent(self: *Compiler) CompilerError!void {
if (self.options.pretty and !self.escape_pretty) {
try self.writeChar('\n');

View File

@@ -282,6 +282,107 @@ pub inline fn escapeChar(c: u8) ?[]const u8 {
return escape_table[c];
}
// ============================================================================
// HTML Entity Detection and Text Escaping - shared across codegen modules
// ============================================================================
/// Check if string starts with a valid-looking HTML entity: `&name;`,
/// `&#digits;` or `&#xhex;`.
/// Used to preserve existing entities during text escaping.
/// Shared across codegen.zig and template.zig.
///
/// Only the shape is validated; entity names are not checked against the HTML
/// entity table. The terminating `;` must appear within the first 10 bytes for
/// numeric entities and within the first 33 bytes for named entities. 33 is
/// the length of `&CounterClockwiseContourIntegral;`, the longest HTML named
/// entity; the earlier limit of 32 stopped one byte short of its `;`.
pub fn isHtmlEntity(str: []const u8) bool {
    const max_numeric_len = 10;
    const max_named_len = 33;

    if (str.len < 3 or str[0] != '&') return false;

    // Classify the entity and find where its body starts.
    const Kind = enum { named, decimal, hex };
    var kind: Kind = .named;
    var start: usize = 1;
    if (str[1] == '#') {
        kind = .decimal;
        start = 2;
        // str.len >= 3 was checked above, so str[2] is in bounds.
        if (str[2] == 'x' or str[2] == 'X') {
            kind = .hex;
            start = 3;
        }
    }

    const limit: usize = if (kind == .named) max_named_len else max_numeric_len;
    const end = @min(str.len, limit);

    var i = start;
    while (i < end) : (i += 1) {
        const ch = str[i];
        // A terminator is only valid after at least one body byte.
        if (ch == ';') return i > start;
        const accepted = switch (kind) {
            .named => switch (ch) {
                'a'...'z', 'A'...'Z', '0'...'9' => true,
                else => false,
            },
            .decimal => switch (ch) {
                '0'...'9' => true,
                else => false,
            },
            .hex => switch (ch) {
                '0'...'9', 'a'...'f', 'A'...'F' => true,
                else => false,
            },
        };
        if (!accepted) return false;
    }
    // Ran out of input (or scan budget) without reaching ';'.
    return false;
}
/// Append `str` to `output` escaped for use as HTML text content.
/// `<` and `>` become `&lt;` and `&gt;`; a `&` becomes `&amp;` unless
/// `isHtmlEntity` reports that it already starts an entity (e.g. `&#8217;`
/// or `&amp;`), in which case it is copied unchanged. Quotes are NOT escaped.
/// Shared across codegen.zig and template.zig.
/// Returns an allocator error if `output` cannot grow.
pub fn appendTextEscaped(allocator: Allocator, output: *ArrayListUnmanaged(u8), str: []const u8) Allocator.Error!void {
    for (str, 0..) |byte, pos| {
        // null means "copy the byte through as-is".
        const replacement: ?[]const u8 = switch (byte) {
            '<' => "&lt;",
            '>' => "&gt;",
            '&' => if (isHtmlEntity(str[pos..])) null else "&amp;",
            else => null,
        };
        if (replacement) |text| {
            try output.appendSlice(allocator, text);
        } else {
            try output.append(allocator, byte);
        }
    }
}
/// Report whether a doctype value names one of the XHTML doctypes
/// (`xml`, `strict`, `transitional`, `frameset`, `1.1`, `basic`, `mobile`).
/// Callers use a true result to switch off terse (HTML5-style) output.
/// The comparison is exact and case-sensitive.
/// Shared across codegen.zig, template.zig, and zig_codegen.zig.
pub fn isXhtmlDoctype(val: []const u8) bool {
    const xhtml_doctype_names = [_][]const u8{
        "xml",
        "strict",
        "transitional",
        "frameset",
        "1.1",
        "basic",
        "mobile",
    };
    for (xhtml_doctype_names) |candidate| {
        if (mem.eql(u8, val, candidate)) return true;
    }
    return false;
}
/// Attribute entry for attrs function
pub const AttrEntry = struct {
key: []const u8,

View File

@@ -10,6 +10,7 @@ const pug = @import("pug.zig");
const parser = @import("parser.zig");
const Node = parser.Node;
const runtime = @import("runtime.zig");
const codegen = @import("codegen.zig");
const mixin_mod = @import("mixin.zig");
pub const MixinRegistry = mixin_mod.MixinRegistry;
@@ -194,14 +195,7 @@ fn detectDoctype(node: *Node, ctx: *RenderContext) void {
if (node.type == .Doctype) {
if (node.val) |val| {
// XHTML doctypes use non-terse mode
if (std.mem.eql(u8, val, "xml") or
std.mem.eql(u8, val, "strict") or
std.mem.eql(u8, val, "transitional") or
std.mem.eql(u8, val, "frameset") or
std.mem.eql(u8, val, "1.1") or
std.mem.eql(u8, val, "basic") or
std.mem.eql(u8, val, "mobile"))
{
if (runtime.isXhtmlDoctype(val)) {
ctx.terse = false;
}
}
@@ -826,7 +820,7 @@ fn processInterpolation(allocator: Allocator, output: *std.ArrayListUnmanaged(u8
'<' => try output.appendSlice(allocator, "&lt;"),
'>' => try output.appendSlice(allocator, "&gt;"),
'&' => {
if (isHtmlEntity(text[i..])) {
if (runtime.isHtmlEntity(text[i..])) {
try output.append(allocator, c);
} else {
try output.appendSlice(allocator, "&amp;");
@@ -872,92 +866,15 @@ fn getFieldValue(data: anytype, name: []const u8) ?[]const u8 {
/// Escape for text content - escapes < > & (NOT quotes)
/// Preserves existing HTML entities like &#8217;
/// Thin wrapper: the escaping itself lives in runtime.appendTextEscaped, so
/// this file no longer carries its own copy of the loop.
/// Returns an allocator error if `output` cannot grow.
fn appendTextEscaped(allocator: Allocator, output: *std.ArrayListUnmanaged(u8), str: []const u8) Allocator.Error!void {
    try runtime.appendTextEscaped(allocator, output, str);
}
/// Check if string starts with a valid HTML entity.
/// Delegates to runtime.isHtmlEntity so the entity rules are defined in one
/// place instead of being repeated in this file.
fn isHtmlEntity(str: []const u8) bool {
    return runtime.isHtmlEntity(str);
}
/// Check if tag is a void (self-closing) HTML element.
/// Uses shared void_elements from codegen.zig rather than a local tag table.
/// NOTE(review): assumes codegen.void_elements covers the same tags the old
/// local list did (area, base, br, col, embed, hr, img, input, link, meta,
/// param, source, track, wbr) - confirm against codegen.zig.
fn isSelfClosing(name: []const u8) bool {
    return codegen.void_elements.has(name);
}
// ============================================================================

View File

@@ -603,14 +603,8 @@ pub const Codegen = struct {
if (node.type == .Doctype) {
if (node.val) |val| {
// XHTML doctypes use non-terse mode
if (std.mem.eql(u8, val, "xml") or
std.mem.eql(u8, val, "strict") or
std.mem.eql(u8, val, "transitional") or
std.mem.eql(u8, val, "frameset") or
std.mem.eql(u8, val, "1.1") or
std.mem.eql(u8, val, "basic") or
std.mem.eql(u8, val, "mobile"))
{
// Uses shared isXhtmlDoctype from runtime.zig
if (runtime.isXhtmlDoctype(val)) {
self.terse = false;
}
}