refactor: consolidate shared utilities to runtime.zig
- Move isHtmlEntity to runtime.zig (was duplicated in codegen.zig and template.zig) - Move appendTextEscaped to runtime.zig (was in template.zig) - Add isXhtmlDoctype helper to runtime.zig for doctype detection - Update template.zig to use codegen.void_elements instead of local isSelfClosing - Update codegen.zig and zig_codegen.zig to use shared functions - Update CLAUDE.md with shared utilities documentation This establishes runtime.zig as the single source of truth for shared utilities across all three rendering modes (codegen, template, zig_codegen).
This commit is contained in:
@@ -1,6 +1,6 @@
|
|||||||
.{
|
.{
|
||||||
.name = .pugz,
|
.name = .pugz,
|
||||||
.version = "0.3.6",
|
.version = "0.3.7",
|
||||||
.fingerprint = 0x822db0790e17621d, // Changing this has security and trust implications.
|
.fingerprint = 0x822db0790e17621d, // Changing this has security and trust implications.
|
||||||
.minimum_zig_version = "0.15.2",
|
.minimum_zig_version = "0.15.2",
|
||||||
.dependencies = .{},
|
.dependencies = .{},
|
||||||
|
|||||||
@@ -44,13 +44,28 @@ Source → Lexer → Tokens → StripComments → Parser → AST → Linker →
|
|||||||
|
|
||||||
**codegen.zig**, **template.zig**, and **zig_codegen.zig** all consume the AST from the parser. When fixing bugs related to AST structure (like attribute handling, class merging, etc.), prefer fixing in **parser.zig** so all three rendering paths benefit from the fix automatically. Only fix in the individual codegen modules if the behavior should differ between rendering modes.
|
**codegen.zig**, **template.zig**, and **zig_codegen.zig** all consume the AST from the parser. When fixing bugs related to AST structure (like attribute handling, class merging, etc.), prefer fixing in **parser.zig** so all three rendering paths benefit from the fix automatically. Only fix in the individual codegen modules if the behavior should differ between rendering modes.
|
||||||
|
|
||||||
|
### Shared Utilities in runtime.zig
|
||||||
|
|
||||||
|
The `runtime.zig` module is the single source of truth for shared utilities used across all rendering modes:
|
||||||
|
|
||||||
|
- **`isHtmlEntity(str)`** - Checks if string starts with valid HTML entity (`&name;`, `&#digits;`, `&#xhex;`)
|
||||||
|
- **`appendTextEscaped(allocator, output, str)`** - Escapes text content (`<`, `>`, `&`) preserving existing entities
|
||||||
|
- **`isXhtmlDoctype(val)`** - Checks if doctype is XHTML (xml, strict, transitional, frameset, 1.1, basic, mobile)
|
||||||
|
- **`escapeChar(c)`** - O(1) lookup table for HTML character escaping
|
||||||
|
- **`appendEscaped(allocator, output, str)`** - Escapes all HTML special chars including quotes
|
||||||
|
- **`doctypes`** - StaticStringMap of doctype names to DOCTYPE strings
|
||||||
|
- **`whitespace_sensitive_tags`** - Tags where whitespace matters (pre, textarea, script, style, code)
|
||||||
|
|
||||||
|
The `codegen.zig` module provides:
|
||||||
|
- **`void_elements`** - StaticStringMap of HTML5 void/self-closing elements (br, img, input, etc.)
|
||||||
|
|
||||||
### Core Modules
|
### Core Modules
|
||||||
|
|
||||||
| Module | File | Purpose |
|
| Module | File | Purpose |
|
||||||
|--------|------|---------|
|
|--------|------|---------|
|
||||||
| **Lexer** | `src/lexer.zig` | Tokenizes Pug source into tokens |
|
| **Lexer** | `src/lexer.zig` | Tokenizes Pug source into tokens |
|
||||||
| **Parser** | `src/parser.zig` | Builds AST from tokens |
|
| **Parser** | `src/parser.zig` | Builds AST from tokens |
|
||||||
| **Runtime** | `src/runtime.zig` | Shared utilities (HTML escaping, etc.) |
|
| **Runtime** | `src/runtime.zig` | Shared utilities (HTML escaping, entity detection, doctype helpers) |
|
||||||
| **Error** | `src/error.zig` | Error formatting with source context |
|
| **Error** | `src/error.zig` | Error formatting with source context |
|
||||||
| **Walk** | `src/walk.zig` | AST traversal with visitor pattern |
|
| **Walk** | `src/walk.zig` | AST traversal with visitor pattern |
|
||||||
| **Strip Comments** | `src/strip_comments.zig` | Token filtering for comments |
|
| **Strip Comments** | `src/strip_comments.zig` | Token filtering for comments |
|
||||||
|
|||||||
@@ -18,6 +18,8 @@ const runtime = @import("runtime.zig");
|
|||||||
pub const escapeChar = runtime.escapeChar;
|
pub const escapeChar = runtime.escapeChar;
|
||||||
pub const doctypes = runtime.doctypes;
|
pub const doctypes = runtime.doctypes;
|
||||||
pub const whitespace_sensitive_tags = runtime.whitespace_sensitive_tags;
|
pub const whitespace_sensitive_tags = runtime.whitespace_sensitive_tags;
|
||||||
|
pub const isHtmlEntity = runtime.isHtmlEntity;
|
||||||
|
pub const isXhtmlDoctype = runtime.isXhtmlDoctype;
|
||||||
|
|
||||||
// Import error types
|
// Import error types
|
||||||
const pug_error = @import("error.zig");
|
const pug_error = @import("error.zig");
|
||||||
@@ -157,6 +159,7 @@ pub const Compiler = struct {
|
|||||||
fn writeTextEscaped(self: *Compiler, str: []const u8) CompilerError!void {
|
fn writeTextEscaped(self: *Compiler, str: []const u8) CompilerError!void {
|
||||||
// For text content - escapes < > & (NOT quotes)
|
// For text content - escapes < > & (NOT quotes)
|
||||||
// Preserves existing HTML entities like &#8217; or &amp;
|
// Preserves existing HTML entities like &#8217; or &amp;
|
||||||
|
// Uses shared isHtmlEntity from runtime.zig
|
||||||
var i: usize = 0;
|
var i: usize = 0;
|
||||||
while (i < str.len) {
|
while (i < str.len) {
|
||||||
const c = str[i];
|
const c = str[i];
|
||||||
@@ -165,7 +168,7 @@ pub const Compiler = struct {
|
|||||||
'>' => try self.write(">"),
|
'>' => try self.write(">"),
|
||||||
'&' => {
|
'&' => {
|
||||||
// Check if this is already an HTML entity
|
// Check if this is already an HTML entity
|
||||||
if (isHtmlEntity(str[i..])) {
|
if (runtime.isHtmlEntity(str[i..])) {
|
||||||
// Pass through the entity as-is
|
// Pass through the entity as-is
|
||||||
try self.writeChar(c);
|
try self.writeChar(c);
|
||||||
} else {
|
} else {
|
||||||
@@ -178,66 +181,6 @@ pub const Compiler = struct {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Report whether `str` begins with a complete HTML character reference:
/// `&name;`, `&#digits;` or `&#xhex;` (the closing `;` is required).
///
/// Only the shape is checked; names are not looked up in the HTML entity
/// table, and ASCII digits are accepted as name characters.
/// The closing `;` of a numeric reference must sit at index 9 or lower, and
/// that of a named reference at index 32 or lower.
fn isHtmlEntity(str: []const u8) bool {
    // Shortest possible reference is three bytes ("&a;"); this also keeps
    // str[1] and str[2] in bounds for the checks below.
    if (str.len < 3 or str[0] != '&') return false;

    var i: usize = 1;

    // Numeric entity: &#digits; or &#xhex;
    if (str[i] == '#') {
        i += 1;

        // Hex entity: &#x...;
        if (str[i] == 'x' or str[i] == 'X') {
            i += 1;
            // Need at least one hex digit before the ';'.
            var has_hex = false;
            while (i < str.len and i < 10) : (i += 1) {
                const ch = str[i];
                if (ch == ';') return has_hex;
                const is_hex = (ch >= '0' and ch <= '9') or
                    (ch >= 'a' and ch <= 'f') or
                    (ch >= 'A' and ch <= 'F');
                if (!is_hex) return false;
                has_hex = true;
            }
            return false;
        }

        // Decimal entity: &#digits;
        var has_digit = false;
        while (i < str.len and i < 10) : (i += 1) {
            const ch = str[i];
            if (ch == ';') return has_digit;
            if (ch < '0' or ch > '9') return false;
            has_digit = true;
        }
        return false;
    }

    // Named entity: &name;
    // The scan includes index 32 so that the longest HTML5 reference,
    // "&CounterClockwiseContourIntegral;", is still recognised.
    var has_alpha = false;
    while (i < str.len and i <= 32) : (i += 1) {
        const ch = str[i];
        if (ch == ';') return has_alpha;
        const is_alnum = (ch >= 'a' and ch <= 'z') or
            (ch >= 'A' and ch <= 'Z') or
            (ch >= '0' and ch <= '9');
        if (!is_alnum) return false;
        has_alpha = true;
    }
    return false;
}
|
|
||||||
|
|
||||||
fn prettyIndent(self: *Compiler) CompilerError!void {
|
fn prettyIndent(self: *Compiler) CompilerError!void {
|
||||||
if (self.options.pretty and !self.escape_pretty) {
|
if (self.options.pretty and !self.escape_pretty) {
|
||||||
try self.writeChar('\n');
|
try self.writeChar('\n');
|
||||||
|
|||||||
101
src/runtime.zig
101
src/runtime.zig
@@ -282,6 +282,107 @@ pub inline fn escapeChar(c: u8) ?[]const u8 {
|
|||||||
return escape_table[c];
|
return escape_table[c];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// HTML Entity Detection and Text Escaping - shared across codegen modules
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
/// Check whether `str` begins with a complete HTML character reference.
///
/// Recognised forms (the terminating `;` is mandatory):
///   - named:   `&name;`    (ASCII letters and digits)
///   - decimal: `&#digits;`
///   - hex:     `&#xhex;` or `&#Xhex;`
///
/// Only the shape is validated; names are not looked up in the HTML entity
/// table. Used to preserve existing entities during text escaping.
/// Shared across codegen.zig and template.zig.
pub fn isHtmlEntity(str: []const u8) bool {
    // The shortest reference is three bytes ("&a;"), which also guarantees
    // that str[1] and str[2] are in bounds below.
    if (str.len < 3 or str[0] != '&') return false;

    // Exclusive upper bounds on the index at which the closing ';' may sit.
    const numeric_scan_end: usize = 10; // "&#1114111;" / "&#x10FFFF;" end at index 9
    const named_scan_end: usize = 33; // "&CounterClockwiseContourIntegral;" ends at index 32

    if (str[1] != '#') {
        // Named reference: &name;
        var seen_name_char = false;
        var i: usize = 1;
        while (i < str.len and i < named_scan_end) : (i += 1) {
            const ch = str[i];
            if (ch == ';') return seen_name_char;
            const is_alnum = (ch >= 'a' and ch <= 'z') or
                (ch >= 'A' and ch <= 'Z') or
                (ch >= '0' and ch <= '9');
            if (!is_alnum) return false;
            seen_name_char = true;
        }
        return false;
    }

    // Numeric reference: &#digits; or &#xhex;
    const is_hex_form = str[2] == 'x' or str[2] == 'X';
    var i: usize = if (is_hex_form) 3 else 2;
    var seen_digit = false; // at least one digit is required before ';'
    while (i < str.len and i < numeric_scan_end) : (i += 1) {
        const ch = str[i];
        if (ch == ';') return seen_digit;
        const is_decimal = ch >= '0' and ch <= '9';
        const is_hex_letter = (ch >= 'a' and ch <= 'f') or (ch >= 'A' and ch <= 'F');
        if (!(is_decimal or (is_hex_form and is_hex_letter))) return false;
        seen_digit = true;
    }
    return false;
}
|
||||||
|
|
||||||
|
/// Append `str` to `output`, escaping it for use as HTML text content.
///
/// `<` becomes `&lt;`, `>` becomes `&gt;`, and a bare `&` becomes `&amp;`.
/// Quotes are NOT escaped. An `&` that already starts a character reference
/// (as reported by `isHtmlEntity`, e.g. `&#8217;` or `&amp;`) is copied
/// through unchanged so existing entities are not double-escaped.
///
/// Returns `error.OutOfMemory` if `output` cannot grow.
/// Shared across codegen.zig and template.zig.
pub fn appendTextEscaped(allocator: Allocator, output: *ArrayListUnmanaged(u8), str: []const u8) Allocator.Error!void {
    for (str, 0..) |c, i| {
        switch (c) {
            '<' => try output.appendSlice(allocator, "&lt;"),
            '>' => try output.appendSlice(allocator, "&gt;"),
            '&' => {
                // Look ahead from this '&': keep it verbatim when it opens an
                // existing entity, otherwise escape it.
                if (isHtmlEntity(str[i..])) {
                    try output.append(allocator, c);
                } else {
                    try output.appendSlice(allocator, "&amp;");
                }
            },
            else => try output.append(allocator, c),
        }
    }
}
|
||||||
|
|
||||||
|
/// Check if a doctype value corresponds to XHTML (non-terse mode).
///
/// Returns true when `val` is exactly one of `xml`, `strict`, `transitional`,
/// `frameset`, `1.1`, `basic` or `mobile`; false for anything else. The
/// comparison is byte-exact, so it is case-sensitive.
/// Shared across codegen.zig, template.zig, and zig_codegen.zig.
pub fn isXhtmlDoctype(val: []const u8) bool {
    const xhtml_doctype_names = [_][]const u8{
        "xml",
        "strict",
        "transitional",
        "frameset",
        "1.1",
        "basic",
        "mobile",
    };
    for (xhtml_doctype_names) |name| {
        if (mem.eql(u8, val, name)) return true;
    }
    return false;
}
|
||||||
|
|
||||||
/// Attribute entry for attrs function
|
/// Attribute entry for attrs function
|
||||||
pub const AttrEntry = struct {
|
pub const AttrEntry = struct {
|
||||||
key: []const u8,
|
key: []const u8,
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ const pug = @import("pug.zig");
|
|||||||
const parser = @import("parser.zig");
|
const parser = @import("parser.zig");
|
||||||
const Node = parser.Node;
|
const Node = parser.Node;
|
||||||
const runtime = @import("runtime.zig");
|
const runtime = @import("runtime.zig");
|
||||||
|
const codegen = @import("codegen.zig");
|
||||||
const mixin_mod = @import("mixin.zig");
|
const mixin_mod = @import("mixin.zig");
|
||||||
pub const MixinRegistry = mixin_mod.MixinRegistry;
|
pub const MixinRegistry = mixin_mod.MixinRegistry;
|
||||||
|
|
||||||
@@ -194,14 +195,7 @@ fn detectDoctype(node: *Node, ctx: *RenderContext) void {
|
|||||||
if (node.type == .Doctype) {
|
if (node.type == .Doctype) {
|
||||||
if (node.val) |val| {
|
if (node.val) |val| {
|
||||||
// XHTML doctypes use non-terse mode
|
// XHTML doctypes use non-terse mode
|
||||||
if (std.mem.eql(u8, val, "xml") or
|
if (runtime.isXhtmlDoctype(val)) {
|
||||||
std.mem.eql(u8, val, "strict") or
|
|
||||||
std.mem.eql(u8, val, "transitional") or
|
|
||||||
std.mem.eql(u8, val, "frameset") or
|
|
||||||
std.mem.eql(u8, val, "1.1") or
|
|
||||||
std.mem.eql(u8, val, "basic") or
|
|
||||||
std.mem.eql(u8, val, "mobile"))
|
|
||||||
{
|
|
||||||
ctx.terse = false;
|
ctx.terse = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -826,7 +820,7 @@ fn processInterpolation(allocator: Allocator, output: *std.ArrayListUnmanaged(u8
|
|||||||
'<' => try output.appendSlice(allocator, "<"),
|
'<' => try output.appendSlice(allocator, "<"),
|
||||||
'>' => try output.appendSlice(allocator, ">"),
|
'>' => try output.appendSlice(allocator, ">"),
|
||||||
'&' => {
|
'&' => {
|
||||||
if (isHtmlEntity(text[i..])) {
|
if (runtime.isHtmlEntity(text[i..])) {
|
||||||
try output.append(allocator, c);
|
try output.append(allocator, c);
|
||||||
} else {
|
} else {
|
||||||
try output.appendSlice(allocator, "&");
|
try output.appendSlice(allocator, "&");
|
||||||
@@ -872,92 +866,15 @@ fn getFieldValue(data: anytype, name: []const u8) ?[]const u8 {
|
|||||||
|
|
||||||
/// Escape for text content - escapes < > & (NOT quotes)
|
/// Escape for text content - escapes < > & (NOT quotes)
|
||||||
/// Preserves existing HTML entities like &#8217;
|
/// Preserves existing HTML entities like &#8217;
|
||||||
|
/// Uses shared appendTextEscaped from runtime.zig.
|
||||||
fn appendTextEscaped(allocator: Allocator, output: *std.ArrayListUnmanaged(u8), str: []const u8) Allocator.Error!void {
|
fn appendTextEscaped(allocator: Allocator, output: *std.ArrayListUnmanaged(u8), str: []const u8) Allocator.Error!void {
|
||||||
var i: usize = 0;
|
try runtime.appendTextEscaped(allocator, output, str);
|
||||||
while (i < str.len) {
|
|
||||||
const c = str[i];
|
|
||||||
switch (c) {
|
|
||||||
'<' => try output.appendSlice(allocator, "<"),
|
|
||||||
'>' => try output.appendSlice(allocator, ">"),
|
|
||||||
'&' => {
|
|
||||||
if (isHtmlEntity(str[i..])) {
|
|
||||||
try output.append(allocator, c);
|
|
||||||
} else {
|
|
||||||
try output.appendSlice(allocator, "&");
|
|
||||||
}
|
|
||||||
},
|
|
||||||
else => try output.append(allocator, c),
|
|
||||||
}
|
|
||||||
i += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Check if string starts with a valid HTML entity
|
|
||||||
fn isHtmlEntity(str: []const u8) bool {
|
|
||||||
if (str.len < 3 or str[0] != '&') return false;
|
|
||||||
|
|
||||||
var i: usize = 1;
|
|
||||||
|
|
||||||
// Numeric entity: &#digits; or &#xhex;
|
|
||||||
if (str[i] == '#') {
|
|
||||||
i += 1;
|
|
||||||
if (i >= str.len) return false;
|
|
||||||
|
|
||||||
if (str[i] == 'x' or str[i] == 'X') {
|
|
||||||
i += 1;
|
|
||||||
if (i >= str.len) return false;
|
|
||||||
var has_hex = false;
|
|
||||||
while (i < str.len and i < 10) : (i += 1) {
|
|
||||||
const ch = str[i];
|
|
||||||
if (ch == ';') return has_hex;
|
|
||||||
if ((ch >= '0' and ch <= '9') or
|
|
||||||
(ch >= 'a' and ch <= 'f') or
|
|
||||||
(ch >= 'A' and ch <= 'F'))
|
|
||||||
{
|
|
||||||
has_hex = true;
|
|
||||||
} else {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
var has_digit = false;
|
|
||||||
while (i < str.len and i < 10) : (i += 1) {
|
|
||||||
const ch = str[i];
|
|
||||||
if (ch == ';') return has_digit;
|
|
||||||
if (ch >= '0' and ch <= '9') {
|
|
||||||
has_digit = true;
|
|
||||||
} else {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Named entity: &name;
|
|
||||||
var has_alpha = false;
|
|
||||||
while (i < str.len and i < 32) : (i += 1) {
|
|
||||||
const ch = str[i];
|
|
||||||
if (ch == ';') return has_alpha;
|
|
||||||
if ((ch >= 'a' and ch <= 'z') or (ch >= 'A' and ch <= 'Z') or (ch >= '0' and ch <= '9')) {
|
|
||||||
has_alpha = true;
|
|
||||||
} else {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Check if tag is a void (self-closing) HTML element.
|
||||||
|
/// Uses shared void_elements from codegen.zig.
|
||||||
fn isSelfClosing(name: []const u8) bool {
|
fn isSelfClosing(name: []const u8) bool {
|
||||||
const self_closing_tags = [_][]const u8{
|
return codegen.void_elements.has(name);
|
||||||
"area", "base", "br", "col", "embed", "hr", "img", "input",
|
|
||||||
"link", "meta", "param", "source", "track", "wbr",
|
|
||||||
};
|
|
||||||
for (self_closing_tags) |tag| {
|
|
||||||
if (std.mem.eql(u8, name, tag)) return true;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
|
|||||||
@@ -603,14 +603,8 @@ pub const Codegen = struct {
|
|||||||
if (node.type == .Doctype) {
|
if (node.type == .Doctype) {
|
||||||
if (node.val) |val| {
|
if (node.val) |val| {
|
||||||
// XHTML doctypes use non-terse mode
|
// XHTML doctypes use non-terse mode
|
||||||
if (std.mem.eql(u8, val, "xml") or
|
// Uses shared isXhtmlDoctype from runtime.zig
|
||||||
std.mem.eql(u8, val, "strict") or
|
if (runtime.isXhtmlDoctype(val)) {
|
||||||
std.mem.eql(u8, val, "transitional") or
|
|
||||||
std.mem.eql(u8, val, "frameset") or
|
|
||||||
std.mem.eql(u8, val, "1.1") or
|
|
||||||
std.mem.eql(u8, val, "basic") or
|
|
||||||
std.mem.eql(u8, val, "mobile"))
|
|
||||||
{
|
|
||||||
self.terse = false;
|
self.terse = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user