pugz/src/strip_comments.zig

// strip_comments.zig - Zig port of pug-strip-comments
//
// Filters out comment tokens from a token stream.
// Handles both buffered and unbuffered comments with pipeless text support.

const std = @import("std");
const Allocator = std.mem.Allocator;

// Import token types from lexer
const lexer = @import("lexer.zig");
pub const Token = lexer.Token;
pub const TokenType = lexer.TokenType;

// Import error types
const pug_error = @import("error.zig");
pub const PugError = pug_error.PugError;

// ============================================================================
// Strip Comments Options
// ============================================================================

/// Options controlling which comment kinds `stripComments` removes.
///
/// Every field has a default, so `.{}` selects "strip unbuffered comments,
/// keep buffered ones". A comment token is classified by `Token.isBuffered()`.
pub const StripCommentsOptions = struct {
    /// Strip unbuffered comments (default: true)
    strip_unbuffered: bool = true,
    /// Strip buffered comments (default: false)
    strip_buffered: bool = false,
    /// Source filename for error messages; forwarded unchanged to the
    /// diagnostic built when an unexpected token is encountered.
    filename: ?[]const u8 = null,
};

// ============================================================================
// Errors
// ============================================================================

/// Errors `stripComments` can return.
///
/// - `OutOfMemory`: growing the output token list failed.
/// - `UnexpectedToken`: a `comment`, `start_pipeless_text` or
///   `end_pipeless_text` token appeared in a position the filter rejects.
pub const StripCommentsError = Allocator.Error || error{UnexpectedToken};

// ============================================================================
// Strip Comments Result
// ============================================================================

/// Output of a strip pass: the surviving tokens plus an optional diagnostic.
pub const StripCommentsResult = struct {
    const Self = @This();

    /// Tokens that were kept, in the order they appeared in the input.
    tokens: std.ArrayListUnmanaged(Token),
    /// Diagnostic slot, `null` unless set by the producer. `deinit` does not
    /// touch this field.
    err: ?PugError = null,

    /// Release the memory backing `tokens`. `allocator` must be the allocator
    /// that was used to grow the list.
    pub fn deinit(self: *Self, allocator: Allocator) void {
        self.tokens.deinit(allocator);
    }
};

// ============================================================================
// Strip Comments Implementation
// ============================================================================

/// Filter comment tokens, and the tokens that belong to them, out of `input`.
///
/// A `comment` token is stripped when `options.strip_buffered` (for buffered
/// comments, per `Token.isBuffered()`) or `options.strip_unbuffered` (for all
/// others) is set. While a comment is being stripped:
/// - `text` / `text_html` tokens are dropped and do not end the comment;
/// - a `start_pipeless_text` … `end_pipeless_text` block is dropped in full,
///   and its `end_pipeless_text` ends the comment;
/// - any other token outside such a block ends the comment and is kept.
/// Tokens outside a stripped comment are copied through unchanged.
///
/// Ownership: on success the caller owns the returned list and must release
/// it with `StripCommentsResult.deinit` using the same `allocator`. On error
/// the partially built list is freed before returning.
///
/// Errors:
/// - `error.UnexpectedToken`: a `comment` inside a comment being stripped, a
///   `start_pipeless_text` while already in pipeless text, or an
///   `end_pipeless_text` without a matching start (the latter two only while
///   a comment is being stripped).
/// - `error.OutOfMemory`: the output list could not grow.
pub fn stripComments(
    allocator: Allocator,
    input: []const Token,
    options: StripCommentsOptions,
) StripCommentsError!StripCommentsResult {
    var result = StripCommentsResult{
        .tokens = .{},
    };
    // Every error return below discards `result`, so the tokens collected so
    // far must be released here; otherwise they leak.
    errdefer result.deinit(allocator);

    // True after a stripped `comment` token until some token ends the comment.
    var in_comment = false;
    // True between `start_pipeless_text` and `end_pipeless_text` of a
    // stripped comment; everything in between is dropped.
    var in_pipeless_text = false;

    for (input) |tok| {
        const should_include = switch (tok.type) {
            .comment => blk: {
                if (in_comment) {
                    // Building the diagnostic is best-effort: a failure in
                    // `makeError` is ignored.
                    // NOTE(review): `result` is local and dropped on the error
                    // return, so this diagnostic never reaches the caller.
                    // Whether `makeError` returns memory that should be
                    // released here is not visible from this file — confirm
                    // against error.zig.
                    result.err = pug_error.makeError(
                        allocator,
                        "UNEXPECTED_TOKEN",
                        "`comment` encountered when already in a comment",
                        .{
                            .line = tok.loc.start.line,
                            .column = tok.loc.start.column,
                            .filename = options.filename,
                            .src = null,
                        },
                    ) catch null;
                    return error.UnexpectedToken;
                }

                // Enter "comment" state only if this kind of comment is to be
                // stripped; a kept comment is passed through like any token.
                in_comment = if (tok.isBuffered())
                    options.strip_buffered
                else
                    options.strip_unbuffered;
                break :blk !in_comment;
            },

            .start_pipeless_text => blk: {
                // Pipeless text that is not part of a stripped comment is kept.
                if (!in_comment) {
                    break :blk true;
                }
                if (in_pipeless_text) {
                    // Same best-effort diagnostic caveat as in `.comment`.
                    result.err = pug_error.makeError(
                        allocator,
                        "UNEXPECTED_TOKEN",
                        "`start-pipeless-text` encountered when already in pipeless text mode",
                        .{
                            .line = tok.loc.start.line,
                            .column = tok.loc.start.column,
                            .filename = options.filename,
                            .src = null,
                        },
                    ) catch null;
                    return error.UnexpectedToken;
                }
                in_pipeless_text = true;
                break :blk false;
            },

            .end_pipeless_text => blk: {
                if (!in_comment) {
                    break :blk true;
                }
                if (!in_pipeless_text) {
                    // Same best-effort diagnostic caveat as in `.comment`.
                    result.err = pug_error.makeError(
                        allocator,
                        "UNEXPECTED_TOKEN",
                        "`end-pipeless-text` encountered when not in pipeless text mode",
                        .{
                            .line = tok.loc.start.line,
                            .column = tok.loc.start.column,
                            .filename = options.filename,
                            .src = null,
                        },
                    ) catch null;
                    return error.UnexpectedToken;
                }
                // The end of the block also ends the stripped comment.
                in_pipeless_text = false;
                in_comment = false;
                break :blk false;
            },

            // Text may sit right after `comment` and before any pipeless
            // block; it is dropped with the comment but does not end it.
            .text, .text_html => !in_comment,

            // Any other token: dropped inside a stripped pipeless block,
            // otherwise it ends the comment (if any) and is kept.
            else => blk: {
                if (in_pipeless_text) {
                    break :blk false;
                }
                in_comment = false;
                break :blk true;
            },
        };

        if (should_include) {
            try result.tokens.append(allocator, tok);
        }
    }

    return result;
}

/// Shorthand for `stripComments` with default options, i.e. unbuffered
/// comments are stripped and buffered comments are kept.
/// The result and error contract are exactly those of `stripComments`.
pub fn stripUnbufferedComments(
    allocator: Allocator,
    input: []const Token,
) StripCommentsError!StripCommentsResult {
    const default_options: StripCommentsOptions = .{};
    return stripComments(allocator, input, default_options);
}

/// Shorthand for `stripComments` with both `strip_unbuffered` and
/// `strip_buffered` enabled, so every comment is removed.
/// The result and error contract are exactly those of `stripComments`.
pub fn stripAllComments(
    allocator: Allocator,
    input: []const Token,
) StripCommentsError!StripCommentsResult {
    const strip_everything = StripCommentsOptions{
        .strip_unbuffered = true,
        .strip_buffered = true,
    };
    return stripComments(allocator, input, strip_everything);
}

// ============================================================================
// Tests
// ============================================================================

test "stripComments - no comments" {
    const testing = std.testing;

    // A stream without any comment token: tag, newline, end of source.
    const input = [_]Token{
        .{ .type = .tag, .loc = .{ .start = .{ .line = 1, .column = 1 } }, .val = .{ .string = "div" } },
        .{ .type = .newline, .loc = .{ .start = .{ .line = 1, .column = 4 } } },
        .{ .type = .eos, .loc = .{ .start = .{ .line = 2, .column = 1 } } },
    };

    var stripped = try stripComments(testing.allocator, &input, .{});
    defer stripped.deinit(testing.allocator);

    // Nothing to strip, so all three tokens must come back.
    try testing.expectEqual(@as(usize, 3), stripped.tokens.items.len);
}

test "stripComments - strip unbuffered comment" {
    const testing = std.testing;

    // An unbuffered comment with inline text, sandwiched between two tags.
    const input = [_]Token{
        .{ .type = .tag, .loc = .{ .start = .{ .line = 1, .column = 1 } }, .val = .{ .string = "div" } },
        .{ .type = .newline, .loc = .{ .start = .{ .line = 1, .column = 4 } } },
        .{ .type = .comment, .loc = .{ .start = .{ .line = 2, .column = 1 } }, .buffer = .{ .boolean = false } },
        .{ .type = .text, .loc = .{ .start = .{ .line = 2, .column = 4 } }, .val = .{ .string = "comment text" } },
        .{ .type = .newline, .loc = .{ .start = .{ .line = 2, .column = 16 } } },
        .{ .type = .tag, .loc = .{ .start = .{ .line = 3, .column = 1 } }, .val = .{ .string = "span" } },
        .{ .type = .eos, .loc = .{ .start = .{ .line = 4, .column = 1 } } },
    };

    var stripped = try stripComments(testing.allocator, &input, .{});
    defer stripped.deinit(testing.allocator);

    // The comment and its text are gone; tags and line structure remain.
    const expected_types = [_]TokenType{ .tag, .newline, .newline, .tag, .eos };
    const kept = stripped.tokens.items;
    try testing.expectEqual(@as(usize, 5), kept.len);
    for (expected_types, kept) |want, got| {
        try testing.expectEqual(want, got.type);
    }
}

test "stripComments - keep buffered comment by default" {
    const testing = std.testing;

    // A buffered comment with inline text following a tag.
    const input = [_]Token{
        .{ .type = .tag, .loc = .{ .start = .{ .line = 1, .column = 1 } }, .val = .{ .string = "div" } },
        .{ .type = .newline, .loc = .{ .start = .{ .line = 1, .column = 4 } } },
        .{ .type = .comment, .loc = .{ .start = .{ .line = 2, .column = 1 } }, .buffer = .{ .boolean = true } },
        .{ .type = .text, .loc = .{ .start = .{ .line = 2, .column = 4 } }, .val = .{ .string = "buffered comment" } },
        .{ .type = .newline, .loc = .{ .start = .{ .line = 2, .column = 20 } } },
        .{ .type = .eos, .loc = .{ .start = .{ .line = 3, .column = 1 } } },
    };

    var stripped = try stripComments(testing.allocator, &input, .{});
    defer stripped.deinit(testing.allocator);

    // Default options leave buffered comments alone: all six tokens survive.
    try testing.expectEqual(@as(usize, 6), stripped.tokens.items.len);
}

test "stripComments - strip buffered when option set" {
    const testing = std.testing;

    // Same stream as the "keep buffered" case; only the options differ.
    const input = [_]Token{
        .{ .type = .tag, .loc = .{ .start = .{ .line = 1, .column = 1 } }, .val = .{ .string = "div" } },
        .{ .type = .newline, .loc = .{ .start = .{ .line = 1, .column = 4 } } },
        .{ .type = .comment, .loc = .{ .start = .{ .line = 2, .column = 1 } }, .buffer = .{ .boolean = true } },
        .{ .type = .text, .loc = .{ .start = .{ .line = 2, .column = 4 } }, .val = .{ .string = "buffered comment" } },
        .{ .type = .newline, .loc = .{ .start = .{ .line = 2, .column = 20 } } },
        .{ .type = .eos, .loc = .{ .start = .{ .line = 3, .column = 1 } } },
    };

    const options: StripCommentsOptions = .{ .strip_buffered = true };
    var stripped = try stripComments(testing.allocator, &input, options);
    defer stripped.deinit(testing.allocator);

    // The comment token and its text are removed, leaving four tokens.
    try testing.expectEqual(@as(usize, 4), stripped.tokens.items.len);
}

test "stripComments - pipeless text in comment" {
    const testing = std.testing;

    // An unbuffered comment that owns a pipeless-text block, then a tag.
    const input = [_]Token{
        .{ .type = .comment, .loc = .{ .start = .{ .line = 1, .column = 1 } }, .buffer = .{ .boolean = false } },
        .{ .type = .start_pipeless_text, .loc = .{ .start = .{ .line = 1, .column = 1 } } },
        .{ .type = .text, .loc = .{ .start = .{ .line = 2, .column = 3 } }, .val = .{ .string = "line 1" } },
        .{ .type = .text, .loc = .{ .start = .{ .line = 3, .column = 3 } }, .val = .{ .string = "line 2" } },
        .{ .type = .end_pipeless_text, .loc = .{ .start = .{ .line = 4, .column = 1 } } },
        .{ .type = .tag, .loc = .{ .start = .{ .line = 5, .column = 1 } }, .val = .{ .string = "div" } },
        .{ .type = .eos, .loc = .{ .start = .{ .line = 6, .column = 1 } } },
    };

    var stripped = try stripComments(testing.allocator, &input, .{});
    defer stripped.deinit(testing.allocator);

    // The whole comment, pipeless block included, is removed.
    const expected_types = [_]TokenType{ .tag, .eos };
    const kept = stripped.tokens.items;
    try testing.expectEqual(@as(usize, 2), kept.len);
    for (expected_types, kept) |want, got| {
        try testing.expectEqual(want, got.type);
    }
}

test "stripComments - pipeless text outside comment" {
    const testing = std.testing;

    // A pipeless-text block attached to a tag, with no comment anywhere.
    const input = [_]Token{
        .{ .type = .tag, .loc = .{ .start = .{ .line = 1, .column = 1 } }, .val = .{ .string = "script" } },
        .{ .type = .dot, .loc = .{ .start = .{ .line = 1, .column = 7 } } },
        .{ .type = .start_pipeless_text, .loc = .{ .start = .{ .line = 1, .column = 8 } } },
        .{ .type = .text, .loc = .{ .start = .{ .line = 2, .column = 3 } }, .val = .{ .string = "var x = 1;" } },
        .{ .type = .end_pipeless_text, .loc = .{ .start = .{ .line = 3, .column = 1 } } },
        .{ .type = .eos, .loc = .{ .start = .{ .line = 4, .column = 1 } } },
    };

    var stripped = try stripComments(testing.allocator, &input, .{});
    defer stripped.deinit(testing.allocator);

    // Without a comment, the pipeless block passes through: six tokens kept.
    try testing.expectEqual(@as(usize, 6), stripped.tokens.items.len);
}

test "stripComments - keep unbuffered when option disabled" {
    const testing = std.testing;

    // A lone unbuffered comment with inline text.
    const input = [_]Token{
        .{ .type = .comment, .loc = .{ .start = .{ .line = 1, .column = 1 } }, .buffer = .{ .boolean = false } },
        .{ .type = .text, .loc = .{ .start = .{ .line = 1, .column = 4 } }, .val = .{ .string = "keep me" } },
        .{ .type = .newline, .loc = .{ .start = .{ .line = 1, .column = 11 } } },
        .{ .type = .eos, .loc = .{ .start = .{ .line = 2, .column = 1 } } },
    };

    const options: StripCommentsOptions = .{ .strip_unbuffered = false };
    var stripped = try stripComments(testing.allocator, &input, options);
    defer stripped.deinit(testing.allocator);

    // With stripping turned off, the comment and its text are retained.
    try testing.expectEqual(@as(usize, 4), stripped.tokens.items.len);
}

test "stripAllComments - strips both types" {
    const testing = std.testing;

    // One unbuffered and one buffered comment, followed by a tag.
    const input = [_]Token{
        .{ .type = .comment, .loc = .{ .start = .{ .line = 1, .column = 1 } }, .buffer = .{ .boolean = false } },
        .{ .type = .text, .loc = .{ .start = .{ .line = 1, .column = 4 } }, .val = .{ .string = "unbuffered" } },
        .{ .type = .newline, .loc = .{ .start = .{ .line = 1, .column = 14 } } },
        .{ .type = .comment, .loc = .{ .start = .{ .line = 2, .column = 1 } }, .buffer = .{ .boolean = true } },
        .{ .type = .text, .loc = .{ .start = .{ .line = 2, .column = 4 } }, .val = .{ .string = "buffered" } },
        .{ .type = .newline, .loc = .{ .start = .{ .line = 2, .column = 12 } } },
        .{ .type = .tag, .loc = .{ .start = .{ .line = 3, .column = 1 } }, .val = .{ .string = "div" } },
        .{ .type = .eos, .loc = .{ .start = .{ .line = 4, .column = 1 } } },
    };

    var stripped = try stripAllComments(testing.allocator, &input);
    defer stripped.deinit(testing.allocator);

    // Both comments and their text are removed; newlines, tag and eos stay.
    const expected_types = [_]TokenType{ .newline, .newline, .tag, .eos };
    const kept = stripped.tokens.items;
    try testing.expectEqual(@as(usize, 4), kept.len);
    for (expected_types, kept) |want, got| {
        try testing.expectEqual(want, got.type);
    }
}