mmix-as/src/parser.zig

const std = @import("std");
const opcodes = @import("opcodes.zig");

/// A number's value can be pure or point to a register
const NumberType = enum {
    pure,
    register,
};
const NumberValue = union(NumberType) {
    pure: u64,
    register: u8,
};

/// An expression's result can be a NumberValue or a string
const ExpressionResultType = enum {
    number,
    string,
};
const ExpressionResult = union(ExpressionResultType) {
    number: NumberValue,
    string: []const u8,
};

/// A constant can be a number of a string
const ConstantType = enum {
    number,
    string,
};
const ConstantValue = union(ConstantType) {
    number: u64,
    string: []const u8,
};

/// The Parser reads a provided input and assembles it into MMIX object code
pub const Parser = struct {
    allocator: std.mem.Allocator,
    input: []const u8,
    location: u64,
    ch_pos: usize,
    symbols: std.StringHashMap(NumberValue),
    object: std.ArrayList(u8),

    /// Test is a character is whitespace
    /// Note that newlines are special and not included in this implementation.
    fn isWhitespace(ch: u8) bool {
        return ch == ' ' or ch == '\t' or ch == '\r';
    }

    /// Test if a character is a letter
    /// Note that underscores are letters for the purposes of symbol recognition.
    fn isLetter(ch: u8) bool {
        return ch == '_' or (ch >= 'a' and ch <= 'z') or (ch >= 'A' and ch <= 'Z');
    }

    /// Test if a character is a decimal digit
    fn isDecimal(ch: u8) bool {
        return ch >= '0' and ch <= '9';
    }

    /// Test if a character is a hexadecimal digit
    fn isHexadecimal(ch: u8) bool {
        return isDecimal(ch) or (ch >= 'a' and ch <= 'f') or (ch >= 'A' and ch <= 'F');
    }

    /// Test if a character is a symbol character
    /// Note that all valid unicode characters larger than 126 are also valid symbol characters.
    fn isSymbolChar(ch: u8) bool {
        return isLetter(ch) or isDecimal(ch) or ch > 126;
    }

    /// Get a byte from the input at a specified location
    /// Return 0 if the requested byte is out of range
    fn getByte(self: *Parser, pos: usize) u8 {
        if (pos < self.input.len) {
            return self.input[pos];
        }
        return 0;
    }

    /// Move the cursor forward until it does not point at whitespace
    fn skipWhitespace(self: *Parser) void {
        while (isWhitespace(self.getByte(self.ch_pos))) {
            self.ch_pose += 1;
        }
    }

    const DecimalError = std.fmt.ParseIntError;

    /// Determine whether the cursor points at a valid integer in base 10
    /// Move the cursor past the integer and return it
    fn identifyDecimal(self: *Parser) DecimalError!u64 {
        const start = self.ch_pos;
        while (isDecimal(self.getByte(self.ch_pos))) {
            self.ch_pos += 1;
        }
        const end = self.ch_pos;

        return std.fmt.parseInt(u64, self.input[start..end], 10);
    }

    const HexadecimalError = std.fmt.ParseIntError || error{
        WrongStartingCharacter,
    };

    /// Determine whether the cursor points at a valid integer in base 16
    /// Base 16 is identified by a number starting with #
    /// Move the cursor past the integer and return it
    fn identifyHexadecimal(self: *Parser) HexadecimalError!u64 {
        if (self.getByte(self.ch_pos) != '#') {
            return HexadecimalError.WrongStartingCharacter;
        }

        self.ch_pos += 1;

        const start = self.ch_pos;
        while (isHexadecimal(self.getByte(self.ch_pos))) {
            self.ch_pos += 1;
        }
        const end = self.ch_pos;

        return std.fmt.parseInt(u64, self.input[start..end], 16);
    }

    const CharError = error{
        NoStartingDelimiter,
        NoEndingDelimiter,
        NoChar,
        InvalidUtf8,
    };

    /// Determine whether the cursor points at a valid unicode character wrapped in single quotes
    /// Move the cursor past the closing quote and return the character
    fn identifyChar(self: *Parser) CharError!u21 {
        if (self.getByte(self.ch_pos) != '\'') {
            return CharError.NoStartingDelimiter;
        }

        self.ch_pos += 1;

        const start = self.ch_pos;
        while (self.getByte(self.ch_pos) != 0 and self.getByte(self.ch_pos) != '\'') {
            self.ch_pos += 1;

            if (self.getByte(self.ch_pos) == ';' or self.getByte(self.ch_pos) == '\n') {
                return CharError.NoEndingDelimiter;
            }
            if (self.ch_pos - start > 4) {
                return CharError.NoEndingDelimiter;
            }
        }
        const end = self.ch_pos;
        self.ch_pos += 1;

        if (end <= start) {
            return CharError.NoChar;
        }

        const view = std.unicode.Utf8View.init(self.input[start..end]) catch return CharError.InvalidUtf8;
        var iter = view.iterator();
        var count: u8 = 0;
        var character: u21 = 0;
        while (iter.nextCodepoint()) |u| {
            character = u;
            count += 1;
            if (count > 1) {
                return CharError.NoEndingDelimiter;
            }
        }
        if (count != 1) {
            return CharError.NoEndingDelimiter;
        }

        return character;
    }

    const StringError = error{
        NoStartingDelimiter,
        NoEndingDelimiter,
        NoString,
    };

    /// Determine whether the cursor points at a valid string wrapped in double quotes
    /// Note that a string has at least one character in it and that it cannot have " or newlines in it
    /// Move the cursor past the string and return the string
    fn identifyString(self: *Parser) StringError![]const u8 {
        if (self.getByte(self.ch_pos) != '"') {
            return StringError.NoStartingDelimiter;
        }

        self.ch_pos += 1;
        const start = self.ch_pos;
        while (self.getByte(self.ch_pos) != 0 and self.getByte(self.ch_pos) != '"') {
            if (self.getByte(self.ch_pos) == '\n') {
                return StringError.NoEndingDelimiter;
            }
            self.ch_pos += 1;
        }
        const end = self.ch_pos;
        if (self.getByte(self.ch_pos) == '"') {
            self.ch_pos += 1;
        } else {
            return StringError.NoEndingDelimiter;
        }

        if (end <= start) {
            return StringError.NoString;
        }

        return self.input[start..end];
    }

    const ConstantError = DecimalError || HexadecimalError || CharError || StringError || error{
        UnexpectedSymbol,
    };

    /// Determine whether the cursor points at a valid constant
    /// The constant may be a string or a number
    /// Move the cursor past the constant and return it
    fn identifyConstant(self: *Parser) !ConstantValue {
        switch (self.getByte(self.ch_pos)) {
            '0'...'9' => {
                const number = try identifyDecimal(self);
                return ConstantValue{ .number = number };
            },
            '#' => {
                const number = try identifyHexadecimal(self);
                return ConstantValue{ .number = number };
            },
            '\'' => {
                const char = try identifyChar(self);
                return ConstantValue{ .number = char };
            },
            '"' => {
                const string = try identifyString(self);
                return ConstantValue{ .string = string };
            },
            else => return ConstantError.UnexpectedSymbol,
        }
    }

    const SymbolError = error{
        UnexpectedSymbol,
        ForwardReference0,
        ForwardReference1,
        ForwardReference2,
        ForwardReference3,
        ForwardReference4,
        ForwardReference5,
        ForwardReference6,
        ForwardReference7,
        ForwardReference8,
        ForwardReference9,
    };

    /// Determine whether the cursor points at a symbol
    /// A symbol starts with a letter and only has symbol characters after that point
    /// There is an exception that there are 30 special symbols of the form xH, xF, and xB where x is a single decimal digit
    /// Move the cursor past the symbol and return its name
    fn identifySymbol(self: *Parser) SymbolError![]const u8 {
        const start = self.ch_pos;
        if ((isLetter(self.getByte(self.ch_pos)) or self.getByte(self.ch_pos) == '_')) {
            self.ch_pos += 1;

            while (isSymbolChar(self.getByte(self.ch_pos))) {
                self.ch_pos += 1;
            }
        } else if (isDecimal(self.getByte(self.ch_pos)) and
            (self.getByte(self.ch_pos + 1) == 'H' or self.getByte(self.ch_pos + 1) == 'F' or self.getByte(self.ch_pos + 1) == 'B'))
        {
            self.ch_pos += 2;
            return self.input[self.ch_pos - 2 .. self.ch_pos];
        }
        const end = self.ch_pos;

        if (end > start) {
            return self.input[start..end];
        }

        return SymbolError.UnexpectedSymbol;
    }

    /// Get the number associated with a given symbol
    fn readSymbol(self: *Parser, symbol: []const u8) SymbolError!NumberValue {
        if (symbol.len == 2 and symbol[0] >= '0' and symbol[0] <= '9' and symbol[1] != 'B') {
            if (symbol[1] != 'F') return SymbolError.UnexpectedSymbol;

            return switch (symbol[0]) {
                '0' => SymbolError.ForwardReference0,
                '1' => SymbolError.ForwardReference1,
                '2' => SymbolError.ForwardReference2,
                '3' => SymbolError.ForwardReference3,
                '4' => SymbolError.ForwardReference4,
                '5' => SymbolError.ForwardReference5,
                '6' => SymbolError.ForwardReference6,
                '7' => SymbolError.ForwardReference7,
                '8' => SymbolError.ForwardReference8,
                '9' => SymbolError.ForwardReference9,
                else => unreachable,
            };
        }

        const n = self.symbols.get(symbol);
        if (n == null) return SymbolError.UnexpectedSymbol;
        return n.?;
    }

    const WeakOp = enum {
        add,
        sub,
        bit_or,
        bit_xor,
        none,
    };

    const ExpressionError = SymbolError || ConstantError || error{
        NoEndingDelimiter,
        UnaryOperationOnString,
        BinaryOperationOnString,
        IllegalOperationOnRegister,
        DivisionByZero,
        FractionalDivisionOversizedNumerator,
    };

    /// Determine whether the cursor points at a valid expression
    /// Move the cursor past the expression, evaluate it, and return its value
    fn identifyExpression(self: *Parser) ExpressionError!ExpressionResult {
        var result: u64 = 0;
        var last_op = WeakOp.none;
        var done = false;
        var started = false;
        var register_result: ?u8 = null;
        var string_result: ?[]const u8 = null;
        while (!done) {
            if (string_result != null) return ExpressionError.BinaryOperationOnString;
            if (register_result != null) return ExpressionError.IllegalOperationOnRegister;

            const term = try self.identifyTerm();

            switch (term) {
                .string => {
                    if (!started) {
                        string_result = term.string;
                    } else {
                        return ExpressionError.BinaryOperationOnString;
                    }
                },
                .number => |nv| {
                    switch (nv) {
                        .pure => |n| {
                            result = switch (last_op) {
                                WeakOp.add => result +% n,
                                WeakOp.sub => result -% n,
                                WeakOp.bit_or => result | n,
                                WeakOp.bit_xor => result ^ n,
                                WeakOp.none => n,
                            };
                        },
                        .register => {
                            if (!started) {
                                register_result = nv.register;
                            } else {
                                return ExpressionError.IllegalOperationOnRegister;
                            }
                        },
                    }
                },
            }

            last_op = switch (self.getByte(self.ch_pos)) {
                '+' => WeakOp.add,
                '-' => WeakOp.sub,
                '|' => WeakOp.bit_or,
                '^' => WeakOp.bit_xor,
                else => WeakOp.none,
            };

            if (last_op == WeakOp.none) {
                done = true;
            } else {
                self.ch_pos += 1;
            }

            started = true;
        }

        if (string_result != null) {
            return ExpressionResult{ .string = string_result.? };
        }
        if (register_result != null) {
            return ExpressionResult{ .number = NumberValue{ .register = register_result.? } };
        }

        return ExpressionResult{ .number = NumberValue{ .pure = result } };
    }

    const StrongOp = enum {
        mult,
        div,
        frac_div,
        rem,
        lshift,
        rshift,
        bit_and,
        none,
    };

    /// Determine whether the cursor points at a valid term
    /// Move the cursor past the term, evaluate it, and return its value
    fn identifyTerm(self: *Parser) ExpressionError!ExpressionResult {
        var result: u64 = 0;
        var last_op = StrongOp.none;
        var register_result: ?u8 = null;
        var string_result: ?[]const u8 = null;
        var started = false;
        var done = false;
        while (!done) {
            if (string_result != null) return ExpressionError.BinaryOperationOnString;
            if (register_result != null) return ExpressionError.IllegalOperationOnRegister;

            const primary = try self.identifyPrimary();

            switch (primary) {
                .string => {
                    if (!started) {
                        string_result = primary.string;
                    } else {
                        return ExpressionError.BinaryOperationOnString;
                    }
                },
                .number => |nv| {
                    switch (nv) {
                        .pure => |n| {
                            result = switch (last_op) {
                                StrongOp.mult => result *% n,
                                StrongOp.div => div: {
                                    if (n == 0) return ExpressionError.DivisionByZero;
                                    break :div result / n;
                                },
                                StrongOp.frac_div => frac_div: {
                                    if (result >= n) return ExpressionError.FractionalDivisionOversizedNumerator;
                                    if (n == 0) return ExpressionError.DivisionByZero;
                                    const shifted: u128 = (@as(u128, result)) << 64;
                                    const divided: u128 = (shifted / n) % (1 << 64);
                                    break :frac_div @as(u64, @intCast(divided));
                                },
                                StrongOp.rem => rem: {
                                    if (n == 0) return ExpressionError.DivisionByZero;
                                    break :rem result % n;
                                },
                                StrongOp.lshift => if (n >= 64) 0 else result << @as(u6, @intCast(n)),
                                StrongOp.rshift => if (n >= 64) 0 else result >> @as(u6, @intCast(n)),
                                StrongOp.bit_and => result & n,
                                StrongOp.none => n,
                            };
                        },
                        .register => {
                            if (!started) {
                                register_result = nv.register;
                            } else {
                                return ExpressionError.IllegalOperationOnRegister;
                            }
                        },
                    }
                },
            }

            last_op = switch (self.getByte(self.ch_pos)) {
                '*' => StrongOp.mult,
                '/' => div: {
                    var op = StrongOp.div;
                    if (self.getByte(self.ch_pos + 1) == '/') {
                        op = StrongOp.frac_div;
                        self.ch_pos += 1;
                    }
                    break :div op;
                },
                '%' => StrongOp.rem,
                '<' => lshift: {
                    var op = StrongOp.lshift;
                    if (self.getByte(self.ch_pos + 1) == '<') {
                        self.ch_pos += 1;
                    } else {
                        op = StrongOp.none;
                    }
                    break :lshift op;
                },
                '>' => rshift: {
                    var op = StrongOp.rshift;
                    if (self.getByte(self.ch_pos + 1) == '>') {
                        self.ch_pos += 1;
                    } else {
                        op = StrongOp.none;
                    }
                    break :rshift op;
                },
                '&' => StrongOp.bit_and,
                else => StrongOp.none,
            };

            if (last_op == StrongOp.none) {
                done = true;
            } else {
                self.ch_pos += 1;
            }

            started = true;
        }

        if (register_result != null) {
            result = result % (1 << 8);
            return ExpressionResult{ .number = NumberValue{ .register = register_result.? } };
        }

        if (string_result != null) {
            return ExpressionResult{ .string = string_result.? };
        }

        return ExpressionResult{ .number = NumberValue{ .pure = result } };
    }

    /// Determine whether the cursor points at a valid primary
    /// Move the cursor past the primary, evaluate it, and return its value
    fn identifyPrimary(self: *Parser) ExpressionError!ExpressionResult {
        if (isDecimal(self.getByte(self.ch_pos)) and
            (self.getByte(self.ch_pos + 1) == 'H' or self.getByte(self.ch_pos + 1) == 'F' or self.getByte(self.ch_pos + 1) == 'B'))
        {
            const symbol = try self.identifySymbol();
            if (symbol.len != 2) return SymbolError.UnexpectedSymbol;
            if (symbol[1] == 'H') return SymbolError.UnexpectedSymbol;
            const symbol_val = try self.readSymbol(symbol);
            return ExpressionResult{ .number = symbol_val };
        }

        switch (self.getByte(self.ch_pos)) {
            '@' => return ExpressionResult{ .number = NumberValue{ .pure = self.location } },
            '(' => {
                self.ch_pos += 1;
                const expr = try self.identifyExpression();
                if (self.getByte(self.ch_pos) != ')') return error.NoEndingDelimiter;
                self.ch_pos += 1;
                return expr;
            },
            '0'...'9', '#', '\'', '"' => {
                const constant = try self.identifyConstant();
                switch (constant) {
                    .number => |n| return ExpressionResult{ .number = NumberValue{ .pure = n } },
                    .string => |s| return ExpressionResult{ .string = s },
                }
            },
            '+' => {
                self.ch_pos += 1;
                const primary = try self.identifyPrimary();
                switch (primary) {
                    .number => return primary,
                    else => return ExpressionError.UnaryOperationOnString,
                }
            },
            '-' => {
                self.ch_pos += 1;
                const primary = try self.identifyPrimary();
                switch (primary) {
                    .number => |nv| {
                        switch (nv) {
                            .register, .pure => |n| return ExpressionResult{ .number = NumberValue{ .pure = 0 -% n } },
                        }
                    },
                    else => return ExpressionError.UnaryOperationOnString,
                }
            },
            '~' => {
                self.ch_pos += 1;
                const primary = try self.identifyPrimary();
                switch (primary) {
                    .number => |nv| {
                        switch (nv) {
                            .register, .pure => |n| return ExpressionResult{ .number = NumberValue{ .pure = ~n } },
                        }
                    },
                    else => return ExpressionError.UnaryOperationOnString,
                }
            },
            '$' => {
                self.ch_pos += 1;
                const primary = try self.identifyPrimary();
                switch (primary) {
                    .number => |nv| {
                        switch (nv) {
                            .register, .pure => |n| {
                                if (n >= 256) return ExpressionError.Overflow;
                                const n8 = @as(u8, @intCast(n));
                                return ExpressionResult{ .number = NumberValue{ .register = n8 } };
                            },
                        }
                    },
                    else => return ExpressionError.UnaryOperationOnString,
                }
            },
            else => {
                const symbol = try self.identifySymbol();
                const symbol_value = try self.readSymbol(symbol);
                return ExpressionResult{ .number = symbol_value };
            },
        }
    }

    /// Determine whether the cursor points at a valid opcode or pseudo operation
    /// An opcode consists solely of symbol characters (letters and numbers in fact)
    /// Move the cursor past the opcode and return it
    fn identifyOperation(self: *Parser) !opcodes.Operation {
        const start = self.ch_pos;
        while (isSymbolChar(self.getByte(self.ch_pos))) {
            self.ch_pos += 1;
        }
        const end = self.ch_pos;

        return opcodes.parseOp(self.allocator, self.input[start..end]);
    }

    pub fn init(allocator: std.mem.Allocator, input: []const u8) !Parser {
        var p = Parser{
            .allocator = allocator,
            .input = input,
            .location = 0,
            .ch_pos = 0,
            .symbols = std.StringHashMap(NumberValue).init(allocator),
            .object = std.ArrayList(u8).init(allocator),
        };

        try p.symbols.put("0B", NumberValue{ .pure = 0 });
        try p.symbols.put("1B", NumberValue{ .pure = 0 });
        try p.symbols.put("2B", NumberValue{ .pure = 0 });
        try p.symbols.put("3B", NumberValue{ .pure = 0 });
        try p.symbols.put("4B", NumberValue{ .pure = 0 });
        try p.symbols.put("5B", NumberValue{ .pure = 0 });
        try p.symbols.put("6B", NumberValue{ .pure = 0 });
        try p.symbols.put("7B", NumberValue{ .pure = 0 });
        try p.symbols.put("8B", NumberValue{ .pure = 0 });
        try p.symbols.put("9B", NumberValue{ .pure = 0 });

        return p;
    }

    pub fn deinit(self: *Parser) void {
        self.symbols.deinit();
        self.object.deinit();
    }
};

test "normal ascii characters are recognized as symbol chars" {
    const chars = "qwertyuiopasdfghjklzxcvbnm1234567890QWERTYUIOPASDFGHJKLZXCVBNM_";

    for (chars) |c| {
        try std.testing.expect(Parser.isSymbolChar(c));
    }
}

test "large unicode characters are recognized as symbol chars" {
    const cuneiform = "𒀀𒀁𒀂𒀃𒀄𒀅𒀆𒀇𒀈𒀉𒀊𒀋𒀌𒀍𒀎𒀏𒀐𒀑𒀒𒀓𒀔𒀕𒀖𒀗𒀘𒀙𒀚𒀛𒀜𒀝𒀞𒀟𒀠𒀡𒀢𒀣𒀤𒀥𒀦𒀧𒀨𒀩𒀪𒀫𒀬𒀭𒀮𒀯𒀰𒀱𒈷𒌄";

    for (cuneiform) |c| {
        try std.testing.expect(Parser.isSymbolChar(c));
    }
}

test "non-symbol characters are detected" {
    const chars = "\n\r \t!@#$%^&*()-=+[]{}\\|;:'\"/?,.<>`~";

    for (chars) |c| {
        try std.testing.expect(!Parser.isSymbolChar(c));
    }
}

test "symbols are identified" {
    const test_cases = [_][]const u8{
        "_asdf$%@",
        "ASFLKJ3332__q5 ;asdf;lk",
        "asdf𒀤𒀥𒀦\nalsfkd",
        "2H",
        "5F",
        "0B",
    };

    const expected = [_][]const u8{
        "_asdf",
        "ASFLKJ3332__q5",
        "asdf𒀤𒀥𒀦",
        "2H",
        "5F",
        "0B",
    };

    for (0..6) |i| {
        var parser = try Parser.init(std.testing.allocator, test_cases[i]);
        const symbol = try parser.identifySymbol();
        try std.testing.expect(std.mem.eql(u8, expected[i], symbol));
        parser.deinit();
    }
}

test "no symbols are found successfully" {
    const test_cases = [_][]const u8{
        " _asdf",
        ";ASFLKJ3332__q5",
        "\nasdf𒀤𒀥𒀦",
    };

    for (test_cases) |case| {
        var parser = try Parser.init(std.testing.allocator, case);
        const symbol = parser.identifySymbol();
        try std.testing.expectEqual(error.UnexpectedSymbol, symbol);
        parser.deinit();
    }
}

test "opcodes are identified" {
    const test_cases = [_][]const u8{
        "2ADDU%aldfk",
        "GO ",
        "ADD\taksfdjas",
        "GREG\n",
        "IS",
    };

    const expected = [_]opcodes.Operation{
        opcodes.Operation{ .opcode = opcodes.Opcode._2ADDU },
        opcodes.Operation{ .opcode = opcodes.Opcode.GO },
        opcodes.Operation{ .opcode = opcodes.Opcode.ADD },
        opcodes.Operation{ .pseudo_op = opcodes.PseudoOp.GREG },
        opcodes.Operation{ .pseudo_op = opcodes.PseudoOp.IS },
    };

    for (0..5) |i| {
        var parser = try Parser.init(std.testing.allocator, test_cases[i]);
        const op = try parser.identifyOperation();
        try std.testing.expectEqual(expected[i], op);
        parser.deinit();
    }
}

test "no opcodes are found successfully" {
    const test_cases = [_][]const u8{
        " _asdf",
        ";ASFLKJ3332__q5",
        "\nasdf𒀤𒀥𒀦",
        "asdfklajsdfl",
    };

    for (test_cases) |case| {
        var parser = try Parser.init(std.testing.allocator, case);
        const symbol = parser.identifyOperation();
        try std.testing.expectEqual(error.NoOpcode, symbol);
        parser.deinit();
    }
}

test "decimals are recognized" {
    const test_cases = [_][]const u8{
        "012314aslkfdj",
        "1234567890  43",
        "1234567891234567889\n123124",
    };

    const expected = [_]u64{
        12314,
        1234567890,
        1234567891234567889,
    };

    for (0..3) |i| {
        var parser = try Parser.init(std.testing.allocator, test_cases[i]);
        const symbol = try parser.identifyDecimal();
        try std.testing.expectEqual(expected[i], symbol);
        parser.deinit();
    }
}

test "malformed decimals are not recognized" {
    const test_cases = [_][]const u8{
        "",
        "asdf123",
        "  123",
        "12345678901234567890123456789012345678901234567890",
    };

    const expected = [_]Parser.DecimalError{
        Parser.DecimalError.InvalidCharacter,
        Parser.DecimalError.InvalidCharacter,
        Parser.DecimalError.InvalidCharacter,
        Parser.DecimalError.Overflow,
    };

    for (0..test_cases.len) |i| {
        var parser = try Parser.init(std.testing.allocator, test_cases[i]);
        const symbol = parser.identifyDecimal();
        try std.testing.expectEqual(expected[i], symbol);
        parser.deinit();
    }
}

test "hexadecimals are recognized" {
    const test_cases = [_][]const u8{
        "#012314saslkfdj",
        "#1234567890abcdef  43",
        "#1234567891\n123124",
    };

    const expected = [_]u64{
        0x12314,
        0x1234567890abcdef,
        0x1234567891,
    };

    for (0..3) |i| {
        var parser = try Parser.init(std.testing.allocator, test_cases[i]);
        const symbol = try parser.identifyHexadecimal();
        try std.testing.expectEqual(expected[i], symbol);
        parser.deinit();
    }
}

test "malformed hexadecimals are not recognized" {
    const test_cases = [_][]const u8{
        "",
        "sasdf123",
        "  123",
        "#12345678901234567890123456789012345678901234567890",
        "#",
    };

    const expected = [_]Parser.HexadecimalError{
        Parser.HexadecimalError.WrongStartingCharacter,
        Parser.HexadecimalError.WrongStartingCharacter,
        Parser.HexadecimalError.WrongStartingCharacter,
        Parser.HexadecimalError.Overflow,
        Parser.HexadecimalError.InvalidCharacter,
    };

    for (0..test_cases.len) |i| {
        var parser = try Parser.init(std.testing.allocator, test_cases[i]);
        const symbol = parser.identifyHexadecimal();
        try std.testing.expectEqual(expected[i], symbol);
        parser.deinit();
    }
}

test "characters are recognized" {
    const test_cases = [_][]const u8{
        "'a'",
        "'1'",
        "'𒀤'",
    };

    const expected = [_]u21{
        'a',
        '1',
        '𒀤',
    };

    for (0..3) |i| {
        var parser = try Parser.init(std.testing.allocator, test_cases[i]);
        const symbol = try parser.identifyChar();
        try std.testing.expectEqual(expected[i], symbol);
        parser.deinit();
    }
}

test "invalid unicode sequences are not characters" {
    const test_cases = [_][]const u8{
        "'asdf'",
        "'asdfg'",
        "'as'",
        "''",
        "'",
        "a",
    };

    const expected = [_]Parser.CharError{
        Parser.CharError.NoEndingDelimiter,
        Parser.CharError.NoEndingDelimiter,
        Parser.CharError.NoEndingDelimiter,
        Parser.CharError.NoChar,
        Parser.CharError.NoChar,
        Parser.CharError.NoStartingDelimiter,
    };

    for (0..test_cases.len) |i| {
        var parser = try Parser.init(std.testing.allocator, test_cases[i]);
        const symbol = parser.identifyChar();
        try std.testing.expectEqual(expected[i], symbol);
        parser.deinit();
    }
}

test "strings are recognized" {
    const test_cases = [_][]const u8{
        "\" \"",
        "\"aslkdfjlaskdfj lkasjflkasjdflaksjfd''12309)($)(#$[[]𒀤\"",
    };

    const expected = [_][]const u8{
        " ",
        "aslkdfjlaskdfj lkasjflkasjdflaksjfd''12309)($)(#$[[]𒀤",
    };

    for (0..2) |i| {
        var parser = try Parser.init(std.testing.allocator, test_cases[i]);
        const symbol = try parser.identifyString();
        try std.testing.expect(std.mem.eql(u8, expected[i], symbol));
        parser.deinit();
    }
}

test "invalid strings are not recognized" {
    const test_cases = [_][]const u8{
        "\"\"",
        "\"",
        "\"\n\"",
        "asdf",
    };

    const expected = [_]Parser.StringError{
        Parser.StringError.NoString,
        Parser.StringError.NoEndingDelimiter,
        Parser.StringError.NoEndingDelimiter,
        Parser.StringError.NoStartingDelimiter,
    };

    for (0..test_cases.len) |i| {
        var parser = try Parser.init(std.testing.allocator, test_cases[i]);
        const symbol = parser.identifyString();
        try std.testing.expectEqual(expected[i], symbol);
        parser.deinit();
    }
}

test "constants are recognized" {
    const test_cases = [_][]const u8{
        "1234567890 1234",
        "#1234567890abcdef;%#*(",
        "'a'uuuuuu",
        "\"hello \"world",
    };

    const expected = [_]ConstantValue{
        ConstantValue{ .number = 1234567890 },
        ConstantValue{ .number = 0x1234567890abcdef },
        ConstantValue{ .number = 'a' },
        ConstantValue{ .string = "hello " },
    };

    for (0..4) |i| {
        var parser = try Parser.init(std.testing.allocator, test_cases[i]);
        defer parser.deinit();
        const symbol = try parser.identifyConstant();
        switch (symbol) {
            .number => try std.testing.expectEqual(expected[i].number, symbol.number),
            .string => try std.testing.expect(std.mem.eql(u8, expected[i].string, symbol.string)),
        }
    }
}

test "invalid constants are recognized" {
    const test_cases = [_][]const u8{
        "$123",
        " 123",
        "'aa'",
    };

    const expected = [_]Parser.ConstantError{
        Parser.ConstantError.UnexpectedSymbol,
        Parser.ConstantError.UnexpectedSymbol,
        Parser.ConstantError.NoEndingDelimiter,
    };

    for (0..test_cases.len) |i| {
        var parser = try Parser.init(std.testing.allocator, test_cases[i]);
        defer parser.deinit();
        const symbol = parser.identifyConstant();
        try std.testing.expectEqual(expected[i], symbol);
    }
}

test "basic primaries are identified" {
    const test_cases = [_][]const u8{
        "1234",
        "@",
        "'a'",
        "\"hello world\"",
        "+1234",
        "-#1",
        "~#0",
        "$123",
    };

    const expected = [_]ExpressionResult{
        ExpressionResult{ .number = NumberValue{ .pure = 1234 } },
        ExpressionResult{ .number = NumberValue{ .pure = 0 } },
        ExpressionResult{ .number = NumberValue{ .pure = 'a' } },
        ExpressionResult{ .string = "hello world" },
        ExpressionResult{ .number = NumberValue{ .pure = 1234 } },
        ExpressionResult{ .number = NumberValue{ .pure = 0xFFFFFFFFFFFFFFFF } },
        ExpressionResult{ .number = NumberValue{ .pure = 0xFFFFFFFFFFFFFFFF } },
        ExpressionResult{ .number = NumberValue{ .register = 123 } },
    };

    for (0..test_cases.len) |i| {
        var parser = try Parser.init(std.testing.allocator, test_cases[i]);
        defer parser.deinit();
        const symbol = try parser.identifyPrimary();
        switch (symbol) {
            .number => try std.testing.expectEqual(expected[i].number, symbol.number),
            .string => try std.testing.expect(std.mem.eql(u8, expected[i].string, symbol.string)),
        }
    }
}

test "invalid primaries are detected" {
    const test_cases = [_][]const u8{
        "$256",
        "$~0",
        "~\"hello\"",
    };

    const expected = [_]Parser.ExpressionError{
        Parser.ExpressionError.Overflow,
        Parser.ExpressionError.Overflow,
        Parser.ExpressionError.UnaryOperationOnString,
    };

    for (0..test_cases.len) |i| {
        var parser = try Parser.init(std.testing.allocator, test_cases[i]);
        defer parser.deinit();
        const symbol = parser.identifyPrimary();
        try std.testing.expectEqual(expected[i], symbol);
    }
}

test "valid terms are recognized" {
    const test_cases = [_][]const u8{
        "12*34",
        "23/2",
        "2//3",
        "3%2",
        "1<<2",
        "4>>2",
        "#FF&#AA",
        "$12",
        "\"hello\"",
    };

    const expected = [_]ExpressionResult{
        ExpressionResult{ .number = NumberValue{ .pure = 408 } },
        ExpressionResult{ .number = NumberValue{ .pure = 11 } },
        ExpressionResult{ .number = NumberValue{ .pure = 12297829382473034410 } },
        ExpressionResult{ .number = NumberValue{ .pure = 1 } },
        ExpressionResult{ .number = NumberValue{ .pure = 4 } },
        ExpressionResult{ .number = NumberValue{ .pure = 1 } },
        ExpressionResult{ .number = NumberValue{ .pure = 0xAA } },
        ExpressionResult{ .number = NumberValue{ .register = 12 } },
        ExpressionResult{ .string = "hello" },
    };

    for (0..test_cases.len) |i| {
        var parser = try Parser.init(std.testing.allocator, test_cases[i]);
        defer parser.deinit();
        const symbol = try parser.identifyTerm();

        switch (symbol) {
            .number => try std.testing.expectEqual(expected[i], symbol),
            .string => try std.testing.expect(std.mem.eql(u8, expected[i].string, symbol.string)),
        }
    }
}

test "terms compute the entire chain" {
    const test_cases = [_][]const u8{
        "1*2*3*4*5",
        "2*6/4",
        "3*3%8<<2>>1",
    };

    const expected = [_]ExpressionResult{
        ExpressionResult{ .number = NumberValue{ .pure = 120 } },
        ExpressionResult{ .number = NumberValue{ .pure = 3 } },
        ExpressionResult{ .number = NumberValue{ .pure = 2 } },
    };

    for (0..test_cases.len) |i| {
        var parser = try Parser.init(std.testing.allocator, test_cases[i]);
        defer parser.deinit();
        const symbol = try parser.identifyTerm();
        try std.testing.expectEqual(expected[i], symbol);
    }
}

test "strong operations do not work on registers" {
    const test_cases = [_][]const u8{
        "$2*3",
        "1/$1",
        "$1%$1",
        "$1<<1",
        "1>>$1",
        "$1&$1",
    };

    for (test_cases) |case| {
        var parser = try Parser.init(std.testing.allocator, case);
        defer parser.deinit();
        const symbol = parser.identifyTerm();
        try std.testing.expectEqual(Parser.ExpressionError.IllegalOperationOnRegister, symbol);
    }
}

test "strong operations do not work on strings" {
    const test_cases = [_][]const u8{
        "\"hello\"*1",
        "1*\"hello\"",
    };

    for (test_cases) |case| {
        var parser = try Parser.init(std.testing.allocator, case);
        defer parser.deinit();
        const symbol = parser.identifyTerm();
        try std.testing.expectEqual(Parser.ExpressionError.BinaryOperationOnString, symbol);
    }
}

test "expressions are recognized" {
    const test_cases = [_][]const u8{
        "1+2",
        "3-2",
        "2-3",
        "#AA|#00",
        "#AA^#FF",
        "5*5+5",
        "5*5+5*5",
        "#ab<<32+k&~(k-1)",
        "\"hello\"",
        "$12",
        "$$12",
    };

    const expected = [_]ExpressionResult{
        ExpressionResult{ .number = NumberValue{ .pure = 3 } },
        ExpressionResult{ .number = NumberValue{ .pure = 1 } },
        ExpressionResult{ .number = NumberValue{ .pure = 0xFFFFFFFFFFFFFFFF } },
        ExpressionResult{ .number = NumberValue{ .pure = 0xAA } },
        ExpressionResult{ .number = NumberValue{ .pure = 0x55 } },
        ExpressionResult{ .number = NumberValue{ .pure = 30 } },
        ExpressionResult{ .number = NumberValue{ .pure = 50 } },
        ExpressionResult{ .number = NumberValue{ .pure = 0xab00000100 } },
        ExpressionResult{ .string = "hello" },
        ExpressionResult{ .number = NumberValue{ .register = 12 } },
        ExpressionResult{ .number = NumberValue{ .register = 12 } },
    };

    for (0..test_cases.len) |i| {
        var parser = try Parser.init(std.testing.allocator, test_cases[i]);
        defer parser.deinit();

        try parser.symbols.put("k", NumberValue{ .pure = 0xcdef00 });

        const symbol = try parser.identifyExpression();

        switch (symbol) {
            .number => try std.testing.expectEqual(expected[i], symbol),
            .string => try std.testing.expect(std.mem.eql(u8, expected[i].string, symbol.string)),
        }
    }
}

test "weak operations cannot be applied to strings and registers" {
    const test_cases = [_][]const u8{
        "$1+$1",
        "2-$2",
        "$3|3",
        "$4^$4",
        "1+\"hello\"",
        "\"hello\"-2",
        "3|\"hello\"^4",
    };

    const expected = [_]Parser.ExpressionError{
        Parser.ExpressionError.IllegalOperationOnRegister,
        Parser.ExpressionError.IllegalOperationOnRegister,
        Parser.ExpressionError.IllegalOperationOnRegister,
        Parser.ExpressionError.IllegalOperationOnRegister,
        Parser.ExpressionError.BinaryOperationOnString,
        Parser.ExpressionError.BinaryOperationOnString,
        Parser.ExpressionError.BinaryOperationOnString,
    };

    for (0..test_cases.len) |i| {
        var parser = try Parser.init(std.testing.allocator, test_cases[i]);
        defer parser.deinit();
        const symbol = parser.identifyExpression();
        try std.testing.expectEqual(expected[i], symbol);
    }
}