diff options
author | jjanzen <jjanzen@jjanzen.ca> | 2025-02-27 13:13:20 -0600 |
---|---|---|
committer | jjanzen <jjanzen@jjanzen.ca> | 2025-02-27 13:13:20 -0600 |
commit | 29f04314658fd9b2a93ae78fa235e71cae0d700d (patch) | |
tree | 93a29a181f6676d085ff85099b70a183b0dd29c0 | |
parent | 3b5be7ffbb55dbc6722880c175ae92e8c62ac7e1 (diff) |
handle primaries
-rw-r--r-- | src/parser.zig | 345 |
1 files changed, 329 insertions, 16 deletions
diff --git a/src/parser.zig b/src/parser.zig
index 253ddf6..1b35596 100644
--- a/src/parser.zig
+++ b/src/parser.zig
@@ -1,16 +1,26 @@
 const std = @import("std");
 const opcodes = @import("opcodes.zig");
 
-/// A symbol's value can be pure or point to a register
-const SymbolValueType = enum {
+/// A number's value can be pure or point to a register
+const NumberType = enum {
     pure,
     register,
 };
-const SymbolValue = union {
+const NumberValue = union(NumberType) {
     pure: u64,
     register: u8,
 };
 
+/// An expression's result can be a NumberValue or a string
+const ExpressionResultType = enum {
+    number,
+    string,
+};
+const ExpressionResult = union(ExpressionResultType) {
+    number: NumberValue,
+    string: []const u8,
+};
+
-/// A constant can be a number of a string
+/// A constant can be a number or a string
 const ConstantType = enum {
     number,
@@ -27,7 +37,7 @@ pub const Parser = struct {
     input: []const u8,
     location: u64,
     ch_pos: usize,
-    symbols: std.StringHashMap(SymbolValue),
+    symbols: std.StringHashMap(NumberValue),
     object: std.ArrayList(u8),
 
-    /// Test is a character is whitespace
+    /// Test if a character is whitespace
@@ -107,7 +117,7 @@ pub const Parser = struct {
 
     /// Determine whether the cursor points at a valid unicode character wrapped in single quotes
     /// Move the cursor past the closing quote and return the character
-    fn identifyChar(self: *Parser) ![]const u8 {
+    fn identifyChar(self: *Parser) !u21 {
         if (self.getByte(self.ch_pos) != '\'') {
             return error.NoChar;
         }
@@ -132,7 +142,9 @@ pub const Parser = struct {
         const view = std.unicode.Utf8View.init(self.input[start..end]) catch return error.NoChar;
         var iter = view.iterator();
         var count: u8 = 0;
-        while (iter.nextCodepoint()) |_| {
+        var character: u21 = undefined;
+        while (iter.nextCodepoint()) |u| {
+            character = u;
             count += 1;
             if (count > 1) {
                 return error.NoChar;
@@ -142,7 +154,7 @@ pub const Parser = struct {
             return error.NoChar;
         }
 
-        return self.input[start..end];
+        return character;
     }
 
     /// Determine whether the cursor points at a valid string wrapped in double quotes
@@ -187,8 +199,8 @@ pub const Parser = struct {
                 return ConstantValue{ .number = number };
             },
             '\'' => {
-                const string = try identifyChar(self);
-                return ConstantValue{ .string = string };
+                const char = try identifyChar(self);
+                return ConstantValue{ .number = char };
             },
             '"' => {
                 const string = try identifyString(self);
@@ -225,6 +237,275 @@ pub const Parser = struct {
         return error.NoSymbol;
     }
 
+    /// Get the number associated with a given symbol
+    /// Fails with error.UndefinedSymbol when the symbol is not in the symbol table
+    fn handleSymbol(self: *Parser, symbol: []const u8) !NumberValue {
+        // TODO: handle xH, xF, xB
+        return self.symbols.get(symbol) orelse error.UndefinedSymbol;
+    }
+
+    const WeakOp = enum {
+        add,
+        sub,
+        bit_or,
+        bit_xor,
+        none,
+    };
+
+    /// Determine whether the cursor points at a valid expression
+    /// Move the cursor past the expression, evaluate it, and return its value
+    /// Terms are combined left to right; addition and subtraction wrap on overflow
+    fn identifyExpression(self: *Parser) anyerror!ExpressionResult {
+        var result: u64 = 0;
+        var last_op = WeakOp.none;
+        var done = false;
+        while (!done) {
+            const term = try self.identifyTerm();
+
+            switch (term) {
+                // A string is only valid as the sole term of an expression
+                .string => {
+                    if (last_op == WeakOp.none) return term;
+                    return error.NoExpression;
+                },
+                .number => |nv| {
+                    switch (nv) {
+                        .pure => |n| {
+                            result = switch (last_op) {
+                                WeakOp.add => result +% n,
+                                WeakOp.sub => result -% n,
+                                WeakOp.bit_or => result | n,
+                                WeakOp.bit_xor => result ^ n,
+                                WeakOp.none => n,
+                            };
+                        },
+                        // Register-valued terms are rejected, even when they stand alone
+                        .register => {
+                            return error.NoExpression;
+                        },
+                    }
+                },
+            }
+
+            // A weak operator continues the expression; anything else ends it
+            last_op = switch (self.getByte(self.ch_pos)) {
+                '+' => WeakOp.add,
+                '-' => WeakOp.sub,
+                '|' => WeakOp.bit_or,
+                '^' => WeakOp.bit_xor,
+                else => WeakOp.none,
+            };
+
+            if (last_op == WeakOp.none) {
+                done = true;
+            } else {
+                self.ch_pos += 1;
+            }
+        }
+        return ExpressionResult{ .number = NumberValue{ .pure = result } };
+    }
+
+    const StrongOp = enum {
+        mult,
+        div,
+        frac_div,
+        rem,
+        lshift,
+        rshift,
+        bit_and,
+        none,
+    };
+
+    /// Determine whether the cursor points at a valid term
+    /// Move the cursor past the term, evaluate it, and return its value
+    fn identifyTerm(self: *Parser) anyerror!ExpressionResult {
+        var result: u64 = 0;
+        var last_op = StrongOp.none;
+        var done = false;
+        while (!done) {
+            const primary = try self.identifyPrimary();
+
+            switch (primary) {
+                // A string is only valid as the sole primary of a term
+                .string => {
+                    if (last_op == StrongOp.none) return primary;
+                    return error.NoTerm;
+                },
+                .number => |nv| {
+                    switch (nv) {
+                        .pure => |n| {
+                            result = switch (last_op) {
+                                StrongOp.mult => result *% n,
+                                StrongOp.div => div: {
+                                    if (n == 0) return error.NoTerm;
+                                    break :div result / n;
+                                },
+                                // Fractional division: floor(result * 2^64 / n), defined only for result < n
+                                StrongOp.frac_div => frac_div: {
+                                    if (n == 0 or result >= n) return error.NoTerm;
+                                    const shifted: u128 = (@as(u128, result)) << 64;
+                                    const divided: u128 = shifted / n;
+                                    if (divided >= 1 << 64) return error.NoTerm;
+                                    break :frac_div @as(u64, @intCast(divided));
+                                },
+                                StrongOp.rem => rem: {
+                                    if (n == 0) return error.NoTerm;
+                                    break :rem result % n;
+                                },
+                                // Shifting by 64 or more bits yields 0
+                                StrongOp.lshift => if (n >= 64) 0 else result << @as(u6, @intCast(n)),
+                                StrongOp.rshift => if (n >= 64) 0 else result >> @as(u6, @intCast(n)),
+                                StrongOp.bit_and => result & n,
+                                StrongOp.none => n,
+                            };
+                        },
+                        // A register is only valid as the sole primary of a term
+                        else => {
+                            if (last_op == StrongOp.none) return primary;
+                            return error.NoTerm;
+                        },
+                    }
+                },
+            }
+
+            // A strong operator continues the term; anything else ends it
+            last_op = switch (self.getByte(self.ch_pos)) {
+                '*' => StrongOp.mult,
+                // "//" is fractional division; its extra '/' is consumed here
+                '/' => div: {
+                    var op = StrongOp.div;
+                    if (self.getByte(self.ch_pos + 1) == '/') {
+                        op = StrongOp.frac_div;
+                        self.ch_pos += 1;
+                    }
+                    break :div op;
+                },
+                '%' => StrongOp.rem,
+                // Only "<<" is an operator; a lone '<' ends the term
+                '<' => lshift: {
+                    var op = StrongOp.lshift;
+                    if (self.getByte(self.ch_pos + 1) == '<') {
+                        self.ch_pos += 1;
+                    } else {
+                        op = StrongOp.none;
+                    }
+                    break :lshift op;
+                },
+                // Only ">>" is an operator; a lone '>' ends the term
+                '>' => rshift: {
+                    var op = StrongOp.rshift;
+                    if (self.getByte(self.ch_pos + 1) == '>') {
+                        self.ch_pos += 1;
+                    } else {
+                        op = StrongOp.none;
+                    }
+                    break :rshift op;
+                },
+                '&' => StrongOp.bit_and,
+                else => StrongOp.none,
+            };
+
+            if (last_op == StrongOp.none) {
+                done = true;
+            } else {
+                self.ch_pos += 1;
+            }
+        }
+
+        return ExpressionResult{ .number = NumberValue{ .pure = result } };
+    }
+
+    /// Determine whether the cursor points at a valid primary
+    /// Move the cursor past the primary, evaluate it, and return its value
+    fn identifyPrimary(self: *Parser) anyerror!ExpressionResult {
+        // A digit followed by H, F or B is read as a two-character symbol; the H form is rejected
+        if (isDecimal(self.getByte(self.ch_pos)) and
+            (self.getByte(self.ch_pos + 1) == 'H' or self.getByte(self.ch_pos + 1) == 'F' or self.getByte(self.ch_pos + 1) == 'B'))
+        {
+            const symbol = try self.identifySymbol();
+            if (symbol.len != 2) return error.UndefinedSymbol;
+            if (symbol[1] == 'H') return error.UndefinedSymbol;
+            const symbol_val = try self.handleSymbol(symbol);
+            return ExpressionResult{ .number = symbol_val };
+        }
+
+        switch (self.getByte(self.ch_pos)) {
+            // The current location; consume the '@' so the caller sees what follows it
+            '@' => {
+                self.ch_pos += 1;
+                return ExpressionResult{ .number = NumberValue{ .pure = self.location } };
+            },
+            '(' => {
+                self.ch_pos += 1;
+                const expr = try self.identifyExpression();
+                if (self.getByte(self.ch_pos) != ')') return error.NoPrimary;
+                self.ch_pos += 1;
+                return expr;
+            },
+            '0'...'9', '#', '\'', '"' => {
+                const constant = try self.identifyConstant();
+                switch (constant) {
+                    .number => |n| return ExpressionResult{ .number = NumberValue{ .pure = n } },
+                    .string => |s| return ExpressionResult{ .string = s },
+                }
+            },
+            '+' => {
+                self.ch_pos += 1;
+                const primary = try self.identifyPrimary();
+                switch (primary) {
+                    .number => return primary,
+                    else => return error.NoPrimary,
+                }
+            },
+            '-' => {
+                self.ch_pos += 1;
+                const primary = try self.identifyPrimary();
+                switch (primary) {
+                    .number => |nv| {
+                        switch (nv) {
+                            .register, .pure => |n| return ExpressionResult{ .number = NumberValue{ .pure = 0 -% n } },
+                        }
+                    },
+                    else => return error.NoPrimary,
+                }
+            },
+            '~' => {
+                self.ch_pos += 1;
+                const primary = try self.identifyPrimary();
+                switch (primary) {
+                    .number => |nv| {
+                        switch (nv) {
+                            .register, .pure => |n| return ExpressionResult{ .number = NumberValue{ .pure = ~n } },
+                        }
+                    },
+                    else => return error.NoPrimary,
+                }
+            },
+            '$' => {
+                self.ch_pos += 1;
+                const primary = try self.identifyPrimary();
+                switch (primary) {
+                    .number => |nv| {
+                        switch (nv) {
+                            // Register numbers must fit in a byte
+                            .register, .pure => |n| {
+                                if (n >= 256) return error.NoPrimary;
+                                const n8 = @as(u8, @intCast(n));
+                                return ExpressionResult{ .number = NumberValue{ .register = n8 } };
+                            },
+                        }
+                    },
+                    else => return error.NoPrimary,
+                }
+            },
+            else => {
+                const symbol = try self.identifySymbol();
+                const symbol_value = try self.handleSymbol(symbol);
+                return ExpressionResult{ .number = symbol_value };
+            },
+        }
+    }
+
     /// Determine whether the cursor points at a valid opcode or pseudo operation
     /// An opcode consists solely of symbol characters (letters and numbers in fact)
     /// Move the cursor past the opcode and return it
@@ -244,7 +525,7 @@ pub const Parser = struct {
             .input = input,
             .location = 0,
             .ch_pos = 0,
-            .symbols = std.StringHashMap(SymbolValue).init(allocator),
+            .symbols = std.StringHashMap(NumberValue).init(allocator),
             .object = std.ArrayList(u8).init(allocator),
         };
     }
@@ -444,16 +725,16 @@ test "characters are recognized" {
         "'𒀤'",
     };
 
-    const expected = [_][]const u8{
-        "a",
-        "1",
-        "𒀤",
+    const expected = [_]u21{
+        'a',
+        '1',
+        '𒀤',
     };
 
     for (0..3) |i| {
         var parser = Parser.init(std.testing.allocator, test_cases[i]);
+        defer parser.deinit();
         const symbol = try parser.identifyChar();
-        try std.testing.expect(std.mem.eql(u8, expected[i], symbol));
-        parser.deinit();
+        try std.testing.expectEqual(expected[i], symbol);
     }
 }
@@ -520,12 +801,13 @@ test "constants are recognized" {
     const expected = [_]ConstantValue{
         ConstantValue{ .number = 1234567890 },
         ConstantValue{ .number = 0x1234567890abcdef },
-        ConstantValue{ .string = "a" },
+        ConstantValue{ .number = 'a' },
         ConstantValue{ .string = "hello " },
     };
 
     for (0..4) |i| {
         var parser = Parser.init(std.testing.allocator, test_cases[i]);
+        defer parser.deinit();
         const symbol = try parser.identifyConstant();
         switch (symbol) {
             .number => try std.testing.expectEqual(expected[i].number, symbol.number),
@@ -533,3 +815,51 @@ test "constants are recognized" {
         }
     }
 }
+
+test "basic primaries are identified" {
+    const test_cases = [_][]const u8{
+        "1234",
+        "@",
+        "'a'",
+        "\"hello world\"",
+        "+1234",
+        "-#1",
+        "~#0",
+        "$123",
+    };
+
+    const expected = [_]ExpressionResult{
+        ExpressionResult{ .number = NumberValue{ .pure = 1234 } },
+        ExpressionResult{ .number = NumberValue{ .pure = 0 } },
+        ExpressionResult{ .number = NumberValue{ .pure = 'a' } },
+        ExpressionResult{ .string = "hello world" },
+        ExpressionResult{ .number = NumberValue{ .pure = 1234 } },
+        ExpressionResult{ .number = NumberValue{ .pure = 0xFFFFFFFFFFFFFFFF } },
+        ExpressionResult{ .number = NumberValue{ .pure = 0xFFFFFFFFFFFFFFFF } },
+        ExpressionResult{ .number = NumberValue{ .register = 123 } },
+    };
+
+    // expectEqualDeep compares the union tags before the payloads, so a
+    // result of the wrong kind fails the test instead of tripping a
+    // safety check on an inactive union field
+    for (test_cases, expected) |case, want| {
+        var parser = Parser.init(std.testing.allocator, case);
+        defer parser.deinit();
+        const primary = try parser.identifyPrimary();
+        try std.testing.expectEqualDeep(want, primary);
+    }
+}
+
+test "invalid primaries are detected" {
+    const test_cases = [_][]const u8{
+        "$256",
+        "$~0",
+    };
+
+    for (test_cases) |case| {
+        var parser = Parser.init(std.testing.allocator, case);
+        defer parser.deinit();
+        // Both inputs ask for a register number of 256 or more
+        try std.testing.expectError(error.NoPrimary, parser.identifyPrimary());
+    }
+}