aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorjjanzen <jjanzen@jjanzen.ca>2025-02-27 13:13:20 -0600
committerjjanzen <jjanzen@jjanzen.ca>2025-02-27 13:13:20 -0600
commit29f04314658fd9b2a93ae78fa235e71cae0d700d (patch)
tree93a29a181f6676d085ff85099b70a183b0dd29c0
parent3b5be7ffbb55dbc6722880c175ae92e8c62ac7e1 (diff)
handle primaries
-rw-r--r--src/parser.zig345
1 files changed, 329 insertions, 16 deletions
diff --git a/src/parser.zig b/src/parser.zig
index 253ddf6..1b35596 100644
--- a/src/parser.zig
+++ b/src/parser.zig
@@ -1,16 +1,26 @@
const std = @import("std");
const opcodes = @import("opcodes.zig");
-/// A symbol's value can be pure or point to a register
-const SymbolValueType = enum {
+/// A number's value can be pure or point to a register
+const NumberType = enum {
pure,
register,
};
-const SymbolValue = union {
+const NumberValue = union(NumberType) {
pure: u64,
register: u8,
};
+/// Tag for ExpressionResult: an expression's result can be a NumberValue or a string
+const ExpressionResultType = enum {
+ number,
+ string,
+};
+const ExpressionResult = union(ExpressionResultType) { // value produced by evaluating a primary, term or expression
+ number: NumberValue, // a pure value or a register number
+ string: []const u8, // bytes of a string constant
+};
+
/// A constant can be a number or a string
const ConstantType = enum {
number,
@@ -27,7 +37,7 @@ pub const Parser = struct {
input: []const u8,
location: u64,
ch_pos: usize,
- symbols: std.StringHashMap(SymbolValue),
+ symbols: std.StringHashMap(NumberValue),
object: std.ArrayList(u8),
/// Test if a character is whitespace
@@ -107,7 +117,7 @@ pub const Parser = struct {
/// Determine whether the cursor points at a valid unicode character wrapped in single quotes
/// Move the cursor past the closing quote and return the character
- fn identifyChar(self: *Parser) ![]const u8 {
+ fn identifyChar(self: *Parser) !u21 {
if (self.getByte(self.ch_pos) != '\'') {
return error.NoChar;
}
@@ -132,7 +142,9 @@ pub const Parser = struct {
const view = std.unicode.Utf8View.init(self.input[start..end]) catch return error.NoChar;
var iter = view.iterator();
var count: u8 = 0;
- while (iter.nextCodepoint()) |_| {
+ var character: u21 = undefined;
+ while (iter.nextCodepoint()) |u| {
+ character = u;
count += 1;
if (count > 1) {
return error.NoChar;
@@ -142,7 +154,7 @@ pub const Parser = struct {
return error.NoChar;
}
- return self.input[start..end];
+ return character;
}
/// Determine whether the cursor points at a valid string wrapped in double quotes
@@ -187,8 +199,8 @@ pub const Parser = struct {
return ConstantValue{ .number = number };
},
'\'' => {
- const string = try identifyChar(self);
- return ConstantValue{ .string = string };
+ const char = try identifyChar(self);
+ return ConstantValue{ .number = char };
},
'"' => {
const string = try identifyString(self);
@@ -225,6 +237,258 @@ pub const Parser = struct {
return error.NoSymbol;
}
+    /// Look up `symbol` in the symbol table and return the number stored for it
+    /// Returns error.UndefinedSymbol when the table has no entry for `symbol`
+    fn handleSymbol(self: *Parser, symbol: []const u8) !NumberValue {
+        // TODO: handle xH, XF, xB
+        if (self.symbols.get(symbol)) |value| return value;
+        return error.UndefinedSymbol;
+    }
+
+    /// Operators that join terms inside an expression
+    /// `none` means no operator has been read (start of expression or end of input)
+    const WeakOp = enum {
+        add,
+        sub,
+        bit_or,
+        bit_xor,
+        none,
+    };
+
+    /// Determine whether the cursor points at a valid expression
+    /// An expression is one or more terms joined by `+`, `-`, `|` or `^`, folded left to right
+    /// Addition and subtraction wrap around on overflow
+    /// Move the cursor past the expression, evaluate it, and return its value
+    /// A string term is returned unchanged when it is the first term;
+    /// after an operator it yields error.NoExpression
+    /// A register term always yields error.NoExpression
+    fn identifyExpression(self: *Parser) anyerror!ExpressionResult {
+        var accumulator: u64 = 0;
+        var pending = WeakOp.none;
+        while (true) {
+            const term = try self.identifyTerm();
+
+            // Reduce the term to a plain operand, bailing out for anything that is not a pure number
+            const operand: u64 = switch (term) {
+                .string => {
+                    if (pending != WeakOp.none) return error.NoExpression;
+                    return term;
+                },
+                .number => |value| switch (value) {
+                    .pure => |n| n,
+                    .register => return error.NoExpression,
+                },
+            };
+
+            // Apply the operator that preceded this term
+            accumulator = switch (pending) {
+                WeakOp.none => operand,
+                WeakOp.add => accumulator +% operand,
+                WeakOp.sub => accumulator -% operand,
+                WeakOp.bit_or => accumulator | operand,
+                WeakOp.bit_xor => accumulator ^ operand,
+            };
+
+            // Peek at the next byte to see whether another term follows
+            pending = switch (self.getByte(self.ch_pos)) {
+                '+' => WeakOp.add,
+                '-' => WeakOp.sub,
+                '|' => WeakOp.bit_or,
+                '^' => WeakOp.bit_xor,
+                else => WeakOp.none,
+            };
+            if (pending == WeakOp.none) break;
+
+            // Step over the operator before reading the next term
+            self.ch_pos += 1;
+        }
+        return ExpressionResult{ .number = NumberValue{ .pure = accumulator } };
+    }
+
+    /// Operators that join primaries inside a term
+    /// `none` means no operator has been read (start of term or end of term)
+    const StrongOp = enum {
+        mult,
+        div,
+        frac_div,
+        rem,
+        lshift,
+        rshift,
+        bit_and,
+        none,
+    };
+
+    /// Determine whether the cursor points at a valid term
+    /// A term is one or more primaries joined by `*`, `/`, `//`, `%`, `<<`, `>>` or `&`,
+    /// folded left to right; multiplication wraps around on overflow
+    /// Move the cursor past the term, evaluate it, and return its value
+    /// A string or register primary is returned unchanged when it is the first primary;
+    /// after an operator it yields error.NoTerm
+    /// Division or remainder by zero yields error.NoTerm, as does a fractional division
+    /// whose left operand is not smaller than its right operand
+    fn identifyTerm(self: *Parser) anyerror!ExpressionResult {
+        var result: u64 = 0;
+        var last_op = StrongOp.none;
+        var done = false;
+        while (!done) {
+            const primary = try self.identifyPrimary();
+
+            switch (primary) {
+                .string => {
+                    // A string cannot be an operand; it is only accepted as the first primary
+                    if (last_op == StrongOp.none) return primary;
+                    return error.NoTerm;
+                },
+                .number => |nv| {
+                    switch (nv) {
+                        .pure => |n| {
+                            result = switch (last_op) {
+                                StrongOp.mult => result *% n,
+                                StrongOp.div => div: {
+                                    if (n == 0) return error.NoTerm;
+                                    break :div result / n;
+                                },
+                                StrongOp.frac_div => frac_div: {
+                                    // x // y is floor(2^64 * x / y) and is only defined for x < y
+                                    if (n == 0 or result >= n) return error.NoTerm;
+                                    const shifted: u128 = @as(u128, result) << 64;
+                                    // result < n guarantees the quotient is below 2^64
+                                    break :frac_div @as(u64, @intCast(shifted / n));
+                                },
+                                StrongOp.rem => rem: {
+                                    if (n == 0) return error.NoTerm;
+                                    break :rem result % n;
+                                },
+                                // Shifting by the full width or more clears every bit
+                                StrongOp.lshift => if (n >= 64) 0 else result << @as(u6, @intCast(n)),
+                                StrongOp.rshift => if (n >= 64) 0 else result >> @as(u6, @intCast(n)),
+                                StrongOp.bit_and => result & n,
+                                StrongOp.none => n,
+                            };
+                        },
+                        .register => {
+                            // A register cannot be an operand; it is only accepted as the first primary
+                            if (last_op == StrongOp.none) return primary;
+                            return error.NoTerm;
+                        },
+                    }
+                },
+            }
+
+            // Two-character operators skip their first character here;
+            // the shared `ch_pos += 1` below skips the remaining one
+            last_op = switch (self.getByte(self.ch_pos)) {
+                '*' => StrongOp.mult,
+                '/' => div: {
+                    var op = StrongOp.div;
+                    if (self.getByte(self.ch_pos + 1) == '/') {
+                        op = StrongOp.frac_div;
+                        self.ch_pos += 1;
+                    }
+                    break :div op;
+                },
+                '%' => StrongOp.rem,
+                '<' => lshift: {
+                    // A lone '<' is not an operator; leave the cursor on it
+                    var op = StrongOp.lshift;
+                    if (self.getByte(self.ch_pos + 1) == '<') {
+                        self.ch_pos += 1;
+                    } else {
+                        op = StrongOp.none;
+                    }
+                    break :lshift op;
+                },
+                '>' => rshift: {
+                    // A lone '>' is not an operator; leave the cursor on it
+                    var op = StrongOp.rshift;
+                    if (self.getByte(self.ch_pos + 1) == '>') {
+                        self.ch_pos += 1;
+                    } else {
+                        op = StrongOp.none;
+                    }
+                    break :rshift op;
+                },
+                '&' => StrongOp.bit_and,
+                else => StrongOp.none,
+            };
+
+            if (last_op == StrongOp.none) {
+                done = true;
+            } else {
+                self.ch_pos += 1;
+            }
+        }
+
+        return ExpressionResult{ .number = NumberValue{ .pure = result } };
+    }
+
+    /// Determine whether the cursor points at a valid primary
+    /// Move the cursor past the primary, evaluate it, and return its value
+    /// Accepted forms:
+    ///   - a digit followed by `F` or `B`, looked up as a symbol (`H` yields error.UndefinedSymbol)
+    ///   - `@`, the current location
+    ///   - a parenthesised expression
+    ///   - a decimal, hexadecimal, character or string constant
+    ///   - a unary `+`, `-`, `~` or `$` applied to another primary
+    ///   - any other symbol, looked up in the symbol table
+    /// Unary operators applied to a string yield error.NoPrimary
+    /// `$` yields error.NoPrimary when its operand is 256 or greater
+    fn identifyPrimary(self: *Parser) anyerror!ExpressionResult {
+        // Local symbols: a single digit followed by H, F or B
+        if (isDecimal(self.getByte(self.ch_pos)) and
+            (self.getByte(self.ch_pos + 1) == 'H' or self.getByte(self.ch_pos + 1) == 'F' or self.getByte(self.ch_pos + 1) == 'B'))
+        {
+            const symbol = try self.identifySymbol();
+            if (symbol.len != 2) return error.UndefinedSymbol;
+            // The `H` form is rejected here; only `F` and `B` are looked up
+            if (symbol[1] == 'H') return error.UndefinedSymbol;
+            const symbol_val = try self.handleSymbol(symbol);
+            return ExpressionResult{ .number = symbol_val };
+        }
+
+        switch (self.getByte(self.ch_pos)) {
+            '@' => {
+                // Step over the '@' so the cursor ends up past the primary like every other branch
+                self.ch_pos += 1;
+                return ExpressionResult{ .number = NumberValue{ .pure = self.location } };
+            },
+            '(' => {
+                self.ch_pos += 1;
+                const expr = try self.identifyExpression();
+                if (self.getByte(self.ch_pos) != ')') return error.NoPrimary;
+                self.ch_pos += 1;
+                return expr;
+            },
+            '0'...'9', '#', '\'', '"' => {
+                const constant = try self.identifyConstant();
+                switch (constant) {
+                    .number => |n| return ExpressionResult{ .number = NumberValue{ .pure = n } },
+                    .string => |s| return ExpressionResult{ .string = s },
+                }
+            },
+            '+' => {
+                // Unary plus leaves a numeric operand unchanged
+                self.ch_pos += 1;
+                const primary = try self.identifyPrimary();
+                switch (primary) {
+                    .number => return primary,
+                    else => return error.NoPrimary,
+                }
+            },
+            '-' => {
+                // Unary minus: wrapping negation, always producing a pure value
+                self.ch_pos += 1;
+                const primary = try self.identifyPrimary();
+                switch (primary) {
+                    .number => |nv| {
+                        switch (nv) {
+                            .register, .pure => |n| return ExpressionResult{ .number = NumberValue{ .pure = 0 -% n } },
+                        }
+                    },
+                    else => return error.NoPrimary,
+                }
+            },
+            '~' => {
+                // Bitwise complement, always producing a pure value
+                self.ch_pos += 1;
+                const primary = try self.identifyPrimary();
+                switch (primary) {
+                    .number => |nv| {
+                        switch (nv) {
+                            .register, .pure => |n| return ExpressionResult{ .number = NumberValue{ .pure = ~n } },
+                        }
+                    },
+                    else => return error.NoPrimary,
+                }
+            },
+            '$' => {
+                // Registerization: turn a value below 256 into a register number
+                self.ch_pos += 1;
+                const primary = try self.identifyPrimary();
+                switch (primary) {
+                    .number => |nv| {
+                        switch (nv) {
+                            .register, .pure => |n| {
+                                if (n >= 256) return error.NoPrimary;
+                                const n8 = @as(u8, @intCast(n));
+                                return ExpressionResult{ .number = NumberValue{ .register = n8 } };
+                            },
+                        }
+                    },
+                    else => return error.NoPrimary,
+                }
+            },
+            else => {
+                const symbol = try self.identifySymbol();
+                const symbol_value = try self.handleSymbol(symbol);
+                return ExpressionResult{ .number = symbol_value };
+            },
+        }
+    }
+
/// Determine whether the cursor points at a valid opcode or pseudo operation
/// An opcode consists solely of symbol characters (letters and numbers in fact)
/// Move the cursor past the opcode and return it
@@ -244,7 +508,7 @@ pub const Parser = struct {
.input = input,
.location = 0,
.ch_pos = 0,
- .symbols = std.StringHashMap(SymbolValue).init(allocator),
+ .symbols = std.StringHashMap(NumberValue).init(allocator),
.object = std.ArrayList(u8).init(allocator),
};
}
@@ -444,16 +708,16 @@ test "characters are recognized" {
"'𒀤'",
};
- const expected = [_][]const u8{
- "a",
- "1",
- "𒀤",
+ const expected = [_]u21{
+ 'a',
+ '1',
+ '𒀤',
};
for (0..3) |i| {
var parser = Parser.init(std.testing.allocator, test_cases[i]);
const symbol = try parser.identifyChar();
- try std.testing.expect(std.mem.eql(u8, expected[i], symbol));
+ try std.testing.expectEqual(expected[i], symbol);
parser.deinit();
}
}
@@ -520,12 +784,13 @@ test "constants are recognized" {
const expected = [_]ConstantValue{
ConstantValue{ .number = 1234567890 },
ConstantValue{ .number = 0x1234567890abcdef },
- ConstantValue{ .string = "a" },
+ ConstantValue{ .number = 'a' },
ConstantValue{ .string = "hello " },
};
for (0..4) |i| {
var parser = Parser.init(std.testing.allocator, test_cases[i]);
+ defer parser.deinit();
const symbol = try parser.identifyConstant();
switch (symbol) {
.number => try std.testing.expectEqual(expected[i].number, symbol.number),
@@ -533,3 +798,51 @@ test "constants are recognized" {
}
}
}
+
+// Each input is parsed as a single primary and compared with the entry at the same index in `expected`
+test "basic primaries are identified" {
+    const inputs = [_][]const u8{
+        "1234",
+        "@",
+        "'a'",
+        "\"hello world\"",
+        "+1234",
+        "-#1",
+        "~#0",
+        "$123",
+    };
+
+    const expected = [_]ExpressionResult{
+        ExpressionResult{ .number = NumberValue{ .pure = 1234 } },
+        ExpressionResult{ .number = NumberValue{ .pure = 0 } },
+        ExpressionResult{ .number = NumberValue{ .pure = 'a' } },
+        ExpressionResult{ .string = "hello world" },
+        ExpressionResult{ .number = NumberValue{ .pure = 1234 } },
+        ExpressionResult{ .number = NumberValue{ .pure = 0xFFFFFFFFFFFFFFFF } },
+        ExpressionResult{ .number = NumberValue{ .pure = 0xFFFFFFFFFFFFFFFF } },
+        ExpressionResult{ .number = NumberValue{ .register = 123 } },
+    };
+
+    for (inputs, expected) |input, want| {
+        var parser = Parser.init(std.testing.allocator, input);
+        defer parser.deinit();
+        const got = try parser.identifyPrimary();
+        switch (got) {
+            .number => |number| try std.testing.expectEqual(want.number, number),
+            .string => |string| try std.testing.expect(std.mem.eql(u8, want.string, string)),
+        }
+    }
+}
+
+// Register numbers must be below 256, so both inputs have to be rejected with error.NoPrimary
+test "invalid primaries are detected" {
+    const inputs = [_][]const u8{
+        "$256",
+        "$~0",
+    };
+
+    for (inputs) |input| {
+        var parser = Parser.init(std.testing.allocator, input);
+        defer parser.deinit();
+        try std.testing.expectError(error.NoPrimary, parser.identifyPrimary());
+    }
+}