handle primaries

This commit is contained in:
jjanzen 2025-02-27 13:13:20 -06:00
parent 3b5be7ffbb
commit 29f0431465

View file

@ -1,16 +1,26 @@
const std = @import("std");
const opcodes = @import("opcodes.zig");
/// A symbol's value can be pure or point to a register
const SymbolValueType = enum {
/// A number's value can be pure or point to a register
/// Used as the tag type of `NumberValue`
const NumberType = enum {
/// The value is a plain number
pure,
/// The value is a register number
register,
};
const SymbolValue = union {
/// A number tagged by `NumberType`: either a pure value or a register number
const NumberValue = union(NumberType) {
/// Pure 64-bit unsigned value
pure: u64,
/// Register number, stored in a single byte
register: u8,
};
/// An expression's result can be a NumberValue or a string
/// Used as the tag type of `ExpressionResult`
const ExpressionResultType = enum {
/// The result is a `NumberValue`
number,
/// The result is a byte string
string,
};
/// The value produced by evaluating a primary, term or expression
const ExpressionResult = union(ExpressionResultType) {
/// Numeric result (pure value or register number)
number: NumberValue,
/// String result, held as a byte slice
string: []const u8,
};
/// A constant can be a number or a string
const ConstantType = enum {
number,
@ -27,7 +37,7 @@ pub const Parser = struct {
input: []const u8,
location: u64,
ch_pos: usize,
symbols: std.StringHashMap(SymbolValue),
symbols: std.StringHashMap(NumberValue),
object: std.ArrayList(u8),
/// Test if a character is whitespace
@ -107,7 +117,7 @@ pub const Parser = struct {
/// Determine whether the cursor points at a valid unicode character wrapped in single quotes
/// Move the cursor past the closing quote and return the character
fn identifyChar(self: *Parser) ![]const u8 {
fn identifyChar(self: *Parser) !u21 {
if (self.getByte(self.ch_pos) != '\'') {
return error.NoChar;
}
@ -132,7 +142,9 @@ pub const Parser = struct {
const view = std.unicode.Utf8View.init(self.input[start..end]) catch return error.NoChar;
var iter = view.iterator();
var count: u8 = 0;
while (iter.nextCodepoint()) |_| {
var character: u21 = undefined;
while (iter.nextCodepoint()) |u| {
character = u;
count += 1;
if (count > 1) {
return error.NoChar;
@ -142,7 +154,7 @@ pub const Parser = struct {
return error.NoChar;
}
return self.input[start..end];
return character;
}
/// Determine whether the cursor points at a valid string wrapped in double quotes
@ -187,8 +199,8 @@ pub const Parser = struct {
return ConstantValue{ .number = number };
},
'\'' => {
const string = try identifyChar(self);
return ConstantValue{ .string = string };
const char = try identifyChar(self);
return ConstantValue{ .number = char };
},
'"' => {
const string = try identifyString(self);
@ -225,6 +237,258 @@ pub const Parser = struct {
return error.NoSymbol;
}
/// Look up `symbol` in the symbol table and return the number stored for it
/// Returns `error.UndefinedSymbol` when the table has no entry for `symbol`
fn handleSymbol(self: *Parser, symbol: []const u8) !NumberValue {
    // TODO: handle xH, XF, xB
    if (self.symbols.get(symbol)) |value| {
        return value;
    }
    return error.UndefinedSymbol;
}
/// Binary operators that join terms inside an expression
/// `none` marks the absence of an operator (start or end of the expression)
const WeakOp = enum {
/// `+`, wrapping addition
add,
/// `-`, wrapping subtraction
sub,
/// `|`, bitwise or
bit_or,
/// `^`, bitwise exclusive or
bit_xor,
/// No operator
none,
};
/// Determine whether the cursor points at a valid expression
/// Move the cursor past the expression, evaluate it, and return its value
///
/// An expression is a sequence of terms joined by `+`, `-`, `|` or `^`,
/// folded left to right on u64 with wrapping addition and subtraction.
/// A string term is returned unchanged when it is the first term; after an
/// operator it yields `error.NoExpression`. A register term always yields
/// `error.NoExpression`. Errors from `identifyTerm` are propagated.
fn identifyExpression(self: *Parser) anyerror!ExpressionResult {
    var acc: u64 = 0;
    var pending = WeakOp.none;
    while (true) {
        const term = try self.identifyTerm();
        // Reduce the term to a pure operand, or leave the function early.
        const operand: u64 = switch (term) {
            .string => {
                if (pending == WeakOp.none) return term;
                return error.NoExpression;
            },
            .number => |value| switch (value) {
                .pure => |n| n,
                .register => return error.NoExpression,
            },
        };
        // Apply the operator that preceded this term.
        acc = switch (pending) {
            WeakOp.add => acc +% operand,
            WeakOp.sub => acc -% operand,
            WeakOp.bit_or => acc | operand,
            WeakOp.bit_xor => acc ^ operand,
            WeakOp.none => operand,
        };
        // Any byte that is not a weak operator ends the expression and is
        // left unconsumed.
        pending = switch (self.getByte(self.ch_pos)) {
            '+' => WeakOp.add,
            '-' => WeakOp.sub,
            '|' => WeakOp.bit_or,
            '^' => WeakOp.bit_xor,
            else => break,
        };
        self.ch_pos += 1;
    }
    return ExpressionResult{ .number = NumberValue{ .pure = acc } };
}
/// Binary operators that join primaries inside a term
/// `none` marks the absence of an operator (start or end of the term)
const StrongOp = enum {
/// `*`, wrapping multiplication
mult,
/// `/`, integer division
div,
/// `//`, fractional division
frac_div,
/// `%`, remainder
rem,
/// `<<`, left shift
lshift,
/// `>>`, right shift
rshift,
/// `&`, bitwise and
bit_and,
/// No operator
none,
};
/// Determine whether the cursor points at a valid term
/// Move the cursor past the term, evaluate it, and return its value
///
/// A term is a sequence of primaries joined by the strong operators
/// `*`, `/`, `//`, `%`, `<<`, `>>` and `&`, folded left to right on u64.
/// A string or register primary is returned unchanged when it is the first
/// primary of the term; after an operator it yields `error.NoTerm`.
/// `error.NoTerm` is also returned for division or remainder by zero and for
/// a fractional division whose left operand is not smaller than the right.
/// Errors from `identifyPrimary` are propagated.
fn identifyTerm(self: *Parser) anyerror!ExpressionResult {
    var result: u64 = 0;
    var last_op = StrongOp.none;
    var done = false;
    while (!done) {
        const primary = try self.identifyPrimary();
        switch (primary) {
            .string => {
                if (last_op == StrongOp.none) return primary;
                return error.NoTerm;
            },
            .number => |nv| {
                switch (nv) {
                    .pure => |n| {
                        result = switch (last_op) {
                            StrongOp.mult => result *% n,
                            StrongOp.div => div: {
                                if (n == 0) return error.NoTerm;
                                break :div result / n;
                            },
                            StrongOp.frac_div => frac_div: {
                                // x // y is floor(2^64 * x / y) and is only
                                // defined for x < y.
                                if (n == 0 or result >= n) return error.NoTerm;
                                const shifted: u128 = @as(u128, result) << 64;
                                // result < n guarantees the quotient is
                                // below 2^64, so the narrowing cannot fail.
                                break :frac_div @as(u64, @intCast(shifted / n));
                            },
                            StrongOp.rem => rem: {
                                if (n == 0) return error.NoTerm;
                                break :rem result % n;
                            },
                            // Shifting by 64 or more clears every bit.
                            StrongOp.lshift => if (n >= 64) 0 else result << @as(u6, @intCast(n)),
                            StrongOp.rshift => if (n >= 64) 0 else result >> @as(u6, @intCast(n)),
                            StrongOp.bit_and => result & n,
                            StrongOp.none => n,
                        };
                    },
                    else => {
                        // Register operand: allowed only as a lone primary.
                        if (last_op == StrongOp.none) return primary;
                        return error.NoTerm;
                    },
                }
            },
        }
        last_op = switch (self.getByte(self.ch_pos)) {
            '*' => StrongOp.mult,
            '/' => div: {
                var op = StrongOp.div;
                if (self.getByte(self.ch_pos + 1) == '/') {
                    op = StrongOp.frac_div;
                    // Skip the first '/'; the second is skipped below.
                    self.ch_pos += 1;
                }
                break :div op;
            },
            '%' => StrongOp.rem,
            '<' => lshift: {
                // A single '<' is not an operator and ends the term.
                var op = StrongOp.lshift;
                if (self.getByte(self.ch_pos + 1) == '<') {
                    self.ch_pos += 1;
                } else {
                    op = StrongOp.none;
                }
                break :lshift op;
            },
            '>' => rshift: {
                // A single '>' is not an operator and ends the term.
                var op = StrongOp.rshift;
                if (self.getByte(self.ch_pos + 1) == '>') {
                    self.ch_pos += 1;
                } else {
                    op = StrongOp.none;
                }
                break :rshift op;
            },
            '&' => StrongOp.bit_and,
            else => StrongOp.none,
        };
        if (last_op == StrongOp.none) {
            done = true;
        } else {
            // Consume the (last byte of the) operator.
            self.ch_pos += 1;
        }
    }
    return ExpressionResult{ .number = NumberValue{ .pure = result } };
}
/// Determine whether the cursor points at a valid primary
/// Move the cursor past the primary, evaluate it, and return its value
///
/// Forms handled, in the order they are checked:
/// - a decimal digit followed by `H`, `F` or `B`: read with `identifySymbol`
///   and looked up with `handleSymbol`; the `H` form and any symbol that is
///   not exactly two bytes long yield `error.UndefinedSymbol`
/// - `@`: the current `self.location` as a pure number
/// - `(` expression `)`
/// - a constant starting with a decimal digit, `#`, `'` or `"`
/// - unary `+`, `-`, `~` or `$` applied to another primary
/// - anything else: a symbol looked up with `handleSymbol`
///
/// Returns `error.NoPrimary` for a missing `)`, for a unary operator applied
/// to a string, and for `$` applied to a value of 256 or more. Errors from
/// the helpers called here are propagated.
fn identifyPrimary(self: *Parser) anyerror!ExpressionResult {
    if (isDecimal(self.getByte(self.ch_pos))) {
        const suffix = self.getByte(self.ch_pos + 1);
        if (suffix == 'H' or suffix == 'F' or suffix == 'B') {
            const symbol = try self.identifySymbol();
            if (symbol.len != 2) return error.UndefinedSymbol;
            if (symbol[1] == 'H') return error.UndefinedSymbol;
            const symbol_val = try self.handleSymbol(symbol);
            return ExpressionResult{ .number = symbol_val };
        }
    }
    switch (self.getByte(self.ch_pos)) {
        '@' => {
            // Consume the '@' so the caller sees what follows it.
            self.ch_pos += 1;
            return ExpressionResult{ .number = NumberValue{ .pure = self.location } };
        },
        '(' => {
            self.ch_pos += 1;
            const expr = try self.identifyExpression();
            if (self.getByte(self.ch_pos) != ')') return error.NoPrimary;
            self.ch_pos += 1;
            return expr;
        },
        '0'...'9', '#', '\'', '"' => {
            const constant = try self.identifyConstant();
            switch (constant) {
                .number => |n| return ExpressionResult{ .number = NumberValue{ .pure = n } },
                .string => |s| return ExpressionResult{ .string = s },
            }
        },
        '+' => {
            // Unary plus: the operand is returned unchanged.
            self.ch_pos += 1;
            const primary = try self.identifyPrimary();
            switch (primary) {
                .number => return primary,
                else => return error.NoPrimary,
            }
        },
        '-' => {
            // Unary minus: wrapping negation; a register operand is treated
            // as its register number and the result is pure.
            self.ch_pos += 1;
            const primary = try self.identifyPrimary();
            switch (primary) {
                .number => |nv| {
                    switch (nv) {
                        .register, .pure => |n| return ExpressionResult{ .number = NumberValue{ .pure = 0 -% n } },
                    }
                },
                else => return error.NoPrimary,
            }
        },
        '~' => {
            // Bitwise complement; a register operand is treated as its
            // register number and the result is pure.
            self.ch_pos += 1;
            const primary = try self.identifyPrimary();
            switch (primary) {
                .number => |nv| {
                    switch (nv) {
                        .register, .pure => |n| return ExpressionResult{ .number = NumberValue{ .pure = ~n } },
                    }
                },
                else => return error.NoPrimary,
            }
        },
        '$' => {
            // Turn a number into a register; it must fit in one byte.
            self.ch_pos += 1;
            const primary = try self.identifyPrimary();
            switch (primary) {
                .number => |nv| {
                    switch (nv) {
                        .register, .pure => |n| {
                            if (n >= 256) return error.NoPrimary;
                            const n8 = @as(u8, @intCast(n));
                            return ExpressionResult{ .number = NumberValue{ .register = n8 } };
                        },
                    }
                },
                else => return error.NoPrimary,
            }
        },
        else => {
            const symbol = try self.identifySymbol();
            const symbol_value = try self.handleSymbol(symbol);
            return ExpressionResult{ .number = symbol_value };
        },
    }
}
/// Determine whether the cursor points at a valid opcode or pseudo operation
/// An opcode consists solely of symbol characters (letters and numbers in fact)
/// Move the cursor past the opcode and return it
@ -244,7 +508,7 @@ pub const Parser = struct {
.input = input,
.location = 0,
.ch_pos = 0,
.symbols = std.StringHashMap(SymbolValue).init(allocator),
.symbols = std.StringHashMap(NumberValue).init(allocator),
.object = std.ArrayList(u8).init(allocator),
};
}
@ -444,16 +708,16 @@ test "characters are recognized" {
"'𒀤'",
};
const expected = [_][]const u8{
"a",
"1",
"𒀤",
const expected = [_]u21{
'a',
'1',
'𒀤',
};
for (0..3) |i| {
var parser = Parser.init(std.testing.allocator, test_cases[i]);
const symbol = try parser.identifyChar();
try std.testing.expect(std.mem.eql(u8, expected[i], symbol));
try std.testing.expectEqual(expected[i], symbol);
parser.deinit();
}
}
@ -520,12 +784,13 @@ test "constants are recognized" {
const expected = [_]ConstantValue{
ConstantValue{ .number = 1234567890 },
ConstantValue{ .number = 0x1234567890abcdef },
ConstantValue{ .string = "a" },
ConstantValue{ .number = 'a' },
ConstantValue{ .string = "hello " },
};
for (0..4) |i| {
var parser = Parser.init(std.testing.allocator, test_cases[i]);
defer parser.deinit();
const symbol = try parser.identifyConstant();
switch (symbol) {
.number => try std.testing.expectEqual(expected[i].number, symbol.number),
@ -533,3 +798,51 @@ test "constants are recognized" {
}
}
}
// Each supported primary form is parsed on its own and compared with the
// value it is expected to evaluate to.
test "basic primaries are identified" {
    const Case = struct {
        input: []const u8,
        want: ExpressionResult,
    };
    const cases = [_]Case{
        .{ .input = "1234", .want = .{ .number = .{ .pure = 1234 } } },
        .{ .input = "@", .want = .{ .number = .{ .pure = 0 } } },
        .{ .input = "'a'", .want = .{ .number = .{ .pure = 'a' } } },
        .{ .input = "\"hello world\"", .want = .{ .string = "hello world" } },
        .{ .input = "+1234", .want = .{ .number = .{ .pure = 1234 } } },
        .{ .input = "-#1", .want = .{ .number = .{ .pure = 0xFFFFFFFFFFFFFFFF } } },
        .{ .input = "~#0", .want = .{ .number = .{ .pure = 0xFFFFFFFFFFFFFFFF } } },
        .{ .input = "$123", .want = .{ .number = .{ .register = 123 } } },
    };

    for (cases) |case| {
        var parser = Parser.init(std.testing.allocator, case.input);
        defer parser.deinit();

        const got = try parser.identifyPrimary();
        switch (got) {
            .number => |n| try std.testing.expectEqual(case.want.number, n),
            .string => |s| try std.testing.expect(std.mem.eql(u8, case.want.string, s)),
        }
    }
}
// Register numbers of 256 or more must be rejected with error.NoPrimary.
test "invalid primaries are detected" {
    const inputs = [_][]const u8{ "$256", "$~0" };

    for (inputs) |input| {
        var parser = Parser.init(std.testing.allocator, input);
        defer parser.deinit();

        try std.testing.expectError(error.NoPrimary, parser.identifyPrimary());
    }
}