handle primaries
This commit is contained in:
parent
3b5be7ffbb
commit
29f0431465
1 changed files with 329 additions and 16 deletions
345
src/parser.zig
345
src/parser.zig
|
@ -1,16 +1,26 @@
|
|||
const std = @import("std");
|
||||
const opcodes = @import("opcodes.zig");
|
||||
|
||||
/// A symbol's value can be pure or point to a register
|
||||
const SymbolValueType = enum {
|
||||
/// A number's value can be pure or point to a register
|
||||
const NumberType = enum {
|
||||
pure,
|
||||
register,
|
||||
};
|
||||
const SymbolValue = union {
|
||||
const NumberValue = union(NumberType) {
|
||||
pure: u64,
|
||||
register: u8,
|
||||
};
|
||||
|
||||
/// Discriminates the two kinds of value an expression can evaluate to
const ExpressionResultType = enum { number, string };

/// An expression's result can be a NumberValue or a string
const ExpressionResult = union(ExpressionResultType) {
    /// A numeric result, either a pure number or a register
    number: NumberValue,
    /// A string result, held as a byte slice
    string: []const u8,
};
|
||||
|
||||
/// A constant can be a number or a string
|
||||
const ConstantType = enum {
|
||||
number,
|
||||
|
@ -27,7 +37,7 @@ pub const Parser = struct {
|
|||
input: []const u8,
|
||||
location: u64,
|
||||
ch_pos: usize,
|
||||
symbols: std.StringHashMap(SymbolValue),
|
||||
symbols: std.StringHashMap(NumberValue),
|
||||
object: std.ArrayList(u8),
|
||||
|
||||
/// Test if a character is whitespace
|
||||
|
@ -107,7 +117,7 @@ pub const Parser = struct {
|
|||
|
||||
/// Determine whether the cursor points at a valid unicode character wrapped in single quotes
|
||||
/// Move the cursor past the closing quote and return the character
|
||||
fn identifyChar(self: *Parser) ![]const u8 {
|
||||
fn identifyChar(self: *Parser) !u21 {
|
||||
if (self.getByte(self.ch_pos) != '\'') {
|
||||
return error.NoChar;
|
||||
}
|
||||
|
@ -132,7 +142,9 @@ pub const Parser = struct {
|
|||
const view = std.unicode.Utf8View.init(self.input[start..end]) catch return error.NoChar;
|
||||
var iter = view.iterator();
|
||||
var count: u8 = 0;
|
||||
while (iter.nextCodepoint()) |_| {
|
||||
var character: u21 = undefined;
|
||||
while (iter.nextCodepoint()) |u| {
|
||||
character = u;
|
||||
count += 1;
|
||||
if (count > 1) {
|
||||
return error.NoChar;
|
||||
|
@ -142,7 +154,7 @@ pub const Parser = struct {
|
|||
return error.NoChar;
|
||||
}
|
||||
|
||||
return self.input[start..end];
|
||||
return character;
|
||||
}
|
||||
|
||||
/// Determine whether the cursor points at a valid string wrapped in double quotes
|
||||
|
@ -187,8 +199,8 @@ pub const Parser = struct {
|
|||
return ConstantValue{ .number = number };
|
||||
},
|
||||
'\'' => {
|
||||
const string = try identifyChar(self);
|
||||
return ConstantValue{ .string = string };
|
||||
const char = try identifyChar(self);
|
||||
return ConstantValue{ .number = char };
|
||||
},
|
||||
'"' => {
|
||||
const string = try identifyString(self);
|
||||
|
@ -225,6 +237,258 @@ pub const Parser = struct {
|
|||
return error.NoSymbol;
|
||||
}
|
||||
|
||||
/// Get the number associated with a given symbol
/// Returns error.UndefinedSymbol when the symbol table has no entry for `symbol`
fn handleSymbol(self: *Parser, symbol: []const u8) !NumberValue {
    // TODO: handle xH, XF, xB
    return self.symbols.get(symbol) orelse error.UndefinedSymbol;
}
|
||||
|
||||
/// Binary operators that join the terms of an expression
const WeakOp = enum {
    /// `+`
    add,
    /// `-`
    sub,
    /// `|`
    bit_or,
    /// `^`
    bit_xor,
    /// No operator is pending
    none,
};
|
||||
|
||||
/// Determine whether the cursor points at a valid expression
/// Move the cursor past the expression, evaluate it, and return its value
///
/// An expression is one or more terms joined by `+`, `-`, `|` or `^`.
/// Pure numbers are folded left to right with wrapping 64-bit arithmetic.
/// A string or a register cannot be combined with an operator: it is returned
/// unchanged when it is the first term (as identifyTerm does for primaries),
/// and yields error.NoExpression when it follows an operator.
/// Errors from identifyTerm are propagated.
fn identifyExpression(self: *Parser) anyerror!ExpressionResult {
    var result: u64 = 0;
    var last_op = WeakOp.none;

    while (true) {
        const term = try self.identifyTerm();

        // Only pure numbers take part in arithmetic
        const n: u64 = switch (term) {
            .string => {
                if (last_op == WeakOp.none) return term;
                return error.NoExpression;
            },
            .number => |nv| switch (nv) {
                .pure => |pure| pure,
                .register => {
                    if (last_op == WeakOp.none) return term;
                    return error.NoExpression;
                },
            },
        };

        result = switch (last_op) {
            .add => result +% n,
            .sub => result -% n,
            .bit_or => result | n,
            .bit_xor => result ^ n,
            .none => n,
        };

        // The byte under the cursor decides whether another term follows
        last_op = switch (self.getByte(self.ch_pos)) {
            '+' => WeakOp.add,
            '-' => WeakOp.sub,
            '|' => WeakOp.bit_or,
            '^' => WeakOp.bit_xor,
            else => WeakOp.none,
        };

        if (last_op == WeakOp.none) break;
        self.ch_pos += 1;
    }

    return ExpressionResult{ .number = NumberValue{ .pure = result } };
}
|
||||
|
||||
/// Binary operators that join the primaries of a term
const StrongOp = enum {
    /// `*`
    mult,
    /// `/`
    div,
    /// `//`
    frac_div,
    /// `%`
    rem,
    /// `<<`
    lshift,
    /// `>>`
    rshift,
    /// `&`
    bit_and,
    /// No operator is pending
    none,
};
|
||||
|
||||
/// Determine whether the cursor points at a valid term
/// Move the cursor past the term, evaluate it, and return its value
///
/// A term is one or more primaries joined by `*`, `/`, `//`, `%`, `<<`, `>>`
/// or `&`. Pure numbers are folded left to right on 64-bit values.
/// A string or a register cannot be combined with an operator: it is returned
/// unchanged when it is the first primary, and yields error.NoTerm when it
/// follows an operator.
/// error.NoTerm is also returned for division or remainder by zero and for a
/// fractional division whose dividend is not smaller than its divisor.
/// Errors from identifyPrimary are propagated.
fn identifyTerm(self: *Parser) anyerror!ExpressionResult {
    var result: u64 = 0;
    var last_op = StrongOp.none;

    while (true) {
        const primary = try self.identifyPrimary();

        // Only pure numbers take part in arithmetic
        const n: u64 = switch (primary) {
            .string => {
                if (last_op == StrongOp.none) return primary;
                return error.NoTerm;
            },
            .number => |nv| switch (nv) {
                .pure => |pure| pure,
                .register => {
                    if (last_op == StrongOp.none) return primary;
                    return error.NoTerm;
                },
            },
        };

        result = switch (last_op) {
            .mult => result *% n,
            .div => div: {
                if (n == 0) return error.NoTerm;
                break :div result / n;
            },
            .frac_div => frac_div: {
                if (n == 0 or result >= n) return error.NoTerm;
                // Fractional division is floor(2^64 * result / n), so the
                // dividend is scaled by 64 bits. Because result < n, the
                // quotient is below 2^64 and always fits in a u64.
                const scaled: u128 = @as(u128, result) << 64;
                break :frac_div @as(u64, @intCast(scaled / n));
            },
            .rem => rem: {
                if (n == 0) return error.NoTerm;
                break :rem result % n;
            },
            // Shifting by 64 or more bits leaves nothing of the value
            .lshift => if (n >= 64) 0 else result << @as(u6, @intCast(n)),
            .rshift => if (n >= 64) 0 else result >> @as(u6, @intCast(n)),
            .bit_and => result & n,
            .none => n,
        };

        // The byte(s) under the cursor decide whether another primary follows.
        // Two-byte operators consume their first byte here; the last byte of
        // every operator is consumed after the switch.
        last_op = switch (self.getByte(self.ch_pos)) {
            '*' => StrongOp.mult,
            '/' => div: {
                if (self.getByte(self.ch_pos + 1) != '/') break :div StrongOp.div;
                self.ch_pos += 1;
                break :div StrongOp.frac_div;
            },
            '%' => StrongOp.rem,
            '<' => lshift: {
                // A single '<' is not an operator and is left untouched
                if (self.getByte(self.ch_pos + 1) != '<') break :lshift StrongOp.none;
                self.ch_pos += 1;
                break :lshift StrongOp.lshift;
            },
            '>' => rshift: {
                // A single '>' is not an operator and is left untouched
                if (self.getByte(self.ch_pos + 1) != '>') break :rshift StrongOp.none;
                self.ch_pos += 1;
                break :rshift StrongOp.rshift;
            },
            '&' => StrongOp.bit_and,
            else => StrongOp.none,
        };

        if (last_op == StrongOp.none) break;
        self.ch_pos += 1;
    }

    return ExpressionResult{ .number = NumberValue{ .pure = result } };
}
|
||||
|
||||
/// Determine whether the cursor points at a valid primary
/// Move the cursor past the primary, evaluate it, and return its value
///
/// Recognized forms, chosen by the byte under the cursor:
/// - a digit directly followed by `H`, `F` or `B`: a two-character symbol
///   looked up in the symbol table (the `H` form is rejected)
/// - `@`: the current location
/// - `(`: a parenthesized expression, which must be closed by `)`
/// - a digit, `#`, `'` or `"`: a constant
/// - `+`, `-`, `~`, `$`: a unary operator applied to the following primary
/// - anything else: a symbol looked up in the symbol table
///
/// Returns error.NoPrimary for an unclosed parenthesis, a unary operator
/// applied to a string, or a `$` operand of 256 or more, and
/// error.UndefinedSymbol for an unusable or unknown symbol. Errors from the
/// nested identify* calls are propagated.
fn identifyPrimary(self: *Parser) anyerror!ExpressionResult {
    const first = self.getByte(self.ch_pos);

    if (isDecimal(first)) {
        const suffix = self.getByte(self.ch_pos + 1);
        if (suffix == 'H' or suffix == 'F' or suffix == 'B') {
            const symbol = try self.identifySymbol();
            if (symbol.len != 2) return error.UndefinedSymbol;
            // The H form cannot be used as a value
            if (symbol[1] == 'H') return error.UndefinedSymbol;
            const symbol_val = try self.handleSymbol(symbol);
            return ExpressionResult{ .number = symbol_val };
        }
    }

    switch (first) {
        '@' => {
            // Consume the '@' so the caller sees what follows it
            self.ch_pos += 1;
            return ExpressionResult{ .number = NumberValue{ .pure = self.location } };
        },
        '(' => {
            self.ch_pos += 1;
            const expr = try self.identifyExpression();
            if (self.getByte(self.ch_pos) != ')') return error.NoPrimary;
            self.ch_pos += 1;
            return expr;
        },
        '0'...'9', '#', '\'', '"' => {
            const constant = try self.identifyConstant();
            switch (constant) {
                .number => |n| return ExpressionResult{ .number = NumberValue{ .pure = n } },
                .string => |s| return ExpressionResult{ .string = s },
            }
        },
        '+' => {
            // Unary plus keeps its numeric operand as is, register or pure
            self.ch_pos += 1;
            const operand = try self.identifyPrimary();
            switch (operand) {
                .number => return operand,
                .string => return error.NoPrimary,
            }
        },
        '-' => {
            const value = try self.unaryOperand();
            return ExpressionResult{ .number = NumberValue{ .pure = 0 -% value } };
        },
        '~' => {
            const value = try self.unaryOperand();
            return ExpressionResult{ .number = NumberValue{ .pure = ~value } };
        },
        '$' => {
            const value = try self.unaryOperand();
            // A register number has to fit in a u8
            if (value >= 256) return error.NoPrimary;
            const register: u8 = @intCast(value);
            return ExpressionResult{ .number = NumberValue{ .register = register } };
        },
        else => {
            const symbol = try self.identifySymbol();
            const symbol_value = try self.handleSymbol(symbol);
            return ExpressionResult{ .number = symbol_value };
        },
    }
}

/// Skip the unary operator under the cursor, parse the primary after it, and
/// return its numeric value (a register yields its register number)
/// Returns error.NoPrimary when the operand is a string
fn unaryOperand(self: *Parser) anyerror!u64 {
    self.ch_pos += 1;
    const operand = try self.identifyPrimary();
    switch (operand) {
        .number => |nv| switch (nv) {
            .pure => |pure| return pure,
            .register => |register| return register,
        },
        .string => return error.NoPrimary,
    }
}
|
||||
|
||||
/// Determine whether the cursor points at a valid opcode or pseudo operation
|
||||
/// An opcode consists solely of symbol characters (letters and numbers in fact)
|
||||
/// Move the cursor past the opcode and return it
|
||||
|
@ -244,7 +508,7 @@ pub const Parser = struct {
|
|||
.input = input,
|
||||
.location = 0,
|
||||
.ch_pos = 0,
|
||||
.symbols = std.StringHashMap(SymbolValue).init(allocator),
|
||||
.symbols = std.StringHashMap(NumberValue).init(allocator),
|
||||
.object = std.ArrayList(u8).init(allocator),
|
||||
};
|
||||
}
|
||||
|
@ -444,16 +708,16 @@ test "characters are recognized" {
|
|||
"'𒀤'",
|
||||
};
|
||||
|
||||
const expected = [_][]const u8{
|
||||
"a",
|
||||
"1",
|
||||
"𒀤",
|
||||
const expected = [_]u21{
|
||||
'a',
|
||||
'1',
|
||||
'𒀤',
|
||||
};
|
||||
|
||||
for (0..3) |i| {
|
||||
var parser = Parser.init(std.testing.allocator, test_cases[i]);
|
||||
const symbol = try parser.identifyChar();
|
||||
try std.testing.expect(std.mem.eql(u8, expected[i], symbol));
|
||||
try std.testing.expectEqual(expected[i], symbol);
|
||||
parser.deinit();
|
||||
}
|
||||
}
|
||||
|
@ -520,12 +784,13 @@ test "constants are recognized" {
|
|||
const expected = [_]ConstantValue{
|
||||
ConstantValue{ .number = 1234567890 },
|
||||
ConstantValue{ .number = 0x1234567890abcdef },
|
||||
ConstantValue{ .string = "a" },
|
||||
ConstantValue{ .number = 'a' },
|
||||
ConstantValue{ .string = "hello " },
|
||||
};
|
||||
|
||||
for (0..4) |i| {
|
||||
var parser = Parser.init(std.testing.allocator, test_cases[i]);
|
||||
defer parser.deinit();
|
||||
const symbol = try parser.identifyConstant();
|
||||
switch (symbol) {
|
||||
.number => try std.testing.expectEqual(expected[i].number, symbol.number),
|
||||
|
@ -533,3 +798,51 @@ test "constants are recognized" {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
test "basic primaries are identified" {
    // Each case pairs an input with the value identifyPrimary must produce
    const Case = struct {
        input: []const u8,
        expected: ExpressionResult,
    };

    const cases = [_]Case{
        .{ .input = "1234", .expected = ExpressionResult{ .number = NumberValue{ .pure = 1234 } } },
        // A fresh parser starts at location 0
        .{ .input = "@", .expected = ExpressionResult{ .number = NumberValue{ .pure = 0 } } },
        .{ .input = "'a'", .expected = ExpressionResult{ .number = NumberValue{ .pure = 'a' } } },
        .{ .input = "\"hello world\"", .expected = ExpressionResult{ .string = "hello world" } },
        .{ .input = "+1234", .expected = ExpressionResult{ .number = NumberValue{ .pure = 1234 } } },
        // Negation wraps around in 64 bits
        .{ .input = "-#1", .expected = ExpressionResult{ .number = NumberValue{ .pure = 0xFFFFFFFFFFFFFFFF } } },
        .{ .input = "~#0", .expected = ExpressionResult{ .number = NumberValue{ .pure = 0xFFFFFFFFFFFFFFFF } } },
        .{ .input = "$123", .expected = ExpressionResult{ .number = NumberValue{ .register = 123 } } },
    };

    for (cases) |case| {
        var parser = Parser.init(std.testing.allocator, case.input);
        defer parser.deinit();

        const actual = try parser.identifyPrimary();
        switch (actual) {
            .number => |number| try std.testing.expectEqual(case.expected.number, number),
            .string => |string| try std.testing.expect(std.mem.eql(u8, case.expected.string, string)),
        }
    }
}
|
||||
|
||||
test "invalid primaries are detected" {
    // Both inputs ask for a register number that does not fit in a u8
    const test_cases = [_][]const u8{
        "$256",
        "$~0",
    };

    for (test_cases) |case| {
        var parser = Parser.init(std.testing.allocator, case);
        defer parser.deinit();
        // expectError also fails the test when a value is returned instead of an error
        try std.testing.expectError(error.NoPrimary, parser.identifyPrimary());
    }
}
|
||||
|
|
Loading…
Add table
Reference in a new issue