mmix-as/src/parser.zig
2025-02-27 17:40:58 -06:00

1199 lines
39 KiB
Zig

const std = @import("std");
const opcodes = @import("opcodes.zig");
/// A number's value can be pure or point to a register
/// This enum is the tag type of `NumberValue`.
const NumberType = enum {
/// The value is a plain number.
pure,
/// The value names a register.
register,
};
/// A number tagged with its kind (see `NumberType`).
const NumberValue = union(NumberType) {
/// Plain unsigned 64-bit value.
pure: u64,
/// Register number; stored as a u8, so it ranges over 0-255.
register: u8,
};
/// An expression's result can be a NumberValue or a string
/// This enum is the tag type of `ExpressionResult`.
const ExpressionResultType = enum {
/// The expression evaluated to a number (pure or register).
number,
/// The expression evaluated to a string.
string,
};
/// The value produced by evaluating a primary, a term, or an expression.
const ExpressionResult = union(ExpressionResultType) {
/// Numeric result; may be a pure value or a register number.
number: NumberValue,
/// String result as a byte slice.
string: []const u8,
};
/// A constant can be a number or a string
/// This enum is the tag type of `ConstantValue`.
const ConstantType = enum {
/// The constant is numeric.
number,
/// The constant is a string.
string,
};
/// The value of a literal constant.
const ConstantValue = union(ConstantType) {
/// Numeric constant as an unsigned 64-bit value.
number: u64,
/// String constant as a byte slice.
string: []const u8,
};
/// The Parser reads a provided input and assembles it into MMIX object code
pub const Parser = struct {
allocator: std.mem.Allocator,
input: []const u8,
location: u64,
ch_pos: usize,
symbols: std.StringHashMap(NumberValue),
object: std.ArrayList(u8),
/// Report whether `ch` is a whitespace byte: space, tab, or carriage return.
/// Newlines are deliberately excluded; they are not whitespace here.
fn isWhitespace(ch: u8) bool {
    return switch (ch) {
        ' ', '\t', '\r' => true,
        else => false,
    };
}
/// Report whether `ch` is a letter.
/// The underscore counts as a letter so that it can start a symbol.
fn isLetter(ch: u8) bool {
    return switch (ch) {
        '_', 'a'...'z', 'A'...'Z' => true,
        else => false,
    };
}
/// Report whether `ch` is one of the ASCII digits '0' through '9'.
fn isDecimal(ch: u8) bool {
    return switch (ch) {
        '0'...'9' => true,
        else => false,
    };
}
/// Report whether `ch` is a hexadecimal digit: '0'-'9', 'a'-'f', or 'A'-'F'.
fn isHexadecimal(ch: u8) bool {
    return switch (ch) {
        '0'...'9', 'a'...'f', 'A'...'F' => true,
        else => false,
    };
}
/// Report whether `ch` may appear inside a symbol.
/// Letters (including the underscore) and decimal digits qualify, and so does
/// every byte above 126, which admits the bytes of multi-byte UTF-8 sequences.
fn isSymbolChar(ch: u8) bool {
    return switch (ch) {
        '_', 'a'...'z', 'A'...'Z', '0'...'9', 127...255 => true,
        else => false,
    };
}
/// Fetch the input byte at index `pos`.
/// Any index at or beyond the end of the input yields 0 instead of failing.
fn getByte(self: *Parser, pos: usize) u8 {
    return if (pos < self.input.len) self.input[pos] else 0;
}
/// Move the cursor forward until it does not point at whitespace
/// Whitespace is whatever `isWhitespace` accepts, so newlines are never skipped.
/// The loop also stops at the end of the input, where `getByte` yields 0.
fn skipWhitespace(self: *Parser) void {
    while (isWhitespace(self.getByte(self.ch_pos))) {
        // The cursor field is `ch_pos`; the misspelled `ch_pose` named no field at all.
        self.ch_pos += 1;
    }
}
/// Errors produced while reading a decimal integer.
const DecimalError = std.fmt.ParseIntError;
/// Read the run of decimal digits under the cursor as a base-10 u64.
/// The cursor is left just past the last digit, also when parsing fails.
/// An empty run of digits is reported by `std.fmt.parseInt` as an error.
fn identifyDecimal(self: *Parser) DecimalError!u64 {
    const digits_start = self.ch_pos;
    var cursor = digits_start;
    while (isDecimal(self.getByte(cursor))) : (cursor += 1) {}
    self.ch_pos = cursor;
    return std.fmt.parseInt(u64, self.input[digits_start..cursor], 10);
}
/// Errors produced while reading a hexadecimal integer.
const HexadecimalError = std.fmt.ParseIntError || error{
    WrongStartingCharacter,
};
/// Read a base-16 integer, written as '#' followed by hexadecimal digits.
/// Fails with WrongStartingCharacter, without moving the cursor, when the
/// cursor is not on '#'. Otherwise the cursor ends just past the last digit,
/// also when `std.fmt.parseInt` rejects the digits (for example a lone '#').
fn identifyHexadecimal(self: *Parser) HexadecimalError!u64 {
    if (self.getByte(self.ch_pos) != '#') return HexadecimalError.WrongStartingCharacter;
    const digits_start = self.ch_pos + 1;
    var cursor = digits_start;
    while (isHexadecimal(self.getByte(cursor))) : (cursor += 1) {}
    self.ch_pos = cursor;
    return std.fmt.parseInt(u64, self.input[digits_start..cursor], 16);
}
/// Errors produced while reading a character constant.
const CharError = error{
    NoStartingDelimiter,
    NoEndingDelimiter,
    NoChar,
    InvalidUtf8,
};
/// Determine whether the cursor points at a valid unicode character wrapped in single quotes
/// Move the cursor past the closing quote and return the character
///
/// Errors:
/// - NoStartingDelimiter: the cursor is not on a single quote (cursor unchanged).
/// - NoChar: nothing lies between the opening quote and the closing quote or end of input.
/// - NoEndingDelimiter: the literal is not closed, runs into ';' or a newline,
///   is longer than four bytes, or holds more than one code point.
/// - InvalidUtf8: the quoted bytes are not valid UTF-8.
fn identifyChar(self: *Parser) CharError!u21 {
    if (self.getByte(self.ch_pos) != '\'') {
        return CharError.NoStartingDelimiter;
    }
    self.ch_pos += 1;
    const start = self.ch_pos;
    while (self.getByte(self.ch_pos) != 0 and self.getByte(self.ch_pos) != '\'') {
        self.ch_pos += 1;
        // Once at least one byte is consumed, a ';' or newline means the
        // literal was never closed on this line.
        const next = self.getByte(self.ch_pos);
        if (next == ';' or next == '\n') {
            return CharError.NoEndingDelimiter;
        }
        // Stop scanning once the body is longer than four bytes.
        if (self.ch_pos - start > 4) {
            return CharError.NoEndingDelimiter;
        }
    }
    const end = self.ch_pos;
    // The scan also stops at the end of the input (or a NUL byte). Consume the
    // closing quote only when it is really there, so the cursor never runs
    // past the input and an unterminated literal such as `'a` is rejected.
    const closed = self.getByte(end) == '\'';
    if (closed) self.ch_pos += 1;
    if (end == start) {
        return CharError.NoChar;
    }
    if (!closed) {
        return CharError.NoEndingDelimiter;
    }
    const view = std.unicode.Utf8View.init(self.input[start..end]) catch return CharError.InvalidUtf8;
    var iter = view.iterator();
    // Exactly one code point must sit between the quotes.
    const character = iter.nextCodepoint() orelse return CharError.NoEndingDelimiter;
    if (iter.nextCodepoint() != null) {
        return CharError.NoEndingDelimiter;
    }
    return character;
}
/// Errors produced while reading a string constant.
const StringError = error{
    NoStartingDelimiter,
    NoEndingDelimiter,
    NoString,
};
/// Read a string constant wrapped in double quotes and return its contents.
/// A string holds at least one byte and may not contain '"' or a newline.
/// On success the cursor ends just past the closing quote.
///
/// Errors:
/// - NoStartingDelimiter: the cursor is not on '"' (cursor unchanged).
/// - NoEndingDelimiter: a newline, a NUL byte, or the end of the input comes before the closing quote.
/// - NoString: the quotes enclose nothing.
fn identifyString(self: *Parser) StringError![]const u8 {
    if (self.getByte(self.ch_pos) != '"') return StringError.NoStartingDelimiter;
    const body_start = self.ch_pos + 1;
    var cursor = body_start;
    while (true) : (cursor += 1) {
        switch (self.getByte(cursor)) {
            '"' => break,
            // getByte yields 0 beyond the end of the input.
            0, '\n' => {
                self.ch_pos = cursor;
                return StringError.NoEndingDelimiter;
            },
            else => {},
        }
    }
    // Step over the closing quote before judging the body.
    self.ch_pos = cursor + 1;
    if (cursor == body_start) return StringError.NoString;
    return self.input[body_start..cursor];
}
/// Errors produced while reading any kind of constant.
const ConstantError = DecimalError || HexadecimalError || CharError || StringError || error{
    UnexpectedSymbol,
};
/// Determine whether the cursor points at a valid constant
/// The constant may be a string or a number
/// Move the cursor past the constant and return it
///
/// The byte under the cursor selects the reader: a decimal digit, '#' for
/// hexadecimal, a single quote for a character, a double quote for a string.
/// Any other byte yields UnexpectedSymbol without moving the cursor. The
/// error set is spelled out (it matches what the readers can return) so that
/// callers can switch on it exhaustively, like the sibling functions.
fn identifyConstant(self: *Parser) ConstantError!ConstantValue {
    return switch (self.getByte(self.ch_pos)) {
        '0'...'9' => ConstantValue{ .number = try self.identifyDecimal() },
        '#' => ConstantValue{ .number = try self.identifyHexadecimal() },
        // The u21 code point widens to the u64 number payload.
        '\'' => ConstantValue{ .number = try self.identifyChar() },
        '"' => ConstantValue{ .string = try self.identifyString() },
        else => ConstantError.UnexpectedSymbol,
    };
}
/// Errors produced while reading or resolving a symbol.
/// ForwardReferenceN is reported when the local symbol `NF` is read.
const SymbolError = error{
    UnexpectedSymbol,
    ForwardReference0,
    ForwardReference1,
    ForwardReference2,
    ForwardReference3,
    ForwardReference4,
    ForwardReference5,
    ForwardReference6,
    ForwardReference7,
    ForwardReference8,
    ForwardReference9,
};
/// Read the symbol under the cursor and return its name.
/// A regular symbol is a letter (or underscore) followed by any number of
/// symbol characters. The thirty local symbols xH, xF and xB, with x a single
/// decimal digit, are the only names allowed to start with a digit; exactly
/// two bytes are consumed for them.
/// Fails with UnexpectedSymbol, leaving the cursor where it was, otherwise.
fn identifySymbol(self: *Parser) SymbolError![]const u8 {
    const name_start = self.ch_pos;
    const first = self.getByte(name_start);
    if (isLetter(first)) {
        var cursor = name_start + 1;
        while (isSymbolChar(self.getByte(cursor))) : (cursor += 1) {}
        self.ch_pos = cursor;
        return self.input[name_start..cursor];
    }
    if (isDecimal(first)) {
        switch (self.getByte(name_start + 1)) {
            'H', 'F', 'B' => {
                self.ch_pos = name_start + 2;
                return self.input[name_start .. name_start + 2];
            },
            else => {},
        }
    }
    return SymbolError.UnexpectedSymbol;
}
/// Look up the value bound to `symbol`.
/// Two-byte names that start with a decimal digit are local symbols: `xB` is
/// looked up in the symbol table like any other name, `xF` is reported as
/// ForwardReferenceX, and any other second byte is UnexpectedSymbol.
/// Names missing from the symbol table are UnexpectedSymbol as well.
fn readSymbol(self: *Parser, symbol: []const u8) SymbolError!NumberValue {
    const digit_prefixed = symbol.len == 2 and isDecimal(symbol[0]);
    if (digit_prefixed and symbol[1] != 'B') {
        if (symbol[1] != 'F') return SymbolError.UnexpectedSymbol;
        // Indexed by the leading digit of the local symbol.
        const forward_references = [_]SymbolError{
            SymbolError.ForwardReference0,
            SymbolError.ForwardReference1,
            SymbolError.ForwardReference2,
            SymbolError.ForwardReference3,
            SymbolError.ForwardReference4,
            SymbolError.ForwardReference5,
            SymbolError.ForwardReference6,
            SymbolError.ForwardReference7,
            SymbolError.ForwardReference8,
            SymbolError.ForwardReference9,
        };
        return forward_references[symbol[0] - '0'];
    }
    return self.symbols.get(symbol) orelse SymbolError.UnexpectedSymbol;
}
/// The weak binary operators, which join terms into an expression.
const WeakOp = enum {
    add,
    sub,
    bit_or,
    bit_xor,
    none,
};
/// Errors produced while evaluating an expression, a term, or a primary.
const ExpressionError = SymbolError || ConstantError || error{
    NoEndingDelimiter,
    UnaryOperationOnString,
    BinaryOperationOnString,
    IllegalOperationOnRegister,
    DivisionByZero,
    FractionalDivisionOversizedNumerator,
};
/// Evaluate the expression under the cursor and move the cursor past it.
/// An expression is a chain of terms joined by the weak operators '+', '-',
/// '|' and '^', folded left to right with wrapping arithmetic.
/// A string or register term is only legal as the sole term: combining one
/// with an operator fails with BinaryOperationOnString or
/// IllegalOperationOnRegister respectively.
fn identifyExpression(self: *Parser) ExpressionError!ExpressionResult {
    var total: u64 = 0;
    var pending_op = WeakOp.none;
    var is_first_term = true;
    while (true) {
        const term = try self.identifyTerm();
        // `null` marks a first term that is a string or a register.
        const operand: ?u64 = switch (term) {
            .string => str: {
                if (!is_first_term) return ExpressionError.BinaryOperationOnString;
                break :str null;
            },
            .number => |nv| switch (nv) {
                .pure => |n| n,
                .register => reg: {
                    if (!is_first_term) return ExpressionError.IllegalOperationOnRegister;
                    break :reg null;
                },
            },
        };
        if (operand) |n| {
            total = switch (pending_op) {
                .add => total +% n,
                .sub => total -% n,
                .bit_or => total | n,
                .bit_xor => total ^ n,
                .none => n,
            };
        }
        pending_op = switch (self.getByte(self.ch_pos)) {
            '+' => WeakOp.add,
            '-' => WeakOp.sub,
            '|' => WeakOp.bit_or,
            '^' => WeakOp.bit_xor,
            else => WeakOp.none,
        };
        if (pending_op == WeakOp.none) {
            // No operator follows: a lone string or register is returned as is.
            return if (operand == null) term else ExpressionResult{ .number = NumberValue{ .pure = total } };
        }
        // Step over the operator.
        self.ch_pos += 1;
        if (operand == null) {
            // An operator after a string or register is an error; the next
            // term is not even parsed.
            return switch (term) {
                .string => ExpressionError.BinaryOperationOnString,
                .number => ExpressionError.IllegalOperationOnRegister,
            };
        }
        is_first_term = false;
    }
}
/// The strong binary operators, which join primaries into a term.
const StrongOp = enum {
    mult,
    div,
    frac_div,
    rem,
    lshift,
    rshift,
    bit_and,
    none,
};
/// Determine whether the cursor points at a valid term
/// Move the cursor past the term, evaluate it, and return its value
///
/// A term is a chain of primaries joined by the strong operators '*', '/',
/// '//', '%', '<<', '>>' and '&', folded left to right. Multiplication wraps;
/// shifts by 64 or more give 0. `a//b` is fractional division: the quotient
/// of `a * 2^64` by `b`, which requires `a < b`.
///
/// Errors (besides those of `identifyPrimary`):
/// - DivisionByZero: the right operand of '/', '//' or '%' is zero.
/// - FractionalDivisionOversizedNumerator: `a >= b` in `a//b` with `b` non-zero.
/// - BinaryOperationOnString / IllegalOperationOnRegister: a string or a
///   register is combined with a strong operator.
fn identifyTerm(self: *Parser) ExpressionError!ExpressionResult {
    var result: u64 = 0;
    var last_op = StrongOp.none;
    var register_result: ?u8 = null;
    var string_result: ?[]const u8 = null;
    var started = false;
    var done = false;
    while (!done) {
        // Reaching this point with a string or register already recorded
        // means an operator followed it, which is not allowed.
        if (string_result != null) return ExpressionError.BinaryOperationOnString;
        if (register_result != null) return ExpressionError.IllegalOperationOnRegister;
        const primary = try self.identifyPrimary();
        switch (primary) {
            .string => |s| {
                if (started) return ExpressionError.BinaryOperationOnString;
                string_result = s;
            },
            .number => |nv| {
                switch (nv) {
                    .pure => |n| {
                        result = switch (last_op) {
                            StrongOp.mult => result *% n,
                            StrongOp.div => div: {
                                if (n == 0) return ExpressionError.DivisionByZero;
                                break :div result / n;
                            },
                            StrongOp.frac_div => frac_div: {
                                // Test for zero first: `result >= 0` always
                                // holds, so the numerator check would hide it.
                                if (n == 0) return ExpressionError.DivisionByZero;
                                if (result >= n) return ExpressionError.FractionalDivisionOversizedNumerator;
                                const shifted: u128 = (@as(u128, result)) << 64;
                                // result < n keeps the quotient below 2^64.
                                const divided: u128 = shifted / n;
                                break :frac_div @as(u64, @intCast(divided));
                            },
                            StrongOp.rem => rem: {
                                if (n == 0) return ExpressionError.DivisionByZero;
                                break :rem result % n;
                            },
                            StrongOp.lshift => if (n >= 64) 0 else result << @as(u6, @intCast(n)),
                            StrongOp.rshift => if (n >= 64) 0 else result >> @as(u6, @intCast(n)),
                            StrongOp.bit_and => result & n,
                            StrongOp.none => n,
                        };
                    },
                    .register => |r| {
                        if (started) return ExpressionError.IllegalOperationOnRegister;
                        register_result = r;
                    },
                }
            },
        }
        last_op = switch (self.getByte(self.ch_pos)) {
            '*' => StrongOp.mult,
            '/' => div: {
                var op = StrongOp.div;
                if (self.getByte(self.ch_pos + 1) == '/') {
                    op = StrongOp.frac_div;
                    // Step over the first '/' of '//'.
                    self.ch_pos += 1;
                }
                break :div op;
            },
            '%' => StrongOp.rem,
            '<' => lshift: {
                // A single '<' is not an operator; leave it in place.
                var op = StrongOp.lshift;
                if (self.getByte(self.ch_pos + 1) == '<') {
                    self.ch_pos += 1;
                } else {
                    op = StrongOp.none;
                }
                break :lshift op;
            },
            '>' => rshift: {
                // A single '>' is not an operator; leave it in place.
                var op = StrongOp.rshift;
                if (self.getByte(self.ch_pos + 1) == '>') {
                    self.ch_pos += 1;
                } else {
                    op = StrongOp.none;
                }
                break :rshift op;
            },
            '&' => StrongOp.bit_and,
            else => StrongOp.none,
        };
        if (last_op == StrongOp.none) {
            done = true;
        } else {
            // Step over the (last byte of the) operator.
            self.ch_pos += 1;
        }
        started = true;
    }
    if (register_result) |r| {
        return ExpressionResult{ .number = NumberValue{ .register = r } };
    }
    if (string_result) |s| {
        return ExpressionResult{ .string = s };
    }
    return ExpressionResult{ .number = NumberValue{ .pure = result } };
}
/// Determine whether the cursor points at a valid primary
/// Move the cursor past the primary, evaluate it, and return its value
///
/// A primary is one of:
/// - a local symbol `xB` or `xF` (`xH` is rejected with UnexpectedSymbol),
/// - `@`, the current location,
/// - a parenthesized expression,
/// - a decimal, hexadecimal, character or string constant,
/// - a unary operator followed by a primary: `+` (identity), `-` (wrapping
///   negation), `~` (bitwise complement), `$` (register number),
/// - any other symbol, resolved through `readSymbol`.
///
/// Errors (besides those of the readers it calls):
/// - NoEndingDelimiter: a parenthesized expression is not closed by ')'.
/// - UnaryOperationOnString: a unary operator is applied to a string.
/// - Overflow: the operand of `$` is 256 or more.
fn identifyPrimary(self: *Parser) ExpressionError!ExpressionResult {
    const current = self.getByte(self.ch_pos);
    // A digit followed by H, F or B is a local symbol, not a decimal constant.
    if (isDecimal(current)) {
        switch (self.getByte(self.ch_pos + 1)) {
            'H', 'F', 'B' => {
                const symbol = try self.identifySymbol();
                if (symbol.len != 2) return SymbolError.UnexpectedSymbol;
                if (symbol[1] == 'H') return SymbolError.UnexpectedSymbol;
                const symbol_val = try self.readSymbol(symbol);
                return ExpressionResult{ .number = symbol_val };
            },
            else => {},
        }
    }
    switch (current) {
        '@' => {
            // Consume the '@'; without this the cursor stayed on it and the
            // rest of an expression such as `@+4` was never parsed.
            self.ch_pos += 1;
            return ExpressionResult{ .number = NumberValue{ .pure = self.location } };
        },
        '(' => {
            self.ch_pos += 1;
            const expr = try self.identifyExpression();
            if (self.getByte(self.ch_pos) != ')') return error.NoEndingDelimiter;
            self.ch_pos += 1;
            return expr;
        },
        '0'...'9', '#', '\'', '"' => {
            const constant = try self.identifyConstant();
            switch (constant) {
                .number => |n| return ExpressionResult{ .number = NumberValue{ .pure = n } },
                .string => |s| return ExpressionResult{ .string = s },
            }
        },
        '+' => {
            self.ch_pos += 1;
            const primary = try self.identifyPrimary();
            switch (primary) {
                .number => return primary,
                else => return ExpressionError.UnaryOperationOnString,
            }
        },
        '-' => {
            self.ch_pos += 1;
            const primary = try self.identifyPrimary();
            switch (primary) {
                .number => |nv| {
                    switch (nv) {
                        // A register operand is treated as its number; the result is pure.
                        .register, .pure => |n| return ExpressionResult{ .number = NumberValue{ .pure = 0 -% n } },
                    }
                },
                else => return ExpressionError.UnaryOperationOnString,
            }
        },
        '~' => {
            self.ch_pos += 1;
            const primary = try self.identifyPrimary();
            switch (primary) {
                .number => |nv| {
                    switch (nv) {
                        // A register operand is treated as its number; the result is pure.
                        .register, .pure => |n| return ExpressionResult{ .number = NumberValue{ .pure = ~n } },
                    }
                },
                else => return ExpressionError.UnaryOperationOnString,
            }
        },
        '$' => {
            self.ch_pos += 1;
            const primary = try self.identifyPrimary();
            switch (primary) {
                .number => |nv| {
                    switch (nv) {
                        .register, .pure => |n| {
                            // Register numbers must fit in a u8.
                            if (n >= 256) return ExpressionError.Overflow;
                            const n8 = @as(u8, @intCast(n));
                            return ExpressionResult{ .number = NumberValue{ .register = n8 } };
                        },
                    }
                },
                else => return ExpressionError.UnaryOperationOnString,
            }
        },
        else => {
            const symbol = try self.identifySymbol();
            const symbol_value = try self.readSymbol(symbol);
            return ExpressionResult{ .number = symbol_value };
        },
    }
}
/// Read the opcode or pseudo operation under the cursor.
/// The name is the run of symbol characters starting at the cursor; the
/// cursor is moved past it and the name is handed to `opcodes.parseOp`,
/// whose result (or error) is returned unchanged.
fn identifyOperation(self: *Parser) !opcodes.Operation {
    const name_start = self.ch_pos;
    var cursor = name_start;
    while (isSymbolChar(self.getByte(cursor))) : (cursor += 1) {}
    self.ch_pos = cursor;
    return opcodes.parseOp(self.allocator, self.input[name_start..cursor]);
}
/// Create a parser over `input`, with the cursor and the location at 0.
/// The symbol table starts with the ten local backward symbols `0B`..`9B`
/// bound to the pure value 0.
/// `input` is borrowed, not copied, so it must outlive the parser.
/// Release the parser with `deinit`. Fails only when the allocator does.
pub fn init(allocator: std.mem.Allocator, input: []const u8) !Parser {
    var p = Parser{
        .allocator = allocator,
        .input = input,
        .location = 0,
        .ch_pos = 0,
        .symbols = std.StringHashMap(NumberValue).init(allocator),
        .object = std.ArrayList(u8).init(allocator),
    };
    // If a later `put` fails, free what the earlier ones allocated.
    errdefer p.deinit();
    // The map stores key slices without copying them; string literals live
    // for the whole program, so they are safe to use as keys.
    const backward_locals = [_][]const u8{ "0B", "1B", "2B", "3B", "4B", "5B", "6B", "7B", "8B", "9B" };
    for (backward_locals) |name| {
        try p.symbols.put(name, NumberValue{ .pure = 0 });
    }
    return p;
}
/// Release the object buffer and the symbol table owned by the parser.
/// The input slice is not owned by the parser and is left untouched.
pub fn deinit(self: *Parser) void {
    self.object.deinit();
    self.symbols.deinit();
}
};
test "normal ascii characters are recognized as symbol chars" {
    // Every ASCII letter, every decimal digit, and the underscore.
    const accepted = "qwertyuiopasdfghjklzxcvbnm1234567890QWERTYUIOPASDFGHJKLZXCVBNM_";
    for (accepted) |byte| {
        try std.testing.expectEqual(true, Parser.isSymbolChar(byte));
    }
}
test "large unicode characters are recognized as symbol chars" {
    // The loop visits the literal byte by byte, so each UTF-8 byte of these
    // multi-byte characters must be accepted on its own.
    const accepted = "𒀀𒀁𒀂𒀃𒀄𒀅𒀆𒀇𒀈𒀉𒀊𒀋𒀌𒀍𒀎𒀏𒀐𒀑𒀒𒀓𒀔𒀕𒀖𒀗𒀘𒀙𒀚𒀛𒀜𒀝𒀞𒀟𒀠𒀡𒀢𒀣𒀤𒀥𒀦𒀧𒀨𒀩𒀪𒀫𒀬𒀭𒀮𒀯𒀰𒀱𒈷𒌄";
    for (accepted) |byte| {
        try std.testing.expectEqual(true, Parser.isSymbolChar(byte));
    }
}
test "non-symbol characters are detected" {
    // Whitespace, newlines, and ASCII punctuation other than the underscore.
    const rejected = "\n\r \t!@#$%^&*()-=+[]{}\\|;:'\"/?,.<>`~";
    for (rejected) |byte| {
        try std.testing.expectEqual(false, Parser.isSymbolChar(byte));
    }
}
test "symbols are identified" {
    // Each input starts with a symbol; `expected` is the part that must be consumed.
    const Case = struct { input: []const u8, expected: []const u8 };
    const cases = [_]Case{
        .{ .input = "_asdf$%@", .expected = "_asdf" },
        .{ .input = "ASFLKJ3332__q5 ;asdf;lk", .expected = "ASFLKJ3332__q5" },
        .{ .input = "asdf𒀤𒀥𒀦\nalsfkd", .expected = "asdf𒀤𒀥𒀦" },
        .{ .input = "2H", .expected = "2H" },
        .{ .input = "5F", .expected = "5F" },
        .{ .input = "0B", .expected = "0B" },
    };
    for (cases) |case| {
        var parser = try Parser.init(std.testing.allocator, case.input);
        // Deferred so the parser is released even when an expectation fails.
        defer parser.deinit();
        const symbol = try parser.identifySymbol();
        try std.testing.expectEqualStrings(case.expected, symbol);
    }
}
test "no symbols are found successfully" {
    // None of these inputs starts with a letter, an underscore, or a local symbol.
    const inputs = [_][]const u8{
        " _asdf",
        ";ASFLKJ3332__q5",
        "\nasdf𒀤𒀥𒀦",
    };
    for (inputs) |input| {
        var parser = try Parser.init(std.testing.allocator, input);
        // Deferred so the parser is released even when an expectation fails.
        defer parser.deinit();
        try std.testing.expectError(error.UnexpectedSymbol, parser.identifySymbol());
    }
}
test "opcodes are identified" {
    // The operation name is the leading run of symbol characters of each input.
    const Case = struct { input: []const u8, expected: opcodes.Operation };
    const cases = [_]Case{
        .{ .input = "2ADDU%aldfk", .expected = opcodes.Operation{ .opcode = opcodes.Opcode._2ADDU } },
        .{ .input = "GO ", .expected = opcodes.Operation{ .opcode = opcodes.Opcode.GO } },
        .{ .input = "ADD\taksfdjas", .expected = opcodes.Operation{ .opcode = opcodes.Opcode.ADD } },
        .{ .input = "GREG\n", .expected = opcodes.Operation{ .pseudo_op = opcodes.PseudoOp.GREG } },
        .{ .input = "IS", .expected = opcodes.Operation{ .pseudo_op = opcodes.PseudoOp.IS } },
    };
    for (cases) |case| {
        var parser = try Parser.init(std.testing.allocator, case.input);
        // Deferred so the parser is released even when an expectation fails.
        defer parser.deinit();
        const op = try parser.identifyOperation();
        try std.testing.expectEqual(case.expected, op);
    }
}
test "no opcodes are found successfully" {
    // Inputs that start with a non-symbol character, or name no operation.
    const inputs = [_][]const u8{
        " _asdf",
        ";ASFLKJ3332__q5",
        "\nasdf𒀤𒀥𒀦",
        "asdfklajsdfl",
    };
    for (inputs) |input| {
        var parser = try Parser.init(std.testing.allocator, input);
        // Deferred so the parser is released even when an expectation fails.
        defer parser.deinit();
        try std.testing.expectError(error.NoOpcode, parser.identifyOperation());
    }
}
test "decimals are recognized" {
    // Parsing stops at the first byte that is not a decimal digit.
    const Case = struct { input: []const u8, expected: u64 };
    const cases = [_]Case{
        .{ .input = "012314aslkfdj", .expected = 12314 },
        .{ .input = "1234567890 43", .expected = 1234567890 },
        .{ .input = "1234567891234567889\n123124", .expected = 1234567891234567889 },
    };
    for (cases) |case| {
        var parser = try Parser.init(std.testing.allocator, case.input);
        // Deferred so the parser is released even when an expectation fails.
        defer parser.deinit();
        const value = try parser.identifyDecimal();
        try std.testing.expectEqual(case.expected, value);
    }
}
test "malformed decimals are not recognized" {
    // No leading digit gives InvalidCharacter; too many digits give Overflow.
    const Case = struct { input: []const u8, expected: Parser.DecimalError };
    const cases = [_]Case{
        .{ .input = "", .expected = Parser.DecimalError.InvalidCharacter },
        .{ .input = "asdf123", .expected = Parser.DecimalError.InvalidCharacter },
        .{ .input = " 123", .expected = Parser.DecimalError.InvalidCharacter },
        .{ .input = "12345678901234567890123456789012345678901234567890", .expected = Parser.DecimalError.Overflow },
    };
    for (cases) |case| {
        var parser = try Parser.init(std.testing.allocator, case.input);
        // Deferred so the parser is released even when an expectation fails.
        defer parser.deinit();
        try std.testing.expectError(case.expected, parser.identifyDecimal());
    }
}
test "hexadecimals are recognized" {
    // Parsing stops at the first byte after '#' that is not a hexadecimal digit.
    const Case = struct { input: []const u8, expected: u64 };
    const cases = [_]Case{
        .{ .input = "#012314saslkfdj", .expected = 0x12314 },
        .{ .input = "#1234567890abcdef 43", .expected = 0x1234567890abcdef },
        .{ .input = "#1234567891\n123124", .expected = 0x1234567891 },
    };
    for (cases) |case| {
        var parser = try Parser.init(std.testing.allocator, case.input);
        // Deferred so the parser is released even when an expectation fails.
        defer parser.deinit();
        const value = try parser.identifyHexadecimal();
        try std.testing.expectEqual(case.expected, value);
    }
}
test "malformed hexadecimals are not recognized" {
    // A missing '#' gives WrongStartingCharacter; a lone '#' gives
    // InvalidCharacter; too many digits give Overflow.
    const Case = struct { input: []const u8, expected: Parser.HexadecimalError };
    const cases = [_]Case{
        .{ .input = "", .expected = Parser.HexadecimalError.WrongStartingCharacter },
        .{ .input = "sasdf123", .expected = Parser.HexadecimalError.WrongStartingCharacter },
        .{ .input = " 123", .expected = Parser.HexadecimalError.WrongStartingCharacter },
        .{ .input = "#12345678901234567890123456789012345678901234567890", .expected = Parser.HexadecimalError.Overflow },
        .{ .input = "#", .expected = Parser.HexadecimalError.InvalidCharacter },
    };
    for (cases) |case| {
        var parser = try Parser.init(std.testing.allocator, case.input);
        // Deferred so the parser is released even when an expectation fails.
        defer parser.deinit();
        try std.testing.expectError(case.expected, parser.identifyHexadecimal());
    }
}
test "characters are recognized" {
    // One code point between single quotes, whether one byte long or several.
    const Case = struct { input: []const u8, expected: u21 };
    const cases = [_]Case{
        .{ .input = "'a'", .expected = 'a' },
        .{ .input = "'1'", .expected = '1' },
        .{ .input = "'𒀤'", .expected = '𒀤' },
    };
    for (cases) |case| {
        var parser = try Parser.init(std.testing.allocator, case.input);
        // Deferred so the parser is released even when an expectation fails.
        defer parser.deinit();
        const character = try parser.identifyChar();
        try std.testing.expectEqual(case.expected, character);
    }
}
test "invalid unicode sequences are not characters" {
    // More than one character between the quotes, nothing between them, or
    // no opening quote at all.
    const Case = struct { input: []const u8, expected: Parser.CharError };
    const cases = [_]Case{
        .{ .input = "'asdf'", .expected = Parser.CharError.NoEndingDelimiter },
        .{ .input = "'asdfg'", .expected = Parser.CharError.NoEndingDelimiter },
        .{ .input = "'as'", .expected = Parser.CharError.NoEndingDelimiter },
        .{ .input = "''", .expected = Parser.CharError.NoChar },
        .{ .input = "'", .expected = Parser.CharError.NoChar },
        .{ .input = "a", .expected = Parser.CharError.NoStartingDelimiter },
    };
    for (cases) |case| {
        var parser = try Parser.init(std.testing.allocator, case.input);
        // Deferred so the parser is released even when an expectation fails.
        defer parser.deinit();
        try std.testing.expectError(case.expected, parser.identifyChar());
    }
}
test "strings are recognized" {
    // The result is the text between the double quotes, returned verbatim.
    const Case = struct { input: []const u8, expected: []const u8 };
    const cases = [_]Case{
        .{ .input = "\" \"", .expected = " " },
        .{
            .input = "\"aslkdfjlaskdfj lkasjflkasjdflaksjfd''12309)($)(#$[[]𒀤\"",
            .expected = "aslkdfjlaskdfj lkasjflkasjdflaksjfd''12309)($)(#$[[]𒀤",
        },
    };
    for (cases) |case| {
        var parser = try Parser.init(std.testing.allocator, case.input);
        // Deferred so the parser is released even when an expectation fails.
        defer parser.deinit();
        const string = try parser.identifyString();
        try std.testing.expectEqualStrings(case.expected, string);
    }
}
test "invalid strings are not recognized" {
    // Empty strings, unterminated strings, strings containing a newline, and
    // text without an opening quote.
    const Case = struct { input: []const u8, expected: Parser.StringError };
    const cases = [_]Case{
        .{ .input = "\"\"", .expected = Parser.StringError.NoString },
        .{ .input = "\"", .expected = Parser.StringError.NoEndingDelimiter },
        .{ .input = "\"\n\"", .expected = Parser.StringError.NoEndingDelimiter },
        .{ .input = "asdf", .expected = Parser.StringError.NoStartingDelimiter },
    };
    for (cases) |case| {
        var parser = try Parser.init(std.testing.allocator, case.input);
        // Deferred so the parser is released even when an expectation fails.
        defer parser.deinit();
        try std.testing.expectError(case.expected, parser.identifyString());
    }
}
test "constants are recognized" {
    // One case per kind of constant: decimal, hexadecimal, character, string.
    const Case = struct { input: []const u8, expected: ConstantValue };
    const cases = [_]Case{
        .{ .input = "1234567890 1234", .expected = ConstantValue{ .number = 1234567890 } },
        .{ .input = "#1234567890abcdef;%#*(", .expected = ConstantValue{ .number = 0x1234567890abcdef } },
        .{ .input = "'a'uuuuuu", .expected = ConstantValue{ .number = 'a' } },
        .{ .input = "\"hello \"world", .expected = ConstantValue{ .string = "hello " } },
    };
    for (cases) |case| {
        var parser = try Parser.init(std.testing.allocator, case.input);
        defer parser.deinit();
        const constant = try parser.identifyConstant();
        switch (constant) {
            .number => |n| try std.testing.expectEqual(case.expected.number, n),
            .string => |s| try std.testing.expectEqualStrings(case.expected.string, s),
        }
    }
}
test "invalid constants are recognized" {
    // Inputs that do not start a constant, and a two-character char literal.
    const Case = struct { input: []const u8, expected: Parser.ConstantError };
    const cases = [_]Case{
        .{ .input = "$123", .expected = Parser.ConstantError.UnexpectedSymbol },
        .{ .input = " 123", .expected = Parser.ConstantError.UnexpectedSymbol },
        .{ .input = "'aa'", .expected = Parser.ConstantError.NoEndingDelimiter },
    };
    for (cases) |case| {
        var parser = try Parser.init(std.testing.allocator, case.input);
        defer parser.deinit();
        try std.testing.expectError(case.expected, parser.identifyConstant());
    }
}
test "basic primaries are identified" {
    // Constants, the current location, and each unary operator.
    const Case = struct { input: []const u8, expected: ExpressionResult };
    const cases = [_]Case{
        .{ .input = "1234", .expected = ExpressionResult{ .number = NumberValue{ .pure = 1234 } } },
        .{ .input = "@", .expected = ExpressionResult{ .number = NumberValue{ .pure = 0 } } },
        .{ .input = "'a'", .expected = ExpressionResult{ .number = NumberValue{ .pure = 'a' } } },
        .{ .input = "\"hello world\"", .expected = ExpressionResult{ .string = "hello world" } },
        .{ .input = "+1234", .expected = ExpressionResult{ .number = NumberValue{ .pure = 1234 } } },
        .{ .input = "-#1", .expected = ExpressionResult{ .number = NumberValue{ .pure = 0xFFFFFFFFFFFFFFFF } } },
        .{ .input = "~#0", .expected = ExpressionResult{ .number = NumberValue{ .pure = 0xFFFFFFFFFFFFFFFF } } },
        .{ .input = "$123", .expected = ExpressionResult{ .number = NumberValue{ .register = 123 } } },
    };
    for (cases) |case| {
        var parser = try Parser.init(std.testing.allocator, case.input);
        defer parser.deinit();
        const primary = try parser.identifyPrimary();
        switch (primary) {
            .number => |nv| try std.testing.expectEqual(case.expected.number, nv),
            .string => |s| try std.testing.expectEqualStrings(case.expected.string, s),
        }
    }
}
test "invalid primaries are detected" {
    // Register numbers above 255 overflow; unary operators reject strings.
    const Case = struct { input: []const u8, expected: Parser.ExpressionError };
    const cases = [_]Case{
        .{ .input = "$256", .expected = Parser.ExpressionError.Overflow },
        .{ .input = "$~0", .expected = Parser.ExpressionError.Overflow },
        .{ .input = "~\"hello\"", .expected = Parser.ExpressionError.UnaryOperationOnString },
    };
    for (cases) |case| {
        var parser = try Parser.init(std.testing.allocator, case.input);
        defer parser.deinit();
        try std.testing.expectError(case.expected, parser.identifyPrimary());
    }
}
test "valid terms are recognized" {
    // One case per strong operator, plus a lone register and a lone string.
    const Case = struct { input: []const u8, expected: ExpressionResult };
    const cases = [_]Case{
        .{ .input = "12*34", .expected = ExpressionResult{ .number = NumberValue{ .pure = 408 } } },
        .{ .input = "23/2", .expected = ExpressionResult{ .number = NumberValue{ .pure = 11 } } },
        .{ .input = "2//3", .expected = ExpressionResult{ .number = NumberValue{ .pure = 12297829382473034410 } } },
        .{ .input = "3%2", .expected = ExpressionResult{ .number = NumberValue{ .pure = 1 } } },
        .{ .input = "1<<2", .expected = ExpressionResult{ .number = NumberValue{ .pure = 4 } } },
        .{ .input = "4>>2", .expected = ExpressionResult{ .number = NumberValue{ .pure = 1 } } },
        .{ .input = "#FF&#AA", .expected = ExpressionResult{ .number = NumberValue{ .pure = 0xAA } } },
        .{ .input = "$12", .expected = ExpressionResult{ .number = NumberValue{ .register = 12 } } },
        .{ .input = "\"hello\"", .expected = ExpressionResult{ .string = "hello" } },
    };
    for (cases) |case| {
        var parser = try Parser.init(std.testing.allocator, case.input);
        defer parser.deinit();
        const term = try parser.identifyTerm();
        switch (term) {
            .number => try std.testing.expectEqual(case.expected, term),
            .string => |s| try std.testing.expectEqualStrings(case.expected.string, s),
        }
    }
}
test "terms compute the entire chain" {
    // Strong operators are applied left to right across the whole chain.
    const Case = struct { input: []const u8, expected: ExpressionResult };
    const cases = [_]Case{
        .{ .input = "1*2*3*4*5", .expected = ExpressionResult{ .number = NumberValue{ .pure = 120 } } },
        .{ .input = "2*6/4", .expected = ExpressionResult{ .number = NumberValue{ .pure = 3 } } },
        .{ .input = "3*3%8<<2>>1", .expected = ExpressionResult{ .number = NumberValue{ .pure = 2 } } },
    };
    for (cases) |case| {
        var parser = try Parser.init(std.testing.allocator, case.input);
        defer parser.deinit();
        const term = try parser.identifyTerm();
        try std.testing.expectEqual(case.expected, term);
    }
}
test "strong operations do not work on registers" {
    // A register on either side of a strong operator is rejected.
    const inputs = [_][]const u8{
        "$2*3",
        "1/$1",
        "$1%$1",
        "$1<<1",
        "1>>$1",
        "$1&$1",
    };
    for (inputs) |input| {
        var parser = try Parser.init(std.testing.allocator, input);
        defer parser.deinit();
        try std.testing.expectError(Parser.ExpressionError.IllegalOperationOnRegister, parser.identifyTerm());
    }
}
test "strong operations do not work on strings" {
    // A string on either side of a strong operator is rejected.
    const inputs = [_][]const u8{
        "\"hello\"*1",
        "1*\"hello\"",
    };
    for (inputs) |input| {
        var parser = try Parser.init(std.testing.allocator, input);
        defer parser.deinit();
        try std.testing.expectError(Parser.ExpressionError.BinaryOperationOnString, parser.identifyTerm());
    }
}
test "expressions are recognized" {
    // Weak operators, their mix with strong operators, a symbol lookup (`k`),
    // and lone string and register operands.
    const Case = struct { input: []const u8, expected: ExpressionResult };
    const cases = [_]Case{
        .{ .input = "1+2", .expected = ExpressionResult{ .number = NumberValue{ .pure = 3 } } },
        .{ .input = "3-2", .expected = ExpressionResult{ .number = NumberValue{ .pure = 1 } } },
        .{ .input = "2-3", .expected = ExpressionResult{ .number = NumberValue{ .pure = 0xFFFFFFFFFFFFFFFF } } },
        .{ .input = "#AA|#00", .expected = ExpressionResult{ .number = NumberValue{ .pure = 0xAA } } },
        .{ .input = "#AA^#FF", .expected = ExpressionResult{ .number = NumberValue{ .pure = 0x55 } } },
        .{ .input = "5*5+5", .expected = ExpressionResult{ .number = NumberValue{ .pure = 30 } } },
        .{ .input = "5*5+5*5", .expected = ExpressionResult{ .number = NumberValue{ .pure = 50 } } },
        .{ .input = "#ab<<32+k&~(k-1)", .expected = ExpressionResult{ .number = NumberValue{ .pure = 0xab00000100 } } },
        .{ .input = "\"hello\"", .expected = ExpressionResult{ .string = "hello" } },
        .{ .input = "$12", .expected = ExpressionResult{ .number = NumberValue{ .register = 12 } } },
        .{ .input = "$$12", .expected = ExpressionResult{ .number = NumberValue{ .register = 12 } } },
    };
    for (cases) |case| {
        var parser = try Parser.init(std.testing.allocator, case.input);
        defer parser.deinit();
        try parser.symbols.put("k", NumberValue{ .pure = 0xcdef00 });
        const expression = try parser.identifyExpression();
        switch (expression) {
            .number => try std.testing.expectEqual(case.expected, expression),
            .string => |s| try std.testing.expectEqualStrings(case.expected.string, s),
        }
    }
}
test "weak operations cannot be applied to strings and registers" {
    // A register or a string on either side of a weak operator is rejected.
    const Case = struct { input: []const u8, expected: Parser.ExpressionError };
    const cases = [_]Case{
        .{ .input = "$1+$1", .expected = Parser.ExpressionError.IllegalOperationOnRegister },
        .{ .input = "2-$2", .expected = Parser.ExpressionError.IllegalOperationOnRegister },
        .{ .input = "$3|3", .expected = Parser.ExpressionError.IllegalOperationOnRegister },
        .{ .input = "$4^$4", .expected = Parser.ExpressionError.IllegalOperationOnRegister },
        .{ .input = "1+\"hello\"", .expected = Parser.ExpressionError.BinaryOperationOnString },
        .{ .input = "\"hello\"-2", .expected = Parser.ExpressionError.BinaryOperationOnString },
        .{ .input = "3|\"hello\"^4", .expected = Parser.ExpressionError.BinaryOperationOnString },
    };
    for (cases) |case| {
        var parser = try Parser.init(std.testing.allocator, case.input);
        defer parser.deinit();
        try std.testing.expectError(case.expected, parser.identifyExpression());
    }
}