1199 lines
39 KiB
Zig
1199 lines
39 KiB
Zig
const std = @import("std");
|
|
const opcodes = @import("opcodes.zig");
|
|
|
|
/// Discriminant of `NumberValue`: a plain value or a register reference.
const NumberType = enum { pure, register };

/// A number is either a pure 64-bit value or the number of a register.
const NumberValue = union(NumberType) {
    /// A plain unsigned 64-bit quantity.
    pure: u64,
    /// A register number.
    register: u8,
};
|
|
|
|
/// Discriminant of `ExpressionResult`: a number or a string.
const ExpressionResultType = enum { number, string };

/// The value an expression evaluates to: a `NumberValue` or a string.
const ExpressionResult = union(ExpressionResultType) {
    /// A numeric result (pure value or register).
    number: NumberValue,
    /// A string result.
    string: []const u8,
};
|
|
|
|
/// Discriminant of `ConstantValue`: a number or a string.
const ConstantType = enum { number, string };

/// A constant is either a number or a string.
const ConstantValue = union(ConstantType) {
    /// A numeric constant.
    number: u64,
    /// A string constant.
    string: []const u8,
};
|
|
|
|
/// The Parser reads a provided input and assembles it into MMIX object code
|
|
pub const Parser = struct {
|
|
/// Allocator handed to the symbol table, the object buffer, and `opcodes.parseOp`.
allocator: std.mem.Allocator,
|
|
/// The source text being parsed; the `identify*` functions return slices into it.
input: []const u8,
|
|
/// Value produced by the `@` primary; starts at 0.
/// NOTE(review): presumably the current assembly location -- confirm against the code that updates it.
location: u64,
|
|
/// Cursor: index into `input` of the next byte to examine.
ch_pos: usize,
|
|
/// Maps symbol names to their values; `init` seeds it with "0B".."9B" set to 0.
symbols: std.StringHashMap(NumberValue),
|
|
/// Byte buffer, created empty by `init` and released by `deinit`.
/// NOTE(review): presumably receives the assembled object code -- confirm.
object: std.ArrayList(u8),
|
|
|
|
/// Report whether `ch` is a space, a tab, or a carriage return.
/// Newlines are special and are deliberately not counted as whitespace here.
fn isWhitespace(ch: u8) bool {
    return switch (ch) {
        ' ', '\t', '\r' => true,
        else => false,
    };
}
|
|
|
|
/// Report whether `ch` is an ASCII letter.
/// The underscore counts as a letter for the purposes of symbol recognition.
fn isLetter(ch: u8) bool {
    return switch (ch) {
        '_', 'a'...'z', 'A'...'Z' => true,
        else => false,
    };
}
|
|
|
|
/// Report whether `ch` is one of the decimal digits '0' through '9'.
fn isDecimal(ch: u8) bool {
    return switch (ch) {
        '0'...'9' => true,
        else => false,
    };
}
|
|
|
|
/// Report whether `ch` is a hexadecimal digit: '0'-'9', 'a'-'f', or 'A'-'F'.
fn isHexadecimal(ch: u8) bool {
    return switch (ch) {
        '0'...'9', 'a'...'f', 'A'...'F' => true,
        else => false,
    };
}
|
|
|
|
/// Report whether `ch` may appear inside a symbol: a letter (including the
/// underscore), a decimal digit, or any byte above 126. Every byte of a
/// multi-byte UTF-8 sequence is above 126, so such characters are accepted.
fn isSymbolChar(ch: u8) bool {
    return switch (ch) {
        '_', 'a'...'z', 'A'...'Z', '0'...'9', 127...255 => true,
        else => false,
    };
}
|
|
|
|
/// Fetch the input byte at index `pos`.
/// Yields 0 when `pos` lies at or beyond the end of the input.
fn getByte(self: *Parser, pos: usize) u8 {
    return if (pos < self.input.len) self.input[pos] else 0;
}
|
|
|
|
/// Advance the cursor until it no longer points at whitespace.
/// Whitespace is whatever `isWhitespace` accepts, so a newline stops the scan.
fn skipWhitespace(self: *Parser) void {
    while (isWhitespace(self.getByte(self.ch_pos))) {
        // The cursor field is `ch_pos`; the previous `ch_pose` spelling did not exist.
        self.ch_pos += 1;
    }
}
|
|
|
|
const DecimalError = std.fmt.ParseIntError;

/// Parse the run of decimal digits under the cursor as a base-10 `u64`.
/// The cursor is left just past the last digit, even when parsing fails.
/// Fails with `InvalidCharacter` when there is no digit at the cursor and with
/// `Overflow` when the value does not fit in 64 bits.
fn identifyDecimal(self: *Parser) DecimalError!u64 {
    const first = self.ch_pos;
    var cursor = first;
    while (isDecimal(self.getByte(cursor))) : (cursor += 1) {}
    self.ch_pos = cursor;

    return std.fmt.parseInt(u64, self.input[first..cursor], 10);
}
|
|
|
|
const HexadecimalError = std.fmt.ParseIntError || error{
    WrongStartingCharacter,
};

/// Parse a base-16 integer under the cursor; such a number is introduced by `#`.
/// The cursor is left just past the last hexadecimal digit.
/// Fails with `WrongStartingCharacter` (cursor unmoved) when the cursor is not on
/// `#`, with `InvalidCharacter` when no digit follows the `#`, and with
/// `Overflow` when the value does not fit in 64 bits.
fn identifyHexadecimal(self: *Parser) HexadecimalError!u64 {
    if (self.getByte(self.ch_pos) != '#') return HexadecimalError.WrongStartingCharacter;

    const first = self.ch_pos + 1;
    var cursor = first;
    while (isHexadecimal(self.getByte(cursor))) : (cursor += 1) {}
    self.ch_pos = cursor;

    return std.fmt.parseInt(u64, self.input[first..cursor], 16);
}
|
|
|
|
const CharError = error{
    NoStartingDelimiter,
    NoEndingDelimiter,
    NoChar,
    InvalidUtf8,
};

/// Parse a single unicode character wrapped in single quotes and return its code point.
/// On success the cursor is left just past the closing quote.
///
/// Errors:
/// - `NoStartingDelimiter`: the cursor is not on a `'` (cursor unmoved).
/// - `NoChar`: nothing lies between the opening quote and the closing quote or end of input.
/// - `NoEndingDelimiter`: the literal is not closed by a `'`, runs into `;` or a
///   newline, is longer than four bytes, or holds more than one code point.
/// - `InvalidUtf8`: the bytes between the quotes are not valid UTF-8.
fn identifyChar(self: *Parser) CharError!u21 {
    if (self.getByte(self.ch_pos) != '\'') {
        return CharError.NoStartingDelimiter;
    }
    self.ch_pos += 1;

    const start = self.ch_pos;
    while (self.getByte(self.ch_pos) != 0 and self.getByte(self.ch_pos) != '\'') {
        self.ch_pos += 1;

        // The first byte after the quote is never tested here, so `';'` is a valid literal.
        if (self.getByte(self.ch_pos) == ';' or self.getByte(self.ch_pos) == '\n') {
            return CharError.NoEndingDelimiter;
        }
        // A UTF-8 encoded code point occupies at most four bytes.
        if (self.ch_pos - start > 4) {
            return CharError.NoEndingDelimiter;
        }
    }
    const end = self.ch_pos;
    // The scan also stops at end of input, so remember whether a quote really ended it.
    const closed = self.getByte(end) == '\'';
    self.ch_pos += 1;

    if (end <= start) {
        return CharError.NoChar;
    }
    // Previously an unterminated literal such as `'a` at end of input was accepted.
    if (!closed) {
        return CharError.NoEndingDelimiter;
    }

    const view = std.unicode.Utf8View.init(self.input[start..end]) catch return CharError.InvalidUtf8;
    var iter = view.iterator();
    const character = iter.nextCodepoint() orelse return CharError.NoEndingDelimiter;
    // Exactly one code point is allowed between the quotes.
    if (iter.nextCodepoint() != null) {
        return CharError.NoEndingDelimiter;
    }

    return character;
}
|
|
|
|
const StringError = error{
    NoStartingDelimiter,
    NoEndingDelimiter,
    NoString,
};

/// Parse a string wrapped in double quotes and return the text between the quotes
/// as a slice of the input. A string holds at least one character and may not
/// contain `"` or a newline. On success the cursor is left just past the closing quote.
///
/// Errors:
/// - `NoStartingDelimiter`: the cursor is not on a `"` (cursor unmoved).
/// - `NoEndingDelimiter`: a newline or the end of the input comes before the closing quote.
/// - `NoString`: the quotes enclose nothing.
fn identifyString(self: *Parser) StringError![]const u8 {
    if (self.getByte(self.ch_pos) != '"') return StringError.NoStartingDelimiter;

    self.ch_pos += 1;
    const first = self.ch_pos;

    // Scan to the closing quote; the cursor stays on the offending byte on failure.
    while (true) : (self.ch_pos += 1) {
        switch (self.getByte(self.ch_pos)) {
            '"' => break,
            0, '\n' => return StringError.NoEndingDelimiter,
            else => {},
        }
    }
    const last = self.ch_pos;
    self.ch_pos += 1;

    if (last <= first) return StringError.NoString;
    return self.input[first..last];
}
|
|
|
|
const ConstantError = DecimalError || HexadecimalError || CharError || StringError || error{
    UnexpectedSymbol,
};

/// Parse the constant under the cursor and move the cursor past it.
/// The first byte selects the kind: a decimal digit starts a decimal number,
/// `#` a hexadecimal number, `'` a character (returned as its code point), and
/// `"` a string.
///
/// Fails with `UnexpectedSymbol` (cursor unmoved) when the cursor is on anything
/// else; otherwise any error of the selected sub-parser is passed through.
fn identifyConstant(self: *Parser) ConstantError!ConstantValue {
    switch (self.getByte(self.ch_pos)) {
        '0'...'9' => return ConstantValue{ .number = try self.identifyDecimal() },
        '#' => return ConstantValue{ .number = try self.identifyHexadecimal() },
        '\'' => return ConstantValue{ .number = try self.identifyChar() },
        '"' => return ConstantValue{ .string = try self.identifyString() },
        else => return ConstantError.UnexpectedSymbol,
    }
}
|
|
|
|
const SymbolError = error{
    UnexpectedSymbol,
    ForwardReference0,
    ForwardReference1,
    ForwardReference2,
    ForwardReference3,
    ForwardReference4,
    ForwardReference5,
    ForwardReference6,
    ForwardReference7,
    ForwardReference8,
    ForwardReference9,
};

/// Parse the symbol under the cursor and return its name as a slice of the input.
/// A symbol starts with a letter (or underscore) and continues with symbol
/// characters. The exception is the 30 special symbols xH, xF and xB, where x is
/// a single decimal digit. The cursor is left just past the symbol.
///
/// Fails with `UnexpectedSymbol` (cursor unmoved) when no symbol starts at the cursor.
fn identifySymbol(self: *Parser) SymbolError![]const u8 {
    const begin = self.ch_pos;
    const lead = self.getByte(begin);

    if (isLetter(lead)) {
        var cursor = begin + 1;
        while (isSymbolChar(self.getByte(cursor))) : (cursor += 1) {}
        self.ch_pos = cursor;
        return self.input[begin..cursor];
    }

    if (isDecimal(lead)) {
        switch (self.getByte(begin + 1)) {
            'H', 'F', 'B' => {
                self.ch_pos = begin + 2;
                return self.input[begin .. begin + 2];
            },
            else => {},
        }
    }

    return SymbolError.UnexpectedSymbol;
}
|
|
|
|
/// Look up the value bound to `symbol`.
///
/// Two-character names that start with a decimal digit are handled first:
/// - a name ending in `F` yields the matching `ForwardReferenceN` error;
/// - a name ending in `B` is looked up in the symbol table like any other symbol;
/// - any other ending yields `UnexpectedSymbol`.
///
/// Every other symbol is looked up in the symbol table; an unknown name yields
/// `UnexpectedSymbol`.
fn readSymbol(self: *Parser, symbol: []const u8) SymbolError!NumberValue {
    const forward_references = [10]SymbolError{
        SymbolError.ForwardReference0,
        SymbolError.ForwardReference1,
        SymbolError.ForwardReference2,
        SymbolError.ForwardReference3,
        SymbolError.ForwardReference4,
        SymbolError.ForwardReference5,
        SymbolError.ForwardReference6,
        SymbolError.ForwardReference7,
        SymbolError.ForwardReference8,
        SymbolError.ForwardReference9,
    };

    if (symbol.len == 2 and symbol[1] != 'B') {
        switch (symbol[0]) {
            '0'...'9' => {
                if (symbol[1] != 'F') return SymbolError.UnexpectedSymbol;
                return forward_references[symbol[0] - '0'];
            },
            else => {},
        }
    }

    return self.symbols.get(symbol) orelse SymbolError.UnexpectedSymbol;
}
|
|
|
|
const WeakOp = enum {
    add,
    sub,
    bit_or,
    bit_xor,
    none,
};

const ExpressionError = SymbolError || ConstantError || error{
    NoEndingDelimiter,
    UnaryOperationOnString,
    BinaryOperationOnString,
    IllegalOperationOnRegister,
    DivisionByZero,
    FractionalDivisionOversizedNumerator,
};

/// Parse and evaluate the expression under the cursor, leaving the cursor just
/// past it. An expression is a chain of terms joined by the weak operators
/// `+`, `-`, `|` and `^`, applied left to right with wrapping arithmetic.
///
/// A string or a register is only accepted as the sole term of the expression:
/// combining one with a weak operator yields `BinaryOperationOnString` or
/// `IllegalOperationOnRegister`. Errors from `identifyTerm` are passed through.
fn identifyExpression(self: *Parser) ExpressionError!ExpressionResult {
    var total: u64 = 0;
    var pending_op = WeakOp.none;
    var first_term = true;
    var lone_register: ?u8 = null;
    var lone_string: ?[]const u8 = null;

    while (true) {
        const term = try self.identifyTerm();

        switch (term) {
            .string => |text| {
                if (!first_term) return ExpressionError.BinaryOperationOnString;
                lone_string = text;
            },
            .number => |value| switch (value) {
                .pure => |n| {
                    total = switch (pending_op) {
                        WeakOp.add => total +% n,
                        WeakOp.sub => total -% n,
                        WeakOp.bit_or => total | n,
                        WeakOp.bit_xor => total ^ n,
                        WeakOp.none => n,
                    };
                },
                .register => |index| {
                    if (!first_term) return ExpressionError.IllegalOperationOnRegister;
                    lone_register = index;
                },
            },
        }
        first_term = false;

        // Anything other than a weak operator ends the expression.
        pending_op = switch (self.getByte(self.ch_pos)) {
            '+' => WeakOp.add,
            '-' => WeakOp.sub,
            '|' => WeakOp.bit_or,
            '^' => WeakOp.bit_xor,
            else => break,
        };
        self.ch_pos += 1;

        // An operator follows, so a string or register on the left is illegal.
        if (lone_string != null) return ExpressionError.BinaryOperationOnString;
        if (lone_register != null) return ExpressionError.IllegalOperationOnRegister;
    }

    if (lone_string) |text| {
        return ExpressionResult{ .string = text };
    }
    if (lone_register) |index| {
        return ExpressionResult{ .number = NumberValue{ .register = index } };
    }
    return ExpressionResult{ .number = NumberValue{ .pure = total } };
}
|
|
|
|
const StrongOp = enum {
    mult,
    div,
    frac_div,
    rem,
    lshift,
    rshift,
    bit_and,
    none,
};

/// Parse and evaluate the term under the cursor, leaving the cursor just past it.
/// A term is a chain of primaries joined by the strong operators `*`, `/`, `//`,
/// `%`, `<<`, `>>` and `&`, applied left to right. Multiplication wraps, and
/// shifts by 64 or more yield 0.
///
/// `a // b` is fractional division: the quotient of `a * 2^64` by `b`, which
/// requires `a < b`.
///
/// Errors:
/// - `DivisionByZero`: the right operand of `/`, `//` or `%` is zero.
/// - `FractionalDivisionOversizedNumerator`: `a >= b` in `a // b` with `b` non-zero.
/// - `BinaryOperationOnString` / `IllegalOperationOnRegister`: a string or a
///   register is combined with a strong operator; each is only accepted as the sole primary.
/// - Errors from `identifyPrimary` are passed through.
fn identifyTerm(self: *Parser) ExpressionError!ExpressionResult {
    var result: u64 = 0;
    var last_op = StrongOp.none;
    var register_result: ?u8 = null;
    var string_result: ?[]const u8 = null;
    var started = false;
    var done = false;

    while (!done) {
        // Reaching here with a string or register means an operator followed it.
        if (string_result != null) return ExpressionError.BinaryOperationOnString;
        if (register_result != null) return ExpressionError.IllegalOperationOnRegister;

        const primary = try self.identifyPrimary();

        switch (primary) {
            .string => |text| {
                if (started) return ExpressionError.BinaryOperationOnString;
                string_result = text;
            },
            .number => |value| switch (value) {
                .pure => |n| {
                    result = switch (last_op) {
                        StrongOp.mult => result *% n,
                        StrongOp.div => div: {
                            if (n == 0) return ExpressionError.DivisionByZero;
                            break :div result / n;
                        },
                        StrongOp.frac_div => frac_div: {
                            // Test for zero first: `result >= 0` always holds, so the
                            // opposite order made DivisionByZero unreachable.
                            if (n == 0) return ExpressionError.DivisionByZero;
                            if (result >= n) return ExpressionError.FractionalDivisionOversizedNumerator;
                            const shifted: u128 = @as(u128, result) << 64;
                            // result < n guarantees the quotient is below 2^64.
                            break :frac_div @as(u64, @intCast(shifted / n));
                        },
                        StrongOp.rem => rem: {
                            if (n == 0) return ExpressionError.DivisionByZero;
                            break :rem result % n;
                        },
                        StrongOp.lshift => if (n >= 64) 0 else result << @as(u6, @intCast(n)),
                        StrongOp.rshift => if (n >= 64) 0 else result >> @as(u6, @intCast(n)),
                        StrongOp.bit_and => result & n,
                        StrongOp.none => n,
                    };
                },
                .register => |index| {
                    if (started) return ExpressionError.IllegalOperationOnRegister;
                    register_result = index;
                },
            },
        }

        // A lone `<` or `>` is not an operator and ends the term.
        const following = self.getByte(self.ch_pos + 1);
        last_op = switch (self.getByte(self.ch_pos)) {
            '*' => StrongOp.mult,
            '/' => if (following == '/') StrongOp.frac_div else StrongOp.div,
            '%' => StrongOp.rem,
            '<' => if (following == '<') StrongOp.lshift else StrongOp.none,
            '>' => if (following == '>') StrongOp.rshift else StrongOp.none,
            '&' => StrongOp.bit_and,
            else => StrongOp.none,
        };

        // Step over the operator: two bytes for `//`, `<<` and `>>`, one otherwise.
        switch (last_op) {
            .none => done = true,
            .frac_div, .lshift, .rshift => self.ch_pos += 2,
            else => self.ch_pos += 1,
        }

        started = true;
    }

    if (register_result) |index| {
        return ExpressionResult{ .number = NumberValue{ .register = index } };
    }
    if (string_result) |text| {
        return ExpressionResult{ .string = text };
    }
    return ExpressionResult{ .number = NumberValue{ .pure = result } };
}
|
|
|
|
/// Parse and evaluate the primary under the cursor, leaving the cursor just past it.
/// A primary is one of:
/// - a local symbol `xB` or `xF` (`xH` is rejected with `UnexpectedSymbol`);
/// - `@`, which evaluates to `self.location`;
/// - a parenthesised expression;
/// - a constant (decimal, hexadecimal, character or string);
/// - a unary `+`, `-`, `~` or `$` applied to another primary;
/// - any other symbol, resolved through `readSymbol`.
///
/// Unary `-` and `~` produce a pure number even when applied to a register.
/// `$` turns a value below 256 into a register and otherwise fails with
/// `Overflow`. A unary operator applied to a string fails with
/// `UnaryOperationOnString`; a missing `)` fails with `NoEndingDelimiter`.
fn identifyPrimary(self: *Parser) ExpressionError!ExpressionResult {
    const lead = self.getByte(self.ch_pos);
    const following = self.getByte(self.ch_pos + 1);

    // Local symbols start with a digit, so catch them before the constant branch.
    if (isDecimal(lead) and (following == 'H' or following == 'F' or following == 'B')) {
        const symbol = try self.identifySymbol();
        if (symbol.len != 2) return SymbolError.UnexpectedSymbol;
        if (symbol[1] == 'H') return SymbolError.UnexpectedSymbol;
        return ExpressionResult{ .number = try self.readSymbol(symbol) };
    }

    switch (lead) {
        '@' => {
            // Consume the `@`; previously the cursor was left on it, so whatever
            // followed (for example the `+4` in `@+4`) was never parsed.
            self.ch_pos += 1;
            return ExpressionResult{ .number = NumberValue{ .pure = self.location } };
        },
        '(' => {
            self.ch_pos += 1;
            const expr = try self.identifyExpression();
            if (self.getByte(self.ch_pos) != ')') return error.NoEndingDelimiter;
            self.ch_pos += 1;
            return expr;
        },
        '0'...'9', '#', '\'', '"' => {
            switch (try self.identifyConstant()) {
                .number => |n| return ExpressionResult{ .number = NumberValue{ .pure = n } },
                .string => |s| return ExpressionResult{ .string = s },
            }
        },
        '+' => {
            self.ch_pos += 1;
            const operand = try self.identifyPrimary();
            switch (operand) {
                // Unary plus passes the operand through, pure value or register.
                .number => return operand,
                .string => return ExpressionError.UnaryOperationOnString,
            }
        },
        '-' => {
            const n = try self.unaryOperand();
            return ExpressionResult{ .number = NumberValue{ .pure = 0 -% n } };
        },
        '~' => {
            const n = try self.unaryOperand();
            return ExpressionResult{ .number = NumberValue{ .pure = ~n } };
        },
        '$' => {
            const n = try self.unaryOperand();
            if (n >= 256) return ExpressionError.Overflow;
            return ExpressionResult{ .number = NumberValue{ .register = @as(u8, @intCast(n)) } };
        },
        else => {
            const symbol = try self.identifySymbol();
            return ExpressionResult{ .number = try self.readSymbol(symbol) };
        },
    }
}

/// Step over a one-byte unary operator, parse its operand, and return the
/// operand's numeric value (a register contributes its register number).
fn unaryOperand(self: *Parser) ExpressionError!u64 {
    self.ch_pos += 1;
    switch (try self.identifyPrimary()) {
        .number => |value| switch (value) {
            .pure => |n| return n,
            .register => |index| return @as(u64, index),
        },
        .string => return ExpressionError.UnaryOperationOnString,
    }
}
|
|
|
|
/// Parse the opcode or pseudo operation under the cursor.
/// The run of symbol characters at the cursor is consumed and handed to
/// `opcodes.parseOp`; the cursor is left just past that run even on failure.
/// Any error comes from `opcodes.parseOp` (the tests in this file expect
/// `error.NoOpcode` for unknown names).
fn identifyOperation(self: *Parser) !opcodes.Operation {
    const first = self.ch_pos;
    var cursor = first;
    while (isSymbolChar(self.getByte(cursor))) : (cursor += 1) {}
    self.ch_pos = cursor;

    return opcodes.parseOp(self.allocator, self.input[first..cursor]);
}
|
|
|
|
/// Create a parser over `input`, with the cursor and location at 0.
/// The symbol table is seeded with the ten symbols "0B".."9B", all set to pure 0.
///
/// `input` is borrowed, not copied, and must outlive the parser. The caller
/// must release the parser with `deinit`. Fails only if seeding the symbol
/// table runs out of memory, in which case nothing is leaked.
pub fn init(allocator: std.mem.Allocator, input: []const u8) !Parser {
    var p = Parser{
        .allocator = allocator,
        .input = input,
        .location = 0,
        .ch_pos = 0,
        .symbols = std.StringHashMap(NumberValue).init(allocator),
        .object = std.ArrayList(u8).init(allocator),
    };
    // Free whatever was inserted so far if a later `put` fails.
    errdefer p.deinit();

    // The map does not copy its keys; string literals live for the whole program.
    const backward_labels = [_][]const u8{
        "0B", "1B", "2B", "3B", "4B", "5B", "6B", "7B", "8B", "9B",
    };
    for (backward_labels) |label| {
        try p.symbols.put(label, NumberValue{ .pure = 0 });
    }

    return p;
}
|
|
|
|
/// Release the memory held by the object buffer and the symbol table.
/// The parser must not be used afterwards.
pub fn deinit(self: *Parser) void {
    self.object.deinit();
    self.symbols.deinit();
}
|
|
};
|
|
|
|
// Every ASCII letter, digit and the underscore is a symbol character.
test "normal ascii characters are recognized as symbol chars" {
    const alphabet = "qwertyuiopasdfghjklzxcvbnm1234567890QWERTYUIOPASDFGHJKLZXCVBNM_";

    for (alphabet) |byte| try std.testing.expect(Parser.isSymbolChar(byte));
}
|
|
|
|
// Every byte of a multi-byte UTF-8 sequence counts as a symbol character.
test "large unicode characters are recognized as symbol chars" {
    const glyphs = "𒀀𒀁𒀂𒀃𒀄𒀅𒀆𒀇𒀈𒀉𒀊𒀋𒀌𒀍𒀎𒀏𒀐𒀑𒀒𒀓𒀔𒀕𒀖𒀗𒀘𒀙𒀚𒀛𒀜𒀝𒀞𒀟𒀠𒀡𒀢𒀣𒀤𒀥𒀦𒀧𒀨𒀩𒀪𒀫𒀬𒀭𒀮𒀯𒀰𒀱𒈷𒌄";

    for (glyphs) |byte| try std.testing.expect(Parser.isSymbolChar(byte));
}
|
|
|
|
// Whitespace and ASCII punctuation are not symbol characters.
test "non-symbol characters are detected" {
    const punctuation = "\n\r \t!@#$%^&*()-=+[]{}\\|;:'\"/?,.<>`~";

    for (punctuation) |byte| try std.testing.expect(!Parser.isSymbolChar(byte));
}
|
|
|
|
// A symbol is read up to the first non-symbol character; the local symbols
// xH, xF and xB are recognized as well.
test "symbols are identified" {
    const cases = [_]struct { input: []const u8, expected: []const u8 }{
        .{ .input = "_asdf$%@", .expected = "_asdf" },
        .{ .input = "ASFLKJ3332__q5 ;asdf;lk", .expected = "ASFLKJ3332__q5" },
        .{ .input = "asdf𒀤𒀥𒀦\nalsfkd", .expected = "asdf𒀤𒀥𒀦" },
        .{ .input = "2H", .expected = "2H" },
        .{ .input = "5F", .expected = "5F" },
        .{ .input = "0B", .expected = "0B" },
    };

    for (cases) |case| {
        var parser = try Parser.init(std.testing.allocator, case.input);
        // `defer` releases the parser even when an expectation below fails.
        defer parser.deinit();
        const symbol = try parser.identifySymbol();
        try std.testing.expectEqualStrings(case.expected, symbol);
    }
}
|
|
|
|
// Input that does not start with a symbol character yields UnexpectedSymbol.
test "no symbols are found successfully" {
    const test_cases = [_][]const u8{
        " _asdf",
        ";ASFLKJ3332__q5",
        "\nasdf𒀤𒀥𒀦",
    };

    for (test_cases) |case| {
        var parser = try Parser.init(std.testing.allocator, case);
        // `defer` releases the parser even when the expectation below fails.
        defer parser.deinit();
        try std.testing.expectError(error.UnexpectedSymbol, parser.identifySymbol());
    }
}
|
|
|
|
// Opcodes and pseudo operations are read up to the first non-symbol character.
test "opcodes are identified" {
    const cases = [_]struct { input: []const u8, expected: opcodes.Operation }{
        .{ .input = "2ADDU%aldfk", .expected = opcodes.Operation{ .opcode = opcodes.Opcode._2ADDU } },
        .{ .input = "GO ", .expected = opcodes.Operation{ .opcode = opcodes.Opcode.GO } },
        .{ .input = "ADD\taksfdjas", .expected = opcodes.Operation{ .opcode = opcodes.Opcode.ADD } },
        .{ .input = "GREG\n", .expected = opcodes.Operation{ .pseudo_op = opcodes.PseudoOp.GREG } },
        .{ .input = "IS", .expected = opcodes.Operation{ .pseudo_op = opcodes.PseudoOp.IS } },
    };

    for (cases) |case| {
        var parser = try Parser.init(std.testing.allocator, case.input);
        // `defer` releases the parser even when an expectation below fails.
        defer parser.deinit();
        const op = try parser.identifyOperation();
        try std.testing.expectEqual(case.expected, op);
    }
}
|
|
|
|
// Text that is not a known operation name yields NoOpcode.
test "no opcodes are found successfully" {
    const test_cases = [_][]const u8{
        " _asdf",
        ";ASFLKJ3332__q5",
        "\nasdf𒀤𒀥𒀦",
        "asdfklajsdfl",
    };

    for (test_cases) |case| {
        var parser = try Parser.init(std.testing.allocator, case);
        // `defer` releases the parser even when the expectation below fails.
        defer parser.deinit();
        try std.testing.expectError(error.NoOpcode, parser.identifyOperation());
    }
}
|
|
|
|
// Decimal numbers are read up to the first non-digit; leading zeros are allowed.
test "decimals are recognized" {
    const cases = [_]struct { input: []const u8, expected: u64 }{
        .{ .input = "012314aslkfdj", .expected = 12314 },
        .{ .input = "1234567890 43", .expected = 1234567890 },
        .{ .input = "1234567891234567889\n123124", .expected = 1234567891234567889 },
    };

    for (cases) |case| {
        var parser = try Parser.init(std.testing.allocator, case.input);
        // `defer` releases the parser even when an expectation below fails.
        defer parser.deinit();
        const number = try parser.identifyDecimal();
        try std.testing.expectEqual(case.expected, number);
    }
}
|
|
|
|
// Missing digits yield InvalidCharacter; values beyond 64 bits yield Overflow.
test "malformed decimals are not recognized" {
    const cases = [_]struct { input: []const u8, expected: Parser.DecimalError }{
        .{ .input = "", .expected = Parser.DecimalError.InvalidCharacter },
        .{ .input = "asdf123", .expected = Parser.DecimalError.InvalidCharacter },
        .{ .input = " 123", .expected = Parser.DecimalError.InvalidCharacter },
        .{ .input = "12345678901234567890123456789012345678901234567890", .expected = Parser.DecimalError.Overflow },
    };

    for (cases) |case| {
        var parser = try Parser.init(std.testing.allocator, case.input);
        // `defer` releases the parser even when the expectation below fails.
        defer parser.deinit();
        try std.testing.expectError(case.expected, parser.identifyDecimal());
    }
}
|
|
|
|
// Hexadecimal numbers start with `#` and are read up to the first non-hex digit.
test "hexadecimals are recognized" {
    const cases = [_]struct { input: []const u8, expected: u64 }{
        .{ .input = "#012314saslkfdj", .expected = 0x12314 },
        .{ .input = "#1234567890abcdef 43", .expected = 0x1234567890abcdef },
        .{ .input = "#1234567891\n123124", .expected = 0x1234567891 },
    };

    for (cases) |case| {
        var parser = try Parser.init(std.testing.allocator, case.input);
        // `defer` releases the parser even when an expectation below fails.
        defer parser.deinit();
        const number = try parser.identifyHexadecimal();
        try std.testing.expectEqual(case.expected, number);
    }
}
|
|
|
|
// A missing `#`, a `#` without digits, and oversized values are all rejected.
test "malformed hexadecimals are not recognized" {
    const cases = [_]struct { input: []const u8, expected: Parser.HexadecimalError }{
        .{ .input = "", .expected = Parser.HexadecimalError.WrongStartingCharacter },
        .{ .input = "sasdf123", .expected = Parser.HexadecimalError.WrongStartingCharacter },
        .{ .input = " 123", .expected = Parser.HexadecimalError.WrongStartingCharacter },
        .{ .input = "#12345678901234567890123456789012345678901234567890", .expected = Parser.HexadecimalError.Overflow },
        .{ .input = "#", .expected = Parser.HexadecimalError.InvalidCharacter },
    };

    for (cases) |case| {
        var parser = try Parser.init(std.testing.allocator, case.input);
        // `defer` releases the parser even when the expectation below fails.
        defer parser.deinit();
        try std.testing.expectError(case.expected, parser.identifyHexadecimal());
    }
}
|
|
|
|
// A quoted single character, ASCII or multi-byte, yields its code point.
test "characters are recognized" {
    const cases = [_]struct { input: []const u8, expected: u21 }{
        .{ .input = "'a'", .expected = 'a' },
        .{ .input = "'1'", .expected = '1' },
        .{ .input = "'𒀤'", .expected = '𒀤' },
    };

    for (cases) |case| {
        var parser = try Parser.init(std.testing.allocator, case.input);
        // `defer` releases the parser even when an expectation below fails.
        defer parser.deinit();
        const character = try parser.identifyChar();
        try std.testing.expectEqual(case.expected, character);
    }
}
|
|
|
|
// Literals with several characters, no character, or no opening quote are rejected.
test "invalid unicode sequences are not characters" {
    const cases = [_]struct { input: []const u8, expected: Parser.CharError }{
        .{ .input = "'asdf'", .expected = Parser.CharError.NoEndingDelimiter },
        .{ .input = "'asdfg'", .expected = Parser.CharError.NoEndingDelimiter },
        .{ .input = "'as'", .expected = Parser.CharError.NoEndingDelimiter },
        .{ .input = "''", .expected = Parser.CharError.NoChar },
        .{ .input = "'", .expected = Parser.CharError.NoChar },
        .{ .input = "a", .expected = Parser.CharError.NoStartingDelimiter },
    };

    for (cases) |case| {
        var parser = try Parser.init(std.testing.allocator, case.input);
        // `defer` releases the parser even when the expectation below fails.
        defer parser.deinit();
        try std.testing.expectError(case.expected, parser.identifyChar());
    }
}
|
|
|
|
// The text between double quotes is returned without the quotes.
test "strings are recognized" {
    const cases = [_]struct { input: []const u8, expected: []const u8 }{
        .{ .input = "\" \"", .expected = " " },
        .{
            .input = "\"aslkdfjlaskdfj lkasjflkasjdflaksjfd''12309)($)(#$[[]𒀤\"",
            .expected = "aslkdfjlaskdfj lkasjflkasjdflaksjfd''12309)($)(#$[[]𒀤",
        },
    };

    for (cases) |case| {
        var parser = try Parser.init(std.testing.allocator, case.input);
        // `defer` releases the parser even when an expectation below fails.
        defer parser.deinit();
        const string = try parser.identifyString();
        try std.testing.expectEqualStrings(case.expected, string);
    }
}
|
|
|
|
// Empty strings, unterminated strings, embedded newlines and missing opening
// quotes are all rejected.
test "invalid strings are not recognized" {
    const cases = [_]struct { input: []const u8, expected: Parser.StringError }{
        .{ .input = "\"\"", .expected = Parser.StringError.NoString },
        .{ .input = "\"", .expected = Parser.StringError.NoEndingDelimiter },
        .{ .input = "\"\n\"", .expected = Parser.StringError.NoEndingDelimiter },
        .{ .input = "asdf", .expected = Parser.StringError.NoStartingDelimiter },
    };

    for (cases) |case| {
        var parser = try Parser.init(std.testing.allocator, case.input);
        // `defer` releases the parser even when the expectation below fails.
        defer parser.deinit();
        try std.testing.expectError(case.expected, parser.identifyString());
    }
}
|
|
|
|
// Each kind of constant is dispatched on its first character.
test "constants are recognized" {
    const inputs = [_][]const u8{
        "1234567890 1234",
        "#1234567890abcdef;%#*(",
        "'a'uuuuuu",
        "\"hello \"world",
    };
    const wanted = [_]ConstantValue{
        ConstantValue{ .number = 1234567890 },
        ConstantValue{ .number = 0x1234567890abcdef },
        ConstantValue{ .number = 'a' },
        ConstantValue{ .string = "hello " },
    };

    for (inputs, wanted) |input, want| {
        var parser = try Parser.init(std.testing.allocator, input);
        defer parser.deinit();

        switch (try parser.identifyConstant()) {
            .number => |n| try std.testing.expectEqual(want.number, n),
            .string => |s| try std.testing.expectEqualStrings(want.string, s),
        }
    }
}
|
|
|
|
// Input that does not start a constant, or starts a malformed one, is rejected.
test "invalid constants are recognized" {
    const inputs = [_][]const u8{
        "$123",
        " 123",
        "'aa'",
    };
    const wanted = [_]Parser.ConstantError{
        Parser.ConstantError.UnexpectedSymbol,
        Parser.ConstantError.UnexpectedSymbol,
        Parser.ConstantError.NoEndingDelimiter,
    };

    for (inputs, wanted) |input, want| {
        var parser = try Parser.init(std.testing.allocator, input);
        defer parser.deinit();
        try std.testing.expectError(want, parser.identifyConstant());
    }
}
|
|
|
|
// Constants, the location marker and the unary operators all form primaries.
test "basic primaries are identified" {
    const inputs = [_][]const u8{
        "1234",
        "@",
        "'a'",
        "\"hello world\"",
        "+1234",
        "-#1",
        "~#0",
        "$123",
    };
    const wanted = [_]ExpressionResult{
        ExpressionResult{ .number = NumberValue{ .pure = 1234 } },
        ExpressionResult{ .number = NumberValue{ .pure = 0 } },
        ExpressionResult{ .number = NumberValue{ .pure = 'a' } },
        ExpressionResult{ .string = "hello world" },
        ExpressionResult{ .number = NumberValue{ .pure = 1234 } },
        ExpressionResult{ .number = NumberValue{ .pure = 0xFFFFFFFFFFFFFFFF } },
        ExpressionResult{ .number = NumberValue{ .pure = 0xFFFFFFFFFFFFFFFF } },
        ExpressionResult{ .number = NumberValue{ .register = 123 } },
    };

    for (inputs, wanted) |input, want| {
        var parser = try Parser.init(std.testing.allocator, input);
        defer parser.deinit();

        switch (try parser.identifyPrimary()) {
            .number => |value| try std.testing.expectEqual(want.number, value),
            .string => |text| try std.testing.expectEqualStrings(want.string, text),
        }
    }
}
|
|
|
|
// Register numbers above 255 overflow; unary operators reject strings.
test "invalid primaries are detected" {
    const inputs = [_][]const u8{
        "$256",
        "$~0",
        "~\"hello\"",
    };
    const wanted = [_]Parser.ExpressionError{
        Parser.ExpressionError.Overflow,
        Parser.ExpressionError.Overflow,
        Parser.ExpressionError.UnaryOperationOnString,
    };

    for (inputs, wanted) |input, want| {
        var parser = try Parser.init(std.testing.allocator, input);
        defer parser.deinit();
        try std.testing.expectError(want, parser.identifyPrimary());
    }
}
|
|
|
|
// Each strong operator is exercised once, plus a lone register and a lone string.
test "valid terms are recognized" {
    const inputs = [_][]const u8{
        "12*34",
        "23/2",
        "2//3",
        "3%2",
        "1<<2",
        "4>>2",
        "#FF&#AA",
        "$12",
        "\"hello\"",
    };
    const wanted = [_]ExpressionResult{
        ExpressionResult{ .number = NumberValue{ .pure = 408 } },
        ExpressionResult{ .number = NumberValue{ .pure = 11 } },
        ExpressionResult{ .number = NumberValue{ .pure = 12297829382473034410 } },
        ExpressionResult{ .number = NumberValue{ .pure = 1 } },
        ExpressionResult{ .number = NumberValue{ .pure = 4 } },
        ExpressionResult{ .number = NumberValue{ .pure = 1 } },
        ExpressionResult{ .number = NumberValue{ .pure = 0xAA } },
        ExpressionResult{ .number = NumberValue{ .register = 12 } },
        ExpressionResult{ .string = "hello" },
    };

    for (inputs, wanted) |input, want| {
        var parser = try Parser.init(std.testing.allocator, input);
        defer parser.deinit();
        const actual = try parser.identifyTerm();

        switch (actual) {
            .number => try std.testing.expectEqual(want, actual),
            .string => |text| try std.testing.expectEqualStrings(want.string, text),
        }
    }
}
|
|
|
|
// Strong operators are applied left to right across the whole chain.
test "terms compute the entire chain" {
    const inputs = [_][]const u8{
        "1*2*3*4*5",
        "2*6/4",
        "3*3%8<<2>>1",
    };
    const wanted = [_]ExpressionResult{
        ExpressionResult{ .number = NumberValue{ .pure = 120 } },
        ExpressionResult{ .number = NumberValue{ .pure = 3 } },
        ExpressionResult{ .number = NumberValue{ .pure = 2 } },
    };

    for (inputs, wanted) |input, want| {
        var parser = try Parser.init(std.testing.allocator, input);
        defer parser.deinit();
        const actual = try parser.identifyTerm();
        try std.testing.expectEqual(want, actual);
    }
}
|
|
|
|
// A register on either side of a strong operator is rejected.
test "strong operations do not work on registers" {
    const inputs = [_][]const u8{
        "$2*3",
        "1/$1",
        "$1%$1",
        "$1<<1",
        "1>>$1",
        "$1&$1",
    };

    for (inputs) |input| {
        var parser = try Parser.init(std.testing.allocator, input);
        defer parser.deinit();
        try std.testing.expectError(Parser.ExpressionError.IllegalOperationOnRegister, parser.identifyTerm());
    }
}
|
|
|
|
// A string on either side of a strong operator is rejected.
test "strong operations do not work on strings" {
    const inputs = [_][]const u8{
        "\"hello\"*1",
        "1*\"hello\"",
    };

    for (inputs) |input| {
        var parser = try Parser.init(std.testing.allocator, input);
        defer parser.deinit();
        try std.testing.expectError(Parser.ExpressionError.BinaryOperationOnString, parser.identifyTerm());
    }
}
|
|
|
|
// Weak operators, precedence over strong operators, symbols, parentheses,
// strings and registers are all covered; `k` is predefined as #cdef00.
test "expressions are recognized" {
    const inputs = [_][]const u8{
        "1+2",
        "3-2",
        "2-3",
        "#AA|#00",
        "#AA^#FF",
        "5*5+5",
        "5*5+5*5",
        "#ab<<32+k&~(k-1)",
        "\"hello\"",
        "$12",
        "$$12",
    };
    const wanted = [_]ExpressionResult{
        ExpressionResult{ .number = NumberValue{ .pure = 3 } },
        ExpressionResult{ .number = NumberValue{ .pure = 1 } },
        ExpressionResult{ .number = NumberValue{ .pure = 0xFFFFFFFFFFFFFFFF } },
        ExpressionResult{ .number = NumberValue{ .pure = 0xAA } },
        ExpressionResult{ .number = NumberValue{ .pure = 0x55 } },
        ExpressionResult{ .number = NumberValue{ .pure = 30 } },
        ExpressionResult{ .number = NumberValue{ .pure = 50 } },
        ExpressionResult{ .number = NumberValue{ .pure = 0xab00000100 } },
        ExpressionResult{ .string = "hello" },
        ExpressionResult{ .number = NumberValue{ .register = 12 } },
        ExpressionResult{ .number = NumberValue{ .register = 12 } },
    };

    for (inputs, wanted) |input, want| {
        var parser = try Parser.init(std.testing.allocator, input);
        defer parser.deinit();

        try parser.symbols.put("k", NumberValue{ .pure = 0xcdef00 });

        const actual = try parser.identifyExpression();

        switch (actual) {
            .number => try std.testing.expectEqual(want, actual),
            .string => |text| try std.testing.expectEqualStrings(want.string, text),
        }
    }
}
|
|
|
|
// A register or string on either side of a weak operator is rejected.
test "weak operations cannot be applied to strings and registers" {
    const inputs = [_][]const u8{
        "$1+$1",
        "2-$2",
        "$3|3",
        "$4^$4",
        "1+\"hello\"",
        "\"hello\"-2",
        "3|\"hello\"^4",
    };
    const wanted = [_]Parser.ExpressionError{
        Parser.ExpressionError.IllegalOperationOnRegister,
        Parser.ExpressionError.IllegalOperationOnRegister,
        Parser.ExpressionError.IllegalOperationOnRegister,
        Parser.ExpressionError.IllegalOperationOnRegister,
        Parser.ExpressionError.BinaryOperationOnString,
        Parser.ExpressionError.BinaryOperationOnString,
        Parser.ExpressionError.BinaryOperationOnString,
    };

    for (inputs, wanted) |input, want| {
        var parser = try Parser.init(std.testing.allocator, input);
        defer parser.deinit();
        try std.testing.expectError(want, parser.identifyExpression());
    }
}
|