initial commit

This commit is contained in:
jjanzen 2025-02-26 23:23:37 -06:00
commit 0c7ae37e3f
8 changed files with 1987 additions and 0 deletions

2
.gitignore vendored Normal file
View file

@ -0,0 +1,2 @@
/.zig-cache/
/zig-out/

91
build.zig Normal file
View file

@ -0,0 +1,91 @@
const std = @import("std");
/// Describe the build graph for the `mmix-as` project.
///
/// Nothing is compiled by this function itself: it only registers artifacts
/// and steps on `b`, and the external build runner later executes whichever
/// steps the user asked for. Three top-level steps end up available: the
/// default install step, `run`, and `test`.
pub fn build(b: *std.Build) void {
    // No defaults are overridden, so the person invoking `zig build` picks
    // the target (native unless told otherwise) and the optimization mode
    // (Debug, ReleaseSafe, ReleaseFast or ReleaseSmall).
    const target = b.standardTargetOptions(.{});
    const optimize = b.standardOptimizeOption(.{});

    // Static library rooted at src/root.zig; installed by the default step.
    const static_lib = b.addStaticLibrary(.{
        .name = "mmix-as",
        .root_source_file = b.path("src/root.zig"),
        .target = target,
        .optimize = optimize,
    });
    b.installArtifact(static_lib);

    // Executable rooted at src/main.zig; also installed by the default step.
    const assembler_exe = b.addExecutable(.{
        .name = "mmix-as",
        .root_source_file = b.path("src/main.zig"),
        .target = target,
        .optimize = optimize,
    });
    b.installArtifact(assembler_exe);

    // `zig build run -- arg1 arg2 ...` launches the executable. Depending on
    // the install step makes it run from the installation directory instead
    // of the cache, so any other installed files are where it expects them.
    const run_assembler = b.addRunArtifact(assembler_exe);
    run_assembler.step.dependOn(b.getInstallStep());
    if (b.args) |forwarded_args| {
        run_assembler.addArgs(forwarded_args);
    }
    const run_step = b.step("run", "Run the app");
    run_step.dependOn(&run_assembler.step);

    // `addTest` only builds a test binary; the matching run artifact is what
    // actually executes it.
    const lib_tests = b.addTest(.{
        .root_source_file = b.path("src/root.zig"),
        .target = target,
        .optimize = optimize,
    });
    const run_lib_tests = b.addRunArtifact(lib_tests);

    const exe_tests = b.addTest(.{
        .root_source_file = b.path("src/main.zig"),
        .target = target,
        .optimize = optimize,
    });
    const run_exe_tests = b.addRunArtifact(exe_tests);

    // `zig build test` runs the library tests and the executable tests.
    const test_step = b.step("test", "Run unit tests");
    test_step.dependOn(&run_lib_tests.step);
    test_step.dependOn(&run_exe_tests.step);
}

72
build.zig.zon Normal file
View file

@ -0,0 +1,72 @@
.{
// Default name that packages depending on this one use to refer to it. For
// example, `zig fetch --save <url>` uses this field as the key in the
// dependant's `dependencies` table. A user may choose a different name, but
// most will keep this one. It matches the artifact name used in build.zig.
//
// Do not include "zig" in the name: it is redundant inside the Zig package
// namespace.
.name = "mmix-as",
// This is a [Semantic Version](https://semver.org/).
// A future version of Zig will use it for package deduplication.
.version = "0.0.0",
// Optional field. Currently advisory only; Zig does not yet do anything
// with this value.
//.minimum_zig_version = "0.11.0",
// Optional field.
// Each dependency must provide either a `url` together with a `hash`, or a `path`.
// `zig build --fetch` fetches all dependencies of a package, recursively.
// Once everything has been fetched, `zig build` no longer needs
// internet connectivity.
.dependencies = .{
// See `zig fetch --save <url>` for a command-line interface for adding dependencies.
//.example = .{
// // When changing this field to a new URL, delete the corresponding `hash`
// // as well; otherwise you are stating that you expect to find the old
// // hash at the new URL.
// .url = "https://example.com/foo.tar.gz",
//
// // Computed from the contents of the directory obtained by fetching `url`
// // and applying the inclusion rules given by `paths`.
// //
// // This field is the source of truth: packages do not come from a `url`,
// // they come from a `hash`. A `url` is just one of many possible mirrors
// // from which a package matching this `hash` can be obtained.
// //
// // Uses the [multihash](https://multiformats.io/multihash/) format.
// .hash = "...",
//
// // When provided, the package is found in a directory relative to the
// // build root. The package's hash is then irrelevant and is not computed.
// // This field and `url` are mutually exclusive.
// .path = "foo",
// // When set to `true`, the package is declared to be lazily fetched:
// // the dependency is only fetched if it is actually used.
// .lazy = false,
//},
},
// The set of files and directories included in this package.
// Only files and directories listed here contribute to the `hash` computed
// for this package, and only files listed here remain on disk when the zig
// package manager is used. As a rule of thumb, list the files required for
// compilation plus any license(s).
// Paths are relative to the build root. Use the empty string (`""`) to refer
// to the build root itself.
// Listing a directory includes every file inside it, recursively (so "src"
// below also covers non-Zig helper files kept in that directory).
.paths = .{
"build.zig",
"build.zig.zon",
"src",
// For example...
//"LICENSE",
//"README.md",
},
}

24
src/main.zig Normal file
View file

@ -0,0 +1,24 @@
const std = @import("std");
/// Program entry point (still the `zig init` placeholder).
///
/// Writes a greeting to stderr and a hint to stdout. Returns any error raised
/// while writing to or flushing stdout.
pub fn main() !void {
    // Diagnostics go to stderr; `std.debug.print` is a shortcut built on
    // `std.io.getStdErr()`.
    std.debug.print("All your {s} are belong to us.\n", .{"codebase"});

    // stdout carries the program's real output only (a gzip implementation,
    // for instance, would send nothing but compressed bytes here), so it is
    // kept apart from the debug message above and wrapped in a buffer.
    var buffered_stdout = std.io.bufferedWriter(std.io.getStdOut().writer());
    const out = buffered_stdout.writer();

    try out.print("Run `zig build test` to run the tests.\n", .{});

    // Buffered bytes are only written out once the buffer is flushed.
    try buffered_stdout.flush();
}
test "simple test" {
    const allocator = std.testing.allocator;

    // The testing allocator reports a leak if `deinit` is ever skipped.
    var numbers = std.ArrayList(i32).init(allocator);
    defer numbers.deinit();

    try numbers.append(42);
    try std.testing.expectEqual(@as(i32, 42), numbers.pop());
}

1100
src/opcodes.zig Normal file

File diff suppressed because it is too large Load diff

535
src/parser.zig Normal file
View file

@ -0,0 +1,535 @@
const std = @import("std");
const opcodes = @import("opcodes.zig");
/// The kind of value a symbol holds: a pure number or a register.
const SymbolValueType = enum {
    /// A pure 64-bit value.
    pure,
    /// A register, identified by an 8-bit register number.
    register,
};

/// The value bound to a symbol.
///
/// The union is tagged with `SymbolValueType`, mirroring how `ConstantValue`
/// is tagged with `ConstantType`, so code reading a symbol can `switch` on
/// the active variant instead of tracking the kind separately.
const SymbolValue = union(SymbolValueType) {
    /// Payload of a `.pure` symbol.
    pure: u64,
    /// Payload of a `.register` symbol.
    register: u8,
};
/// The kind of a constant: a number or a string.
/// Used as the tag type of `ConstantValue`.
const ConstantType = enum {
number,
string,
};
/// A parsed constant, tagged by `ConstantType`.
/// `number` carries a 64-bit value; `string` carries the bytes of the constant
/// as a slice (not copied by this type, so the backing memory must stay alive).
const ConstantValue = union(ConstantType) {
number: u64,
string: []const u8,
};
/// The Parser reads a provided input and assembles it into MMIX object code.
///
/// `input` is borrowed, never copied: every slice returned by an `identify*`
/// function points into it, so the input must outlive those results.
///
/// All `identify*` functions start at the cursor (`ch_pos`). On success the
/// cursor is left just past the recognised token. On failure the cursor
/// position is unspecified (it may already have moved).
pub const Parser = struct {
    /// Allocator backing `symbols` and `object`; also passed to `opcodes.parseOp`.
    allocator: std.mem.Allocator,
    /// Source text being parsed (borrowed).
    input: []const u8,
    /// Set to 0 by `init`; not read by any function in this struct yet.
    /// NOTE(review): presumably the current assembly address - confirm.
    location: u64,
    /// Cursor: byte offset into `input` of the next unread byte.
    ch_pos: usize,
    /// Symbol table keyed by name. Keys are not copied by the map.
    symbols: std.StringHashMap(SymbolValue),
    /// Byte buffer created empty by `init`.
    /// NOTE(review): presumably receives the generated object code - confirm.
    object: std.ArrayList(u8),

    /// Test if a character is whitespace.
    /// Note that newlines are special and not included in this implementation.
    fn isWhitespace(ch: u8) bool {
        return switch (ch) {
            ' ', '\t', '\r' => true,
            else => false,
        };
    }

    /// Test if a character is a letter.
    /// Note that underscores are letters for the purposes of symbol recognition.
    fn isLetter(ch: u8) bool {
        return switch (ch) {
            '_', 'a'...'z', 'A'...'Z' => true,
            else => false,
        };
    }

    /// Test if a character is a decimal digit.
    fn isDecimal(ch: u8) bool {
        return switch (ch) {
            '0'...'9' => true,
            else => false,
        };
    }

    /// Test if a character is a hexadecimal digit (either letter case).
    fn isHexadecimal(ch: u8) bool {
        return switch (ch) {
            '0'...'9', 'a'...'f', 'A'...'F' => true,
            else => false,
        };
    }

    /// Test if a character is a symbol character: a letter, a decimal digit,
    /// or any byte larger than 126. The last case admits every byte of a
    /// multi-byte UTF-8 sequence.
    fn isSymbolChar(ch: u8) bool {
        return isLetter(ch) or isDecimal(ch) or ch > 126;
    }

    /// Get the byte of the input at `pos`.
    /// Returns 0 if `pos` is out of range, so 0 doubles as the end-of-input
    /// marker for every scanner below.
    fn getByte(self: *const Parser, pos: usize) u8 {
        return if (pos < self.input.len) self.input[pos] else 0;
    }

    /// Move the cursor forward until it does not point at whitespace.
    /// Newlines are not whitespace here, so the cursor stops on them.
    fn skipWhitespace(self: *Parser) void {
        while (isWhitespace(self.getByte(self.ch_pos))) {
            self.ch_pos += 1;
        }
    }

    /// Parse the base 10 integer at the cursor and move the cursor past it.
    /// Returns `error.NoDecimal` if there is no digit at the cursor or the
    /// value does not fit in a `u64`.
    fn identifyDecimal(self: *Parser) !u64 {
        const start = self.ch_pos;
        while (isDecimal(self.getByte(self.ch_pos))) {
            self.ch_pos += 1;
        }
        const digits = self.input[start..self.ch_pos];
        return std.fmt.parseInt(u64, digits, 10) catch return error.NoDecimal;
    }

    /// Parse the base 16 integer at the cursor and move the cursor past it.
    /// A hexadecimal number is introduced by `#`.
    /// Returns `error.NoHexadecimal` if the `#` is missing, no hexadecimal
    /// digit follows it, or the value does not fit in a `u64`.
    fn identifyHexadecimal(self: *Parser) !u64 {
        if (self.getByte(self.ch_pos) != '#') {
            return error.NoHexadecimal;
        }
        self.ch_pos += 1;

        const start = self.ch_pos;
        while (isHexadecimal(self.getByte(self.ch_pos))) {
            self.ch_pos += 1;
        }
        const digits = self.input[start..self.ch_pos];
        return std.fmt.parseInt(u64, digits, 16) catch return error.NoHexadecimal;
    }

    /// Parse a character constant: exactly one UTF-8 encoded code point
    /// wrapped in single quotes. Moves the cursor past the closing quote and
    /// returns the encoded bytes of the character (without the quotes).
    /// Returns `error.NoChar` if the opening or closing quote is missing, or
    /// if the quoted text is not exactly one valid code point.
    fn identifyChar(self: *Parser) ![]const u8 {
        if (self.getByte(self.ch_pos) != '\'') {
            return error.NoChar;
        }
        self.ch_pos += 1;

        const start = self.ch_pos;
        while (self.getByte(self.ch_pos) != 0 and self.getByte(self.ch_pos) != '\'') {
            self.ch_pos += 1;
            // One UTF-8 encoded code point never takes more than four bytes.
            if (self.ch_pos - start > 4) {
                return error.NoChar;
            }
        }
        const end = self.ch_pos;

        // The scan also stops at the end of the input; without a closing
        // quote this is not a character constant, and the cursor must not be
        // pushed past the end of the input.
        if (self.getByte(end) != '\'') {
            return error.NoChar;
        }
        self.ch_pos += 1;

        if (end == start) {
            return error.NoChar;
        }

        const literal = self.input[start..end];
        const view = std.unicode.Utf8View.init(literal) catch return error.NoChar;
        var code_points = view.iterator();
        if (code_points.nextCodepoint() == null) {
            return error.NoChar;
        }
        // Anything after the first code point means more than one character.
        if (code_points.nextCodepoint() != null) {
            return error.NoChar;
        }
        return literal;
    }

    /// Parse a string constant wrapped in double quotes.
    /// A string holds at least one character and cannot contain `"` or a
    /// newline. Moves the cursor past the closing quote and returns the text
    /// between the quotes.
    /// Returns `error.NoString` if the opening quote is missing, the string
    /// is empty, or a newline or the end of the input is reached before the
    /// closing quote.
    fn identifyString(self: *Parser) ![]const u8 {
        if (self.getByte(self.ch_pos) != '"') {
            return error.NoString;
        }
        self.ch_pos += 1;

        const start = self.ch_pos;
        while (true) : (self.ch_pos += 1) {
            const ch = self.getByte(self.ch_pos);
            if (ch == '"') break;
            // 0 marks the end of the input: the string was never closed.
            if (ch == 0 or ch == '\n') {
                return error.NoString;
            }
        }
        const end = self.ch_pos;
        self.ch_pos += 1; // step over the closing quote

        if (end == start) {
            return error.NoString;
        }
        return self.input[start..end];
    }

    /// Parse the constant at the cursor and move the cursor past it.
    /// The first byte selects the kind: a decimal digit or `#` yields a
    /// `.number`, a single or double quote yields a `.string`.
    /// Returns `error.NoConstant` if the cursor is on none of those, or the
    /// error of the selected `identify*` function.
    fn identifyConstant(self: *Parser) !ConstantValue {
        switch (self.getByte(self.ch_pos)) {
            '0'...'9' => return ConstantValue{ .number = try self.identifyDecimal() },
            '#' => return ConstantValue{ .number = try self.identifyHexadecimal() },
            '\'' => return ConstantValue{ .string = try self.identifyChar() },
            '"' => return ConstantValue{ .string = try self.identifyString() },
            else => return error.NoConstant,
        }
    }

    /// Parse the symbol at the cursor, move the cursor past it and return its name.
    /// A symbol starts with a letter and only has symbol characters after
    /// that point. The exception is the 30 special symbols of the form xH,
    /// xF and xB, where x is a single decimal digit.
    /// Returns `error.NoSymbol` if the cursor points at neither form.
    fn identifySymbol(self: *Parser) ![]const u8 {
        const start = self.ch_pos;
        const first = self.getByte(start);

        // `isLetter` already accepts the underscore.
        if (isLetter(first)) {
            self.ch_pos += 1;
            while (isSymbolChar(self.getByte(self.ch_pos))) {
                self.ch_pos += 1;
            }
            return self.input[start..self.ch_pos];
        }

        if (isDecimal(first)) {
            switch (self.getByte(start + 1)) {
                'H', 'F', 'B' => {
                    self.ch_pos += 2;
                    return self.input[start..self.ch_pos];
                },
                else => {},
            }
        }
        return error.NoSymbol;
    }

    /// Parse the opcode or pseudo operation at the cursor.
    /// The longest run of symbol characters at the cursor (possibly empty) is
    /// consumed and handed to `opcodes.parseOp`; its result, success or
    /// error, is returned as is.
    fn identifyOperation(self: *Parser) !opcodes.Operation {
        const start = self.ch_pos;
        while (isSymbolChar(self.getByte(self.ch_pos))) {
            self.ch_pos += 1;
        }
        return opcodes.parseOp(self.allocator, self.input[start..self.ch_pos]);
    }

    /// Create a parser over `input` with the cursor at the first byte.
    /// The symbol table and object buffer start empty and use `allocator`.
    /// Release them with `deinit`.
    pub fn init(allocator: std.mem.Allocator, input: []const u8) Parser {
        return Parser{
            .allocator = allocator,
            .input = input,
            .location = 0,
            .ch_pos = 0,
            .symbols = std.StringHashMap(SymbolValue).init(allocator),
            .object = std.ArrayList(u8).init(allocator),
        };
    }

    /// Free the symbol table and the object buffer. `input` is not owned by
    /// the parser and is left untouched.
    pub fn deinit(self: *Parser) void {
        self.symbols.deinit();
        self.object.deinit();
    }
};
test "normal ascii characters are recognized as symbol chars" {
    // Every ASCII letter and digit, plus the underscore.
    const accepted = "qwertyuiopasdfghjklzxcvbnm1234567890QWERTYUIOPASDFGHJKLZXCVBNM_";
    for (accepted) |byte| {
        try std.testing.expect(Parser.isSymbolChar(byte));
    }
}
test "large unicode characters are recognized as symbol chars" {
    // The loop walks the UTF-8 bytes of the text, not its code points: each
    // byte of a multi-byte sequence must be accepted on its own.
    const utf8_text = "𒀀𒀁𒀂𒀃𒀄𒀅𒀆𒀇𒀈𒀉𒀊𒀋𒀌𒀍𒀎𒀏𒀐𒀑𒀒𒀓𒀔𒀕𒀖𒀗𒀘𒀙𒀚𒀛𒀜𒀝𒀞𒀟𒀠𒀡𒀢𒀣𒀤𒀥𒀦𒀧𒀨𒀩𒀪𒀫𒀬𒀭𒀮𒀯𒀰𒀱𒈷𒌄";
    for (utf8_text) |byte| {
        try std.testing.expect(Parser.isSymbolChar(byte));
    }
}
test "non-symbol characters are detected" {
    // Whitespace and ASCII punctuation must all be rejected.
    const rejected = "\n\r \t!@#$%^&*()-=+[]{}\\|;:'\"/?,.<>`~";
    for (rejected) |byte| {
        try std.testing.expect(!Parser.isSymbolChar(byte));
    }
}
test "symbols are identified" {
    // Each case pairs an input with the symbol expected at its start, so the
    // two can never get out of step.
    const Case = struct { input: []const u8, want: []const u8 };
    const cases = [_]Case{
        .{ .input = "_asdf$%@", .want = "_asdf" },
        .{ .input = "ASFLKJ3332__q5 ;asdf;lk", .want = "ASFLKJ3332__q5" },
        .{ .input = "asdf𒀤𒀥𒀦\nalsfkd", .want = "asdf𒀤𒀥𒀦" },
        .{ .input = "2H", .want = "2H" },
        .{ .input = "5F", .want = "5F" },
        .{ .input = "0B", .want = "0B" },
    };
    for (cases) |case| {
        var parser = Parser.init(std.testing.allocator, case.input);
        // Deferred so the parser is released even when an expectation fails.
        defer parser.deinit();
        try std.testing.expectEqualStrings(case.want, try parser.identifySymbol());
    }
}
test "no symbols are found successfully" {
    // None of these inputs starts with a letter or a special xH/xF/xB symbol.
    const inputs = [_][]const u8{
        " _asdf",
        ";ASFLKJ3332__q5",
        "\nasdf𒀤𒀥𒀦",
    };
    for (inputs) |input| {
        var parser = Parser.init(std.testing.allocator, input);
        // Deferred so the parser is released even when an expectation fails.
        defer parser.deinit();
        try std.testing.expectError(error.NoSymbol, parser.identifySymbol());
    }
}
test "opcodes are identified" {
    // Each case pairs an input with the operation expected at its start.
    const Case = struct { input: []const u8, want: opcodes.Operation };
    const cases = [_]Case{
        .{ .input = "2ADDU%aldfk", .want = opcodes.Operation{ .opcode = opcodes.Opcode._2ADDU } },
        .{ .input = "GO ", .want = opcodes.Operation{ .opcode = opcodes.Opcode.GO } },
        .{ .input = "ADD\taksfdjas", .want = opcodes.Operation{ .opcode = opcodes.Opcode.ADD } },
        .{ .input = "GREG\n", .want = opcodes.Operation{ .pseudo_op = opcodes.PseudoOp.GREG } },
        .{ .input = "IS", .want = opcodes.Operation{ .pseudo_op = opcodes.PseudoOp.IS } },
    };
    for (cases) |case| {
        var parser = Parser.init(std.testing.allocator, case.input);
        // Deferred so the parser is released even when an expectation fails.
        defer parser.deinit();
        const op = try parser.identifyOperation();
        try std.testing.expectEqual(case.want, op);
    }
}
test "no opcodes are found successfully" {
    // The first three inputs have no symbol characters at the cursor; the
    // last one is a run of symbol characters that names no operation.
    const inputs = [_][]const u8{
        " _asdf",
        ";ASFLKJ3332__q5",
        "\nasdf𒀤𒀥𒀦",
        "asdfklajsdfl",
    };
    for (inputs) |input| {
        var parser = Parser.init(std.testing.allocator, input);
        // Deferred so the parser is released even when an expectation fails.
        defer parser.deinit();
        try std.testing.expectError(error.NoOpcode, parser.identifyOperation());
    }
}
test "decimals are recognized" {
    // Parsing stops at the first non-digit; leading zeros are allowed.
    const Case = struct { input: []const u8, want: u64 };
    const cases = [_]Case{
        .{ .input = "012314aslkfdj", .want = 12314 },
        .{ .input = "1234567890 43", .want = 1234567890 },
        .{ .input = "1234567891234567889\n123124", .want = 1234567891234567889 },
    };
    for (cases) |case| {
        var parser = Parser.init(std.testing.allocator, case.input);
        // Deferred so the parser is released even when an expectation fails.
        defer parser.deinit();
        try std.testing.expectEqual(case.want, try parser.identifyDecimal());
    }
}
test "malformed decimals are not recognized" {
    // Empty input, no leading digit, and a value too large for a u64.
    const inputs = [_][]const u8{
        "",
        "asdf123",
        " 123",
        "12345678901234567890123456789012345678901234567890",
    };
    for (inputs) |input| {
        var parser = Parser.init(std.testing.allocator, input);
        // Deferred so the parser is released even when an expectation fails.
        defer parser.deinit();
        try std.testing.expectError(error.NoDecimal, parser.identifyDecimal());
    }
}
test "hexadecimals are recognized" {
    // Parsing stops at the first byte that is not a hexadecimal digit.
    const Case = struct { input: []const u8, want: u64 };
    const cases = [_]Case{
        .{ .input = "#012314saslkfdj", .want = 0x12314 },
        .{ .input = "#1234567890abcdef 43", .want = 0x1234567890abcdef },
        .{ .input = "#1234567891\n123124", .want = 0x1234567891 },
    };
    for (cases) |case| {
        var parser = Parser.init(std.testing.allocator, case.input);
        // Deferred so the parser is released even when an expectation fails.
        defer parser.deinit();
        try std.testing.expectEqual(case.want, try parser.identifyHexadecimal());
    }
}
test "malformed hexadecimals are not recognized" {
    // Missing `#`, a value too large for a u64, and a `#` with no digits.
    const inputs = [_][]const u8{
        "",
        "sasdf123",
        " 123",
        "#12345678901234567890123456789012345678901234567890",
        "#",
    };
    for (inputs) |input| {
        var parser = Parser.init(std.testing.allocator, input);
        // Deferred so the parser is released even when an expectation fails.
        defer parser.deinit();
        try std.testing.expectError(error.NoHexadecimal, parser.identifyHexadecimal());
    }
}
test "characters are recognized" {
    // The result is the quoted character's bytes, without the quotes.
    const Case = struct { input: []const u8, want: []const u8 };
    const cases = [_]Case{
        .{ .input = "'a'", .want = "a" },
        .{ .input = "'1'", .want = "1" },
        .{ .input = "'𒀤'", .want = "𒀤" },
    };
    for (cases) |case| {
        var parser = Parser.init(std.testing.allocator, case.input);
        // Deferred so the parser is released even when an expectation fails.
        defer parser.deinit();
        try std.testing.expectEqualStrings(case.want, try parser.identifyChar());
    }
}
test "invalid unicode sequences are not characters" {
    // More than one character between the quotes, nothing between them, and
    // a lone opening quote.
    const inputs = [_][]const u8{
        "'asdf'",
        "'asdfg'",
        "'as'",
        "''",
        "'",
    };
    for (inputs) |input| {
        var parser = Parser.init(std.testing.allocator, input);
        // Deferred so the parser is released even when an expectation fails.
        defer parser.deinit();
        try std.testing.expectError(error.NoChar, parser.identifyChar());
    }
}
test "strings are recognized" {
    // The result is the text between the double quotes.
    const Case = struct { input: []const u8, want: []const u8 };
    const cases = [_]Case{
        .{ .input = "\" \"", .want = " " },
        .{
            .input = "\"aslkdfjlaskdfj lkasjflkasjdflaksjfd''12309)($)(#$[[]𒀤\"",
            .want = "aslkdfjlaskdfj lkasjflkasjdflaksjfd''12309)($)(#$[[]𒀤",
        },
    };
    for (cases) |case| {
        var parser = Parser.init(std.testing.allocator, case.input);
        // Deferred so the parser is released even when an expectation fails.
        defer parser.deinit();
        try std.testing.expectEqualStrings(case.want, try parser.identifyString());
    }
}
test "invalid strings are not recognized" {
    // An empty string, a lone opening quote, and a string containing a newline.
    const inputs = [_][]const u8{
        "\"\"",
        "\"",
        "\"\n\"",
    };
    for (inputs) |input| {
        var parser = Parser.init(std.testing.allocator, input);
        // Deferred so the parser is released even when an expectation fails.
        defer parser.deinit();
        try std.testing.expectError(error.NoString, parser.identifyString());
    }
}
test "constants are recognized" {
    // One case per kind of constant: decimal, hexadecimal, character, string.
    const Case = struct { input: []const u8, want: ConstantValue };
    const cases = [_]Case{
        .{ .input = "1234567890 1234", .want = ConstantValue{ .number = 1234567890 } },
        .{ .input = "#1234567890abcdef;%#*(", .want = ConstantValue{ .number = 0x1234567890abcdef } },
        .{ .input = "'a'uuuuuu", .want = ConstantValue{ .string = "a" } },
        .{ .input = "\"hello \"world", .want = ConstantValue{ .string = "hello " } },
    };
    for (cases) |case| {
        var parser = Parser.init(std.testing.allocator, case.input);
        // The parser was previously never released in this test.
        defer parser.deinit();

        const got = try parser.identifyConstant();
        // Compare the kinds first, so a result of the wrong kind is reported
        // as a test failure rather than as an inactive union field access.
        try std.testing.expectEqual(std.meta.activeTag(case.want), std.meta.activeTag(got));
        switch (case.want) {
            .number => |want| try std.testing.expectEqual(want, got.number),
            .string => |want| try std.testing.expectEqualStrings(want, got.string),
        }
    }
}

10
src/root.zig Normal file
View file

@ -0,0 +1,10 @@
const std = @import("std");
const testing = std.testing;
const opcodes = @import("opcodes.zig");
const parser = @import("parser.zig");
// Reference the imported files so that the `test` blocks they contain are
// compiled and run when this file is the root of a test build.
test {
_ = opcodes;
_ = parser;
}

153
src/temp.py Normal file
View file

@ -0,0 +1,153 @@
# Instruction mnemonics, one per entry. The loop at the bottom of this script
# prints one Zig `Operation{.opcode = Opcode.<mnemonic>},` line for each entry,
# in the order listed here.
x = [
"TRAP",
"FCMP",
"FUN",
"FEQL",
"FADD",
"FIX",
"FSUB",
"FIXU",
"FLOT",
"FLOTU",
"SFLOT",
"SFLOTU",
"FMUL",
"FCMPE",
"FUNE",
"FEQLE",
"FDIV",
"FSQRT",
"FREM",
"FINT",
"MUL",
"MULU",
"DIV",
"DIVU",
"ADD",
"ADDU",
"SUB",
"SUBU",
"2ADDU",
"4ADDU",
"8ADDU",
"16ADDU",
"CMP",
"CMPU",
"NEG",
"NEGU",
"SL",
"SLU",
"SR",
"SRU",
"BN",
"BZ",
"BP",
"BOD",
"BNN",
"BNZ",
"BNP",
"BEV",
"PBN",
"PBZ",
"PBP",
"PBOD",
"PBNN",
"PBNZ",
"PBNP",
"PBEV",
"CSN",
"CSZ",
"CSP",
"CSOD",
"CSNN",
"CSNZ",
"CSNP",
"CSEV",
"ZSN",
"ZSZ",
"ZSP",
"ZSOD",
"ZSNN",
"ZSNZ",
"ZSNP",
"ZSEV",
"LDB",
"LDBU",
"LDW",
"LDWU",
"LDT",
"LDTU",
"LDO",
"LDOU",
"LDSF",
"LDHT",
"CSWAP",
"LDUNC",
"LDVTS",
"PRELD",
"PREGO",
"GO",
"STB",
"STBU",
"STW",
"STWU",
"STT",
"STTU",
"STO",
"STOU",
"STSF",
"STHT",
"STCO",
"STUNC",
"SYNCD",
"PREST",
"SYNCID",
"PUSHGO",
"OR",
"ORN",
"NOR",
"XOR",
"AND",
"ANDN",
"NAND",
"NXOR",
"BDIF",
"WDIF",
"TDIF",
"ODIF",
"MUX",
"SADD",
"MOR",
"MXOR",
"SETH",
"SETMH",
"SETML",
"SETL",
"INCH",
"INCMH",
"INCML",
"INCL",
"ORH",
"ORMH",
"ORML",
"ORL",
"ANDNH",
"ANDNMH",
"ANDNML",
"ANDNL",
"JMP",
"PUSHJ",
"GETA",
"PUT",
"POP",
"RESUME",
"SAVE",
"UNSAVE",
"SYNC",
"SWYM",
"GET",
"TRIP",
]
# Print one Zig `Operation` initializer per mnemonic, ready to paste into a
# Zig array literal.
for val in x:
    # A Zig identifier cannot start with a digit, so mnemonics such as
    # "2ADDU" are emitted with a leading underscore, which is how the Zig
    # sources spell them (e.g. `opcodes.Opcode._2ADDU`).
    ident = f"_{val}" if val[0].isdigit() else val
    print(f"Operation{{.opcode = Opcode.{ident}}},")