aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- .gitignore 2
-rw-r--r-- build.zig 91
-rw-r--r-- build.zig.zon 72
-rw-r--r-- src/main.zig 24
-rw-r--r-- src/opcodes.zig 1100
-rw-r--r-- src/parser.zig 535
-rw-r--r-- src/root.zig 10
-rw-r--r-- src/temp.py 153
8 files changed, 1987 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..db3cb54
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+/.zig-cache/
+/zig-out/
diff --git a/build.zig b/build.zig
new file mode 100644
index 0000000..720cddd
--- /dev/null
+++ b/build.zig
@@ -0,0 +1,91 @@
+const std = @import("std");
+
+/// Describe the build graph for `mmix-as`.
+///
+/// Nothing is compiled here directly: the steps registered on `b` are
+/// executed later by the external build runner. The graph contains a static
+/// library rooted at `src/root.zig`, an executable rooted at `src/main.zig`,
+/// a `run` step for the executable, and a `test` step that runs the unit
+/// tests of both roots.
+pub fn build(b: *std.Build) void {
+    // Target and optimization mode come from the `zig build` command line;
+    // no defaults are overridden here.
+    const target = b.standardTargetOptions(.{});
+    const optimize = b.standardOptimizeOption(.{});
+
+    // Static library, installed by the default "install" step.
+    const static_lib = b.addStaticLibrary(.{
+        .name = "mmix-as",
+        .root_source_file = b.path("src/root.zig"),
+        .target = target,
+        .optimize = optimize,
+    });
+    b.installArtifact(static_lib);
+
+    // Executable, likewise installed by the default step.
+    const assembler = b.addExecutable(.{
+        .name = "mmix-as",
+        .root_source_file = b.path("src/main.zig"),
+        .target = target,
+        .optimize = optimize,
+    });
+    b.installArtifact(assembler);
+
+    // `zig build run [-- arg1 arg2 ...]`: run the executable. Depending on
+    // the install step makes it run from the installation directory rather
+    // than from the cache directory.
+    const run_assembler = b.addRunArtifact(assembler);
+    run_assembler.step.dependOn(b.getInstallStep());
+    if (b.args) |forwarded_args| run_assembler.addArgs(forwarded_args);
+    b.step("run", "Run the app").dependOn(&run_assembler.step);
+
+    // `zig build test`: build and run the unit tests of both source roots.
+    const run_lib_tests = b.addRunArtifact(b.addTest(.{
+        .root_source_file = b.path("src/root.zig"),
+        .target = target,
+        .optimize = optimize,
+    }));
+    const run_exe_tests = b.addRunArtifact(b.addTest(.{
+        .root_source_file = b.path("src/main.zig"),
+        .target = target,
+        .optimize = optimize,
+    }));
+
+    const unit_tests = b.step("test", "Run unit tests");
+    unit_tests.dependOn(&run_lib_tests.step);
+    unit_tests.dependOn(&run_exe_tests.step);
+}
diff --git a/build.zig.zon b/build.zig.zon
new file mode 100644
index 0000000..3a2334c
--- /dev/null
+++ b/build.zig.zon
@@ -0,0 +1,72 @@
+.{
+ // This is the default name used by packages depending on this one. For
+ // example, when a user runs `zig fetch --save <url>`, this field is used
+ // as the key in the `dependencies` table. Although the user can choose a
+ // different name, most users will stick with this provided value.
+ //
+ // It is redundant to include "zig" in this name because it is already
+ // within the Zig package namespace.
+ .name = "mmix-as",
+
+ // This is a [Semantic Version](https://semver.org/).
+ // In a future version of Zig it will be used for package deduplication.
+ .version = "0.0.0",
+
+ // This field is optional.
+ // This is currently advisory only; Zig does not yet do anything
+ // with this value.
+ //.minimum_zig_version = "0.11.0",
+
+ // This field is optional.
+ // Each dependency must either provide a `url` and `hash`, or a `path`.
+ // `zig build --fetch` can be used to fetch all dependencies of a package, recursively.
+ // Once all dependencies are fetched, `zig build` no longer requires
+ // internet connectivity.
+ .dependencies = .{
+ // See `zig fetch --save <url>` for a command-line interface for adding dependencies.
+ //.example = .{
+ // // When updating this field to a new URL, be sure to delete the corresponding
+ // // `hash`, otherwise you are communicating that you expect to find the old hash at
+ // // the new URL.
+ // .url = "https://example.com/foo.tar.gz",
+ //
+ // // This is computed from the file contents of the directory of files that is
+ // // obtained after fetching `url` and applying the inclusion rules given by
+ // // `paths`.
+ // //
+ // // This field is the source of truth; packages do not come from a `url`; they
+ // // come from a `hash`. `url` is just one of many possible mirrors for how to
+ // // obtain a package matching this `hash`.
+ // //
+ // // Uses the [multihash](https://multiformats.io/multihash/) format.
+ // .hash = "...",
+ //
+ // // When this is provided, the package is found in a directory relative to the
+ // // build root. In this case the package's hash is irrelevant and therefore not
+ // // computed. This field and `url` are mutually exclusive.
+ // .path = "foo",
+
+ // // When this is set to `true`, a package is declared to be lazily
+ // // fetched. This makes the dependency only get fetched if it is
+ // // actually used.
+ // .lazy = false,
+ //},
+ },
+
+ // Specifies the set of files and directories that are included in this package.
+ // Only files and directories listed here are included in the `hash` that
+ // is computed for this package. Only files listed here will remain on disk
+ // when using the zig package manager. As a rule of thumb, one should list
+ // files required for compilation plus any license(s).
+ // Paths are relative to the build root. Use the empty string (`""`) to refer to
+ // the build root itself.
+ // A directory listed here means that all files within, recursively, are included.
+ .paths = .{
+ "build.zig",
+ "build.zig.zon",
+ "src",
+ // For example...
+ //"LICENSE",
+ //"README.md",
+ },
+}
diff --git a/src/main.zig b/src/main.zig
new file mode 100644
index 0000000..c8a3f67
--- /dev/null
+++ b/src/main.zig
@@ -0,0 +1,24 @@
+const std = @import("std");
+
+/// Program entry point: print a greeting to stderr, then a hint about the
+/// test step to stdout through a buffered writer.
+pub fn main() !void {
+    // Diagnostics go to stderr so they never mix with real program output.
+    std.debug.print("All your {s} are belong to us.\n", .{"codebase"});
+
+    // stdout carries the application's actual output; buffer it and flush
+    // once at the end.
+    var buffered = std.io.bufferedWriter(std.io.getStdOut().writer());
+    try buffered.writer().print("Run `zig build test` to run the tests.\n", .{});
+    try buffered.flush();
+}
+
+test "simple test" {
+    // `std.testing.allocator` reports a leak if `deinit` is ever dropped.
+    var numbers = std.ArrayList(i32).init(std.testing.allocator);
+    defer numbers.deinit();
+
+    try numbers.append(42);
+    const last = numbers.pop();
+    try std.testing.expectEqual(@as(i32, 42), last);
+}
diff --git a/src/opcodes.zig b/src/opcodes.zig
new file mode 100644
index 0000000..7f43547
--- /dev/null
+++ b/src/opcodes.zig
@@ -0,0 +1,1100 @@
+const std = @import("std");
+
+/// Assembler pseudo-operations. `parseOp` recognizes these names (compared
+/// case-insensitively) whenever the text is not a machine opcode mnemonic.
+pub const PseudoOp = enum {
+ IS,
+ GREG,
+ LOC,
+ BYTE,
+ WYDE,
+ TETRA,
+ OCTA,
+};
+
+/// Machine opcodes, one tag per value of the `u8` backing integer.
+///
+/// No tag has an explicit value, so tags are numbered consecutively from 0
+/// in declaration order: reordering, inserting or removing a tag changes
+/// the numeric value of every tag after it. The row markers below give the
+/// value range covered by each group of 16 tags.
+///
+/// Tags with a leading underscore (`_2ADDU`, `_4ADDU`, ...) stand for
+/// mnemonics that start with a digit; `getOpcode` maps e.g. "2ADDU" to
+/// `_2ADDU`. Not every tag is reachable through `getOpcode`: "ADDI", for
+/// example, is rejected with `error.NoOpcode`.
+pub const Opcode = enum(u8) {
+ // 0x00-0x0F
+ TRAP,
+ FCMP,
+ FUN,
+ FEQL,
+ FADD,
+ FIX,
+ FSUB,
+ FIXU,
+ FLOT,
+ FLOTI,
+ FLOTU,
+ FLOTUI,
+ SFLOT,
+ SFLOTI,
+ SFLOTU,
+ SFLOTUI,
+ // 0x10-0x1F
+ FMUL,
+ FCMPE,
+ FUNE,
+ FEQLE,
+ FDIV,
+ FSQRT,
+ FREM,
+ FINT,
+ MUL,
+ MULI,
+ MULU,
+ MULUI,
+ DIV,
+ DIVI,
+ DIVU,
+ DIVUI,
+ // 0x20-0x2F
+ ADD,
+ ADDI,
+ ADDU,
+ ADDUI,
+ SUB,
+ SUBI,
+ SUBU,
+ SUBUI,
+ _2ADDU,
+ _2ADDUI,
+ _4ADDU,
+ _4ADDUI,
+ _8ADDU,
+ _8ADDUI,
+ _16ADDU,
+ _16ADDUI,
+ // 0x30-0x3F
+ CMP,
+ CMPI,
+ CMPU,
+ CMPUI,
+ NEG,
+ NEGI,
+ NEGU,
+ NEGUI,
+ SL,
+ SLI,
+ SLU,
+ SLUI,
+ SR,
+ SRI,
+ SRU,
+ SRUI,
+ // 0x40-0x4F
+ BN,
+ BNB,
+ BZ,
+ BZB,
+ BP,
+ BPB,
+ BOD,
+ BODB,
+ BNN,
+ BNNB,
+ BNZ,
+ BNZB,
+ BNP,
+ BNPB,
+ BEV,
+ BEVB,
+ // 0x50-0x5F
+ PBN,
+ PBNB,
+ PBZ,
+ PBZB,
+ PBP,
+ PBPB,
+ PBOD,
+ PBODB,
+ PBNN,
+ PBNNB,
+ PBNZ,
+ PBNZB,
+ PBNP,
+ PBNPB,
+ PBEV,
+ PBEVB,
+ // 0x60-0x6F
+ CSN,
+ CSNI,
+ CSZ,
+ CSZI,
+ CSP,
+ CSPI,
+ CSOD,
+ CSODI,
+ CSNN,
+ CSNNI,
+ CSNZ,
+ CSNZI,
+ CSNP,
+ CSNPI,
+ CSEV,
+ CSEVI,
+ // 0x70-0x7F
+ ZSN,
+ ZSNI,
+ ZSZ,
+ ZSZI,
+ ZSP,
+ ZSPI,
+ ZSOD,
+ ZSODI,
+ ZSNN,
+ ZSNNI,
+ ZSNZ,
+ ZSNZI,
+ ZSNP,
+ ZSNPI,
+ ZSEV,
+ ZSEVI,
+ // 0x80-0x8F
+ LDB,
+ LDBI,
+ LDBU,
+ LDBUI,
+ LDW,
+ LDWI,
+ LDWU,
+ LDWUI,
+ LDT,
+ LDTI,
+ LDTU,
+ LDTUI,
+ LDO,
+ LDOI,
+ LDOU,
+ LDOUI,
+ // 0x90-0x9F
+ LDSF,
+ LDSFI,
+ LDHT,
+ LDHTI,
+ CSWAP,
+ CSWAPI,
+ LDUNC,
+ LDUNCI,
+ LDVTS,
+ LDVTSI,
+ PRELD,
+ PRELDI,
+ PREGO,
+ PREGOI,
+ GO,
+ GOI,
+ // 0xA0-0xAF
+ STB,
+ STBI,
+ STBU,
+ STBUI,
+ STW,
+ STWI,
+ STWU,
+ STWUI,
+ STT,
+ STTI,
+ STTU,
+ STTUI,
+ STO,
+ STOI,
+ STOU,
+ STOUI,
+ // 0xB0-0xBF
+ STSF,
+ STSFI,
+ STHT,
+ STHTI,
+ STCO,
+ STCOI,
+ STUNC,
+ STUNCI,
+ SYNCD,
+ SYNCDI,
+ PREST,
+ PRESTI,
+ SYNCID,
+ SYNCIDI,
+ PUSHGO,
+ PUSHGOI,
+ // 0xC0-0xCF
+ OR,
+ ORI,
+ ORN,
+ ORNI,
+ NOR,
+ NORI,
+ XOR,
+ XORI,
+ AND,
+ ANDI,
+ ANDN,
+ ANDNI,
+ NAND,
+ NANDI,
+ NXOR,
+ NXORI,
+ // 0xD0-0xDF
+ BDIF,
+ BDIFI,
+ WDIF,
+ WDIFI,
+ TDIF,
+ TDIFI,
+ ODIF,
+ ODIFI,
+ MUX,
+ MUXI,
+ SADD,
+ SADDI,
+ MOR,
+ MORI,
+ MXOR,
+ MXORI,
+ // 0xE0-0xEF
+ SETH,
+ SETMH,
+ SETML,
+ SETL,
+ INCH,
+ INCMH,
+ INCML,
+ INCL,
+ ORH,
+ ORMH,
+ ORML,
+ ORL,
+ ANDNH,
+ ANDNMH,
+ ANDNML,
+ ANDNL,
+ // 0xF0-0xFF
+ JMP,
+ JMPB,
+ PUSHJ,
+ PUSHJB,
+ GETA,
+ GETAB,
+ PUT,
+ PUTI,
+ POP,
+ RESUME,
+ SAVE,
+ UNSAVE,
+ SYNC,
+ SWYM,
+ GET,
+ TRIP,
+};
+
+/// Tag type of `Operation`: says whether a parsed operation is a pseudo-op
+/// or a machine opcode.
+pub const OperationType = enum {
+ pseudo_op,
+ opcode,
+};
+
+/// Result of `parseOp`: either a `PseudoOp` or an `Opcode`, tagged by
+/// `OperationType` so callers can `switch` on it.
+pub const Operation = union(OperationType) {
+ pseudo_op: PseudoOp,
+ opcode: Opcode,
+};
+
+/// Parse an operation name into an `Operation`.
+///
+/// Matching is case-insensitive: `text` is upper-cased into a scratch copy
+/// allocated from `allocator`, and that copy is always released before
+/// returning, so the caller owns nothing afterwards. Opcode mnemonics are
+/// tried first (via `getOpcode`); if none matches, the text is compared
+/// against the `PseudoOp` tag names.
+///
+/// Returns `error.NoOpcode` when `text` names neither an opcode nor a
+/// pseudo-op, and propagates the allocator's error if the scratch copy
+/// cannot be allocated.
+pub fn parseOp(allocator: std.mem.Allocator, text: []const u8) !Operation {
+    const upper_str = try std.ascii.allocUpperString(allocator, text);
+    // One `defer` covers every exit path below, success or error.
+    defer allocator.free(upper_str);
+
+    const opcode = getOpcode(upper_str) catch |err| switch (err) {
+        error.NoOpcode => {
+            // Not a machine opcode; the `PseudoOp` tag names are exactly the
+            // upper-case spellings to accept.
+            const pseudo_op = std.meta.stringToEnum(PseudoOp, upper_str) orelse return err;
+            return Operation{ .pseudo_op = pseudo_op };
+        },
+    };
+
+    return Operation{ .opcode = opcode };
+}
+
+/// Comptime lookup table from upper-case mnemonic to `Opcode`.
+///
+/// Only the mnemonics listed here are accepted by `getOpcode`; `Opcode` tags
+/// without an entry (for example `ADDI` or `BNB`) are deliberately not
+/// reachable by name. Mnemonics that start with a digit map to the
+/// underscore-prefixed tags.
+const opcode_mnemonics = std.StaticStringMap(Opcode).initComptime(.{
+    .{ "TRAP", .TRAP },
+    .{ "FCMP", .FCMP },
+    .{ "FUN", .FUN },
+    .{ "FEQL", .FEQL },
+    .{ "FADD", .FADD },
+    .{ "FIX", .FIX },
+    .{ "FSUB", .FSUB },
+    .{ "FIXU", .FIXU },
+    .{ "FLOT", .FLOT },
+    .{ "FLOTU", .FLOTU },
+    .{ "SFLOT", .SFLOT },
+    .{ "SFLOTU", .SFLOTU },
+    .{ "FMUL", .FMUL },
+    .{ "FCMPE", .FCMPE },
+    .{ "FUNE", .FUNE },
+    .{ "FEQLE", .FEQLE },
+    .{ "FDIV", .FDIV },
+    .{ "FSQRT", .FSQRT },
+    .{ "FREM", .FREM },
+    .{ "FINT", .FINT },
+    .{ "MUL", .MUL },
+    .{ "MULU", .MULU },
+    .{ "DIV", .DIV },
+    .{ "DIVU", .DIVU },
+    .{ "ADD", .ADD },
+    .{ "ADDU", .ADDU },
+    .{ "SUB", .SUB },
+    .{ "SUBU", .SUBU },
+    .{ "2ADDU", ._2ADDU },
+    .{ "4ADDU", ._4ADDU },
+    .{ "8ADDU", ._8ADDU },
+    .{ "16ADDU", ._16ADDU },
+    .{ "CMP", .CMP },
+    .{ "CMPU", .CMPU },
+    .{ "NEG", .NEG },
+    .{ "NEGU", .NEGU },
+    .{ "SL", .SL },
+    .{ "SLU", .SLU },
+    .{ "SR", .SR },
+    .{ "SRU", .SRU },
+    .{ "BN", .BN },
+    .{ "BZ", .BZ },
+    .{ "BP", .BP },
+    .{ "BOD", .BOD },
+    .{ "BNN", .BNN },
+    .{ "BNZ", .BNZ },
+    .{ "BNP", .BNP },
+    .{ "BEV", .BEV },
+    .{ "PBN", .PBN },
+    .{ "PBZ", .PBZ },
+    .{ "PBP", .PBP },
+    .{ "PBOD", .PBOD },
+    .{ "PBNN", .PBNN },
+    .{ "PBNZ", .PBNZ },
+    .{ "PBNP", .PBNP },
+    .{ "PBEV", .PBEV },
+    .{ "CSN", .CSN },
+    .{ "CSZ", .CSZ },
+    .{ "CSP", .CSP },
+    .{ "CSOD", .CSOD },
+    .{ "CSNN", .CSNN },
+    .{ "CSNZ", .CSNZ },
+    .{ "CSNP", .CSNP },
+    .{ "CSEV", .CSEV },
+    .{ "ZSN", .ZSN },
+    .{ "ZSZ", .ZSZ },
+    .{ "ZSP", .ZSP },
+    .{ "ZSOD", .ZSOD },
+    .{ "ZSNN", .ZSNN },
+    .{ "ZSNZ", .ZSNZ },
+    .{ "ZSNP", .ZSNP },
+    .{ "ZSEV", .ZSEV },
+    .{ "LDB", .LDB },
+    .{ "LDBU", .LDBU },
+    .{ "LDW", .LDW },
+    .{ "LDWU", .LDWU },
+    .{ "LDT", .LDT },
+    .{ "LDTU", .LDTU },
+    .{ "LDO", .LDO },
+    .{ "LDOU", .LDOU },
+    .{ "LDSF", .LDSF },
+    .{ "LDHT", .LDHT },
+    .{ "CSWAP", .CSWAP },
+    .{ "LDUNC", .LDUNC },
+    .{ "LDVTS", .LDVTS },
+    .{ "PRELD", .PRELD },
+    .{ "PREGO", .PREGO },
+    .{ "GO", .GO },
+    .{ "STB", .STB },
+    .{ "STBU", .STBU },
+    .{ "STW", .STW },
+    .{ "STWU", .STWU },
+    .{ "STT", .STT },
+    .{ "STTU", .STTU },
+    .{ "STO", .STO },
+    .{ "STOU", .STOU },
+    .{ "STSF", .STSF },
+    .{ "STHT", .STHT },
+    .{ "STCO", .STCO },
+    .{ "STUNC", .STUNC },
+    .{ "SYNCD", .SYNCD },
+    .{ "PREST", .PREST },
+    .{ "SYNCID", .SYNCID },
+    .{ "PUSHGO", .PUSHGO },
+    .{ "OR", .OR },
+    .{ "ORN", .ORN },
+    .{ "NOR", .NOR },
+    .{ "XOR", .XOR },
+    .{ "AND", .AND },
+    .{ "ANDN", .ANDN },
+    .{ "NAND", .NAND },
+    .{ "NXOR", .NXOR },
+    .{ "BDIF", .BDIF },
+    .{ "WDIF", .WDIF },
+    .{ "TDIF", .TDIF },
+    .{ "ODIF", .ODIF },
+    .{ "MUX", .MUX },
+    .{ "SADD", .SADD },
+    .{ "MOR", .MOR },
+    .{ "MXOR", .MXOR },
+    .{ "SETH", .SETH },
+    .{ "SETMH", .SETMH },
+    .{ "SETML", .SETML },
+    .{ "SETL", .SETL },
+    .{ "INCH", .INCH },
+    .{ "INCMH", .INCMH },
+    .{ "INCML", .INCML },
+    .{ "INCL", .INCL },
+    .{ "ORH", .ORH },
+    .{ "ORMH", .ORMH },
+    .{ "ORML", .ORML },
+    .{ "ORL", .ORL },
+    .{ "ANDNH", .ANDNH },
+    .{ "ANDNMH", .ANDNMH },
+    .{ "ANDNML", .ANDNML },
+    .{ "ANDNL", .ANDNL },
+    .{ "JMP", .JMP },
+    .{ "PUSHJ", .PUSHJ },
+    .{ "GETA", .GETA },
+    .{ "PUT", .PUT },
+    .{ "POP", .POP },
+    .{ "RESUME", .RESUME },
+    .{ "SAVE", .SAVE },
+    .{ "UNSAVE", .UNSAVE },
+    .{ "SYNC", .SYNC },
+    .{ "SWYM", .SWYM },
+    .{ "GET", .GET },
+    .{ "TRIP", .TRIP },
+});
+
+/// Look up an opcode by its mnemonic.
+///
+/// `upper_str` must already be upper-case; the comparison is exact.
+/// Returns `error.NoOpcode` when the text is not one of the mnemonics in
+/// `opcode_mnemonics`.
+fn getOpcode(upper_str: []const u8) !Opcode {
+    return opcode_mnemonics.get(upper_str) orelse error.NoOpcode;
+}
+
+test "Opcodes encode correctly" {
+    // Each input sits next to its expected result, so the two can never
+    // drift out of step the way two parallel arrays can.
+    const Case = struct {
+        text: []const u8,
+        expected: Operation,
+    };
+
+    const cases = [_]Case{
+        // opcodes
+        .{ .text = "TRAP", .expected = .{ .opcode = .TRAP } },
+        .{ .text = "FCMP", .expected = .{ .opcode = .FCMP } },
+        .{ .text = "FUN", .expected = .{ .opcode = .FUN } },
+        .{ .text = "FEQL", .expected = .{ .opcode = .FEQL } },
+        .{ .text = "FADD", .expected = .{ .opcode = .FADD } },
+        .{ .text = "FIX", .expected = .{ .opcode = .FIX } },
+        .{ .text = "FSUB", .expected = .{ .opcode = .FSUB } },
+        .{ .text = "FIXU", .expected = .{ .opcode = .FIXU } },
+        .{ .text = "FLOT", .expected = .{ .opcode = .FLOT } },
+        .{ .text = "FLOTU", .expected = .{ .opcode = .FLOTU } },
+        .{ .text = "SFLOT", .expected = .{ .opcode = .SFLOT } },
+        .{ .text = "SFLOTU", .expected = .{ .opcode = .SFLOTU } },
+        .{ .text = "FMUL", .expected = .{ .opcode = .FMUL } },
+        .{ .text = "FCMPE", .expected = .{ .opcode = .FCMPE } },
+        .{ .text = "FUNE", .expected = .{ .opcode = .FUNE } },
+        .{ .text = "FEQLE", .expected = .{ .opcode = .FEQLE } },
+        .{ .text = "FDIV", .expected = .{ .opcode = .FDIV } },
+        .{ .text = "FSQRT", .expected = .{ .opcode = .FSQRT } },
+        .{ .text = "FREM", .expected = .{ .opcode = .FREM } },
+        .{ .text = "FINT", .expected = .{ .opcode = .FINT } },
+        .{ .text = "MUL", .expected = .{ .opcode = .MUL } },
+        .{ .text = "MULU", .expected = .{ .opcode = .MULU } },
+        .{ .text = "DIV", .expected = .{ .opcode = .DIV } },
+        .{ .text = "DIVU", .expected = .{ .opcode = .DIVU } },
+        .{ .text = "ADD", .expected = .{ .opcode = .ADD } },
+        .{ .text = "ADDU", .expected = .{ .opcode = .ADDU } },
+        .{ .text = "SUB", .expected = .{ .opcode = .SUB } },
+        .{ .text = "SUBU", .expected = .{ .opcode = .SUBU } },
+        .{ .text = "2ADDU", .expected = .{ .opcode = ._2ADDU } },
+        .{ .text = "4ADDU", .expected = .{ .opcode = ._4ADDU } },
+        .{ .text = "8ADDU", .expected = .{ .opcode = ._8ADDU } },
+        .{ .text = "16ADDU", .expected = .{ .opcode = ._16ADDU } },
+        .{ .text = "CMP", .expected = .{ .opcode = .CMP } },
+        .{ .text = "CMPU", .expected = .{ .opcode = .CMPU } },
+        .{ .text = "NEG", .expected = .{ .opcode = .NEG } },
+        .{ .text = "NEGU", .expected = .{ .opcode = .NEGU } },
+        .{ .text = "SL", .expected = .{ .opcode = .SL } },
+        .{ .text = "SLU", .expected = .{ .opcode = .SLU } },
+        .{ .text = "SR", .expected = .{ .opcode = .SR } },
+        .{ .text = "SRU", .expected = .{ .opcode = .SRU } },
+        .{ .text = "BN", .expected = .{ .opcode = .BN } },
+        .{ .text = "BZ", .expected = .{ .opcode = .BZ } },
+        .{ .text = "BP", .expected = .{ .opcode = .BP } },
+        .{ .text = "BOD", .expected = .{ .opcode = .BOD } },
+        .{ .text = "BNN", .expected = .{ .opcode = .BNN } },
+        .{ .text = "BNZ", .expected = .{ .opcode = .BNZ } },
+        .{ .text = "BNP", .expected = .{ .opcode = .BNP } },
+        .{ .text = "BEV", .expected = .{ .opcode = .BEV } },
+        .{ .text = "PBN", .expected = .{ .opcode = .PBN } },
+        .{ .text = "PBZ", .expected = .{ .opcode = .PBZ } },
+        .{ .text = "PBP", .expected = .{ .opcode = .PBP } },
+        .{ .text = "PBOD", .expected = .{ .opcode = .PBOD } },
+        .{ .text = "PBNN", .expected = .{ .opcode = .PBNN } },
+        .{ .text = "PBNZ", .expected = .{ .opcode = .PBNZ } },
+        .{ .text = "PBNP", .expected = .{ .opcode = .PBNP } },
+        .{ .text = "PBEV", .expected = .{ .opcode = .PBEV } },
+        .{ .text = "CSN", .expected = .{ .opcode = .CSN } },
+        .{ .text = "CSZ", .expected = .{ .opcode = .CSZ } },
+        .{ .text = "CSP", .expected = .{ .opcode = .CSP } },
+        .{ .text = "CSOD", .expected = .{ .opcode = .CSOD } },
+        .{ .text = "CSNN", .expected = .{ .opcode = .CSNN } },
+        .{ .text = "CSNZ", .expected = .{ .opcode = .CSNZ } },
+        .{ .text = "CSNP", .expected = .{ .opcode = .CSNP } },
+        .{ .text = "CSEV", .expected = .{ .opcode = .CSEV } },
+        .{ .text = "ZSN", .expected = .{ .opcode = .ZSN } },
+        .{ .text = "ZSZ", .expected = .{ .opcode = .ZSZ } },
+        .{ .text = "ZSP", .expected = .{ .opcode = .ZSP } },
+        .{ .text = "ZSOD", .expected = .{ .opcode = .ZSOD } },
+        .{ .text = "ZSNN", .expected = .{ .opcode = .ZSNN } },
+        .{ .text = "ZSNZ", .expected = .{ .opcode = .ZSNZ } },
+        .{ .text = "ZSNP", .expected = .{ .opcode = .ZSNP } },
+        .{ .text = "ZSEV", .expected = .{ .opcode = .ZSEV } },
+        .{ .text = "LDB", .expected = .{ .opcode = .LDB } },
+        .{ .text = "LDBU", .expected = .{ .opcode = .LDBU } },
+        .{ .text = "LDW", .expected = .{ .opcode = .LDW } },
+        .{ .text = "LDWU", .expected = .{ .opcode = .LDWU } },
+        .{ .text = "LDT", .expected = .{ .opcode = .LDT } },
+        .{ .text = "LDTU", .expected = .{ .opcode = .LDTU } },
+        .{ .text = "LDO", .expected = .{ .opcode = .LDO } },
+        .{ .text = "LDOU", .expected = .{ .opcode = .LDOU } },
+        .{ .text = "LDSF", .expected = .{ .opcode = .LDSF } },
+        .{ .text = "LDHT", .expected = .{ .opcode = .LDHT } },
+        .{ .text = "CSWAP", .expected = .{ .opcode = .CSWAP } },
+        .{ .text = "LDUNC", .expected = .{ .opcode = .LDUNC } },
+        .{ .text = "LDVTS", .expected = .{ .opcode = .LDVTS } },
+        .{ .text = "PRELD", .expected = .{ .opcode = .PRELD } },
+        .{ .text = "PREGO", .expected = .{ .opcode = .PREGO } },
+        .{ .text = "GO", .expected = .{ .opcode = .GO } },
+        .{ .text = "STB", .expected = .{ .opcode = .STB } },
+        .{ .text = "STBU", .expected = .{ .opcode = .STBU } },
+        .{ .text = "STW", .expected = .{ .opcode = .STW } },
+        .{ .text = "STWU", .expected = .{ .opcode = .STWU } },
+        .{ .text = "STT", .expected = .{ .opcode = .STT } },
+        .{ .text = "STTU", .expected = .{ .opcode = .STTU } },
+        .{ .text = "STO", .expected = .{ .opcode = .STO } },
+        .{ .text = "STOU", .expected = .{ .opcode = .STOU } },
+        .{ .text = "STSF", .expected = .{ .opcode = .STSF } },
+        .{ .text = "STHT", .expected = .{ .opcode = .STHT } },
+        .{ .text = "STCO", .expected = .{ .opcode = .STCO } },
+        .{ .text = "STUNC", .expected = .{ .opcode = .STUNC } },
+        .{ .text = "SYNCD", .expected = .{ .opcode = .SYNCD } },
+        .{ .text = "PREST", .expected = .{ .opcode = .PREST } },
+        .{ .text = "SYNCID", .expected = .{ .opcode = .SYNCID } },
+        .{ .text = "PUSHGO", .expected = .{ .opcode = .PUSHGO } },
+        .{ .text = "OR", .expected = .{ .opcode = .OR } },
+        .{ .text = "ORN", .expected = .{ .opcode = .ORN } },
+        .{ .text = "NOR", .expected = .{ .opcode = .NOR } },
+        .{ .text = "XOR", .expected = .{ .opcode = .XOR } },
+        .{ .text = "AND", .expected = .{ .opcode = .AND } },
+        .{ .text = "ANDN", .expected = .{ .opcode = .ANDN } },
+        .{ .text = "NAND", .expected = .{ .opcode = .NAND } },
+        .{ .text = "NXOR", .expected = .{ .opcode = .NXOR } },
+        .{ .text = "BDIF", .expected = .{ .opcode = .BDIF } },
+        .{ .text = "WDIF", .expected = .{ .opcode = .WDIF } },
+        .{ .text = "TDIF", .expected = .{ .opcode = .TDIF } },
+        .{ .text = "ODIF", .expected = .{ .opcode = .ODIF } },
+        .{ .text = "MUX", .expected = .{ .opcode = .MUX } },
+        .{ .text = "SADD", .expected = .{ .opcode = .SADD } },
+        .{ .text = "MOR", .expected = .{ .opcode = .MOR } },
+        .{ .text = "MXOR", .expected = .{ .opcode = .MXOR } },
+        .{ .text = "SETH", .expected = .{ .opcode = .SETH } },
+        .{ .text = "SETMH", .expected = .{ .opcode = .SETMH } },
+        .{ .text = "SETML", .expected = .{ .opcode = .SETML } },
+        .{ .text = "SETL", .expected = .{ .opcode = .SETL } },
+        .{ .text = "INCH", .expected = .{ .opcode = .INCH } },
+        .{ .text = "INCMH", .expected = .{ .opcode = .INCMH } },
+        .{ .text = "INCML", .expected = .{ .opcode = .INCML } },
+        .{ .text = "INCL", .expected = .{ .opcode = .INCL } },
+        .{ .text = "ORH", .expected = .{ .opcode = .ORH } },
+        .{ .text = "ORMH", .expected = .{ .opcode = .ORMH } },
+        .{ .text = "ORML", .expected = .{ .opcode = .ORML } },
+        .{ .text = "ORL", .expected = .{ .opcode = .ORL } },
+        .{ .text = "ANDNH", .expected = .{ .opcode = .ANDNH } },
+        .{ .text = "ANDNMH", .expected = .{ .opcode = .ANDNMH } },
+        .{ .text = "ANDNML", .expected = .{ .opcode = .ANDNML } },
+        .{ .text = "ANDNL", .expected = .{ .opcode = .ANDNL } },
+        .{ .text = "JMP", .expected = .{ .opcode = .JMP } },
+        .{ .text = "PUSHJ", .expected = .{ .opcode = .PUSHJ } },
+        .{ .text = "GETA", .expected = .{ .opcode = .GETA } },
+        .{ .text = "PUT", .expected = .{ .opcode = .PUT } },
+        .{ .text = "POP", .expected = .{ .opcode = .POP } },
+        .{ .text = "RESUME", .expected = .{ .opcode = .RESUME } },
+        .{ .text = "SAVE", .expected = .{ .opcode = .SAVE } },
+        .{ .text = "UNSAVE", .expected = .{ .opcode = .UNSAVE } },
+        .{ .text = "SYNC", .expected = .{ .opcode = .SYNC } },
+        .{ .text = "SWYM", .expected = .{ .opcode = .SWYM } },
+        .{ .text = "GET", .expected = .{ .opcode = .GET } },
+        .{ .text = "TRIP", .expected = .{ .opcode = .TRIP } },
+
+        // pseudo ops
+        .{ .text = "IS", .expected = .{ .pseudo_op = .IS } },
+        .{ .text = "GREG", .expected = .{ .pseudo_op = .GREG } },
+        .{ .text = "LOC", .expected = .{ .pseudo_op = .LOC } },
+        .{ .text = "BYTE", .expected = .{ .pseudo_op = .BYTE } },
+        .{ .text = "WYDE", .expected = .{ .pseudo_op = .WYDE } },
+        .{ .text = "TETRA", .expected = .{ .pseudo_op = .TETRA } },
+        .{ .text = "OCTA", .expected = .{ .pseudo_op = .OCTA } },
+    };
+
+    for (cases) |case| {
+        const operation = try parseOp(std.testing.allocator, case.text);
+        try std.testing.expectEqual(case.expected, operation);
+    }
+}
+
+test "Invalid opcodes return error" {
+    // Text that is neither an opcode mnemonic nor a pseudo-op.
+    try std.testing.expectError(error.NoOpcode, parseOp(std.testing.allocator, "ALKSFDJ"));
+    // `ADDI` exists as an `Opcode` tag but is not accepted as a mnemonic.
+    try std.testing.expectError(error.NoOpcode, parseOp(std.testing.allocator, "ADDI"));
+}
diff --git a/src/parser.zig b/src/parser.zig
new file mode 100644
index 0000000..253ddf6
--- /dev/null
+++ b/src/parser.zig
@@ -0,0 +1,535 @@
+const std = @import("std");
+const opcodes = @import("opcodes.zig");
+
+ /// A symbol's value can be pure or point to a register
+ const SymbolValueType = enum {
+     pure,
+     register,
+ };
+ /// Value stored for a symbol, tagged by `SymbolValueType` so the active field
+ /// can be inspected with `switch` or compared against a tag (the same pattern
+ /// `ConstantValue` uses with `ConstantType`).
+ const SymbolValue = union(SymbolValueType) {
+     /// A pure 64-bit value.
+     pure: u64,
+     /// A register, stored as an 8-bit value.
+     register: u8,
+ };
+
+ /// A constant can be a number or a string
+ const ConstantType = enum {
+ number,
+ string,
+ };
+ /// Value of a constant, tagged by `ConstantType`.
+ /// `number` holds a 64-bit unsigned integer; `string` holds a byte slice.
+ const ConstantValue = union(ConstantType) {
+ number: u64,
+ string: []const u8,
+ };
+
+ /// The Parser reads a provided input and assembles it into MMIX object code.
+ ///
+ /// The methods defined here are the lexical helpers. Each `identify*` method
+ /// inspects `input` at the cursor (`ch_pos`) and, on success, moves the cursor
+ /// past what it recognized. On failure the cursor may already have moved.
+ /// Slices returned by these methods point into `input`; nothing is copied.
+ pub const Parser = struct {
+     /// Allocator used for the symbol table and object buffer and passed to `opcodes.parseOp`.
+     allocator: std.mem.Allocator,
+     /// Source text being parsed.
+     input: []const u8,
+     /// Starts at 0; presumably the current assembly location (not used by the methods below).
+     location: u64,
+     /// Cursor: byte offset into `input` of the next byte to examine.
+     ch_pos: usize,
+     /// Symbol table keyed by symbol name.
+     symbols: std.StringHashMap(SymbolValue),
+     /// Byte buffer, presumably for the assembled object code (not written by the methods below).
+     object: std.ArrayList(u8),
+
+     /// Test if a character is whitespace
+     /// Note that newlines are special and not included in this implementation.
+     fn isWhitespace(ch: u8) bool {
+         return ch == ' ' or ch == '\t' or ch == '\r';
+     }
+
+     /// Test if a character is a letter
+     /// Note that underscores are letters for the purposes of symbol recognition.
+     fn isLetter(ch: u8) bool {
+         return ch == '_' or (ch >= 'a' and ch <= 'z') or (ch >= 'A' and ch <= 'Z');
+     }
+
+     /// Test if a character is a decimal digit
+     fn isDecimal(ch: u8) bool {
+         return ch >= '0' and ch <= '9';
+     }
+
+     /// Test if a character is a hexadecimal digit (either case)
+     fn isHexadecimal(ch: u8) bool {
+         return isDecimal(ch) or (ch >= 'a' and ch <= 'f') or (ch >= 'A' and ch <= 'F');
+     }
+
+     /// Test if a byte is a symbol character
+     /// The check is per byte: letters, digits and every byte value above 126
+     /// qualify, which covers all bytes of a multi-byte UTF-8 sequence.
+     fn isSymbolChar(ch: u8) bool {
+         return isLetter(ch) or isDecimal(ch) or ch > 126;
+     }
+
+     /// Get a byte from the input at a specified location
+     /// Return 0 if the requested byte is out of range, so a NUL byte in the
+     /// input is indistinguishable from the end of the input.
+     fn getByte(self: *Parser, pos: usize) u8 {
+         if (pos < self.input.len) {
+             return self.input[pos];
+         }
+         return 0;
+     }
+
+     /// Move the cursor forward until it does not point at whitespace
+     /// Newlines are not skipped (see `isWhitespace`).
+     fn skipWhitespace(self: *Parser) void {
+         while (isWhitespace(self.getByte(self.ch_pos))) {
+             // Fixed: this used to write to the non-existent field `ch_pose`.
+             self.ch_pos += 1;
+         }
+     }
+
+     /// Determine whether the cursor points at a valid integer in base 10
+     /// Move the cursor past the integer and return it
+     /// Returns `error.NoDecimal` when there are no digits at the cursor or the
+     /// value does not fit in a `u64`.
+     fn identifyDecimal(self: *Parser) !u64 {
+         const start = self.ch_pos;
+         while (isDecimal(self.getByte(self.ch_pos))) {
+             self.ch_pos += 1;
+         }
+         const end = self.ch_pos;
+
+         return std.fmt.parseInt(u64, self.input[start..end], 10) catch return error.NoDecimal;
+     }
+
+     /// Determine whether the cursor points at a valid integer in base 16
+     /// Base 16 is identified by a number starting with #
+     /// Move the cursor past the integer and return it
+     /// Returns `error.NoHexadecimal` when the `#` is missing, no hex digits
+     /// follow it, or the value does not fit in a `u64`.
+     fn identifyHexadecimal(self: *Parser) !u64 {
+         if (self.getByte(self.ch_pos) != '#') {
+             return error.NoHexadecimal;
+         }
+
+         self.ch_pos += 1;
+
+         const start = self.ch_pos;
+         while (isHexadecimal(self.getByte(self.ch_pos))) {
+             self.ch_pos += 1;
+         }
+         const end = self.ch_pos;
+
+         return std.fmt.parseInt(u64, self.input[start..end], 16) catch return error.NoHexadecimal;
+     }
+
+     /// Determine whether the cursor points at a valid unicode character wrapped in single quotes
+     /// Move the cursor past the closing quote and return the character's UTF-8 bytes
+     /// Returns `error.NoChar` when the opening or closing quote is missing or
+     /// the quoted bytes are not exactly one valid UTF-8 code point.
+     fn identifyChar(self: *Parser) ![]const u8 {
+         if (self.getByte(self.ch_pos) != '\'') {
+             return error.NoChar;
+         }
+
+         self.ch_pos += 1;
+
+         const start = self.ch_pos;
+         while (self.getByte(self.ch_pos) != 0 and self.getByte(self.ch_pos) != '\'') {
+             self.ch_pos += 1;
+
+             // A UTF-8 encoded code point is at most 4 bytes long.
+             if (self.ch_pos - start > 4) {
+                 return error.NoChar;
+             }
+         }
+         const end = self.ch_pos;
+
+         // The scan also stops at the end of the input. Reject an unterminated
+         // literal instead of accepting it and pushing the cursor out of range.
+         if (self.getByte(end) != '\'') {
+             return error.NoChar;
+         }
+         self.ch_pos += 1;
+
+         if (end <= start) {
+             return error.NoChar;
+         }
+
+         const view = std.unicode.Utf8View.init(self.input[start..end]) catch return error.NoChar;
+         var iter = view.iterator();
+         var count: u8 = 0;
+         while (iter.nextCodepoint()) |_| {
+             count += 1;
+             if (count > 1) {
+                 return error.NoChar;
+             }
+         }
+         if (count != 1) {
+             return error.NoChar;
+         }
+
+         return self.input[start..end];
+     }
+
+     /// Determine whether the cursor points at a valid string wrapped in double quotes
+     /// Note that a string has at least one character in it and that it cannot have " or newlines in it
+     /// Move the cursor past the closing quote and return the string's contents
+     /// Returns `error.NoString` when a quote is missing, the string is empty
+     /// or it contains a newline.
+     fn identifyString(self: *Parser) ![]const u8 {
+         if (self.getByte(self.ch_pos) != '"') {
+             return error.NoString;
+         }
+
+         self.ch_pos += 1;
+         const start = self.ch_pos;
+         while (self.getByte(self.ch_pos) != 0 and self.getByte(self.ch_pos) != '"') {
+             if (self.getByte(self.ch_pos) == '\n') {
+                 return error.NoString;
+             }
+             self.ch_pos += 1;
+         }
+         const end = self.ch_pos;
+
+         // The scan also stops at the end of the input; a string without its
+         // closing quote is not "wrapped in double quotes" and is rejected.
+         if (self.getByte(end) != '"') {
+             return error.NoString;
+         }
+         self.ch_pos += 1;
+
+         if (end <= start) {
+             return error.NoString;
+         }
+
+         return self.input[start..end];
+     }
+
+     /// Determine whether the cursor points at a valid constant
+     /// The constant may be a string or a number; the first byte selects the
+     /// kind: digit -> decimal, `#` -> hexadecimal, `'` -> character, `"` -> string.
+     /// Characters and strings are both returned as `.string`.
+     /// Move the cursor past the constant and return it
+     fn identifyConstant(self: *Parser) !ConstantValue {
+         switch (self.getByte(self.ch_pos)) {
+             '0'...'9' => {
+                 const number = try self.identifyDecimal();
+                 return ConstantValue{ .number = number };
+             },
+             '#' => {
+                 const number = try self.identifyHexadecimal();
+                 return ConstantValue{ .number = number };
+             },
+             '\'' => {
+                 const string = try self.identifyChar();
+                 return ConstantValue{ .string = string };
+             },
+             '"' => {
+                 const string = try self.identifyString();
+                 return ConstantValue{ .string = string };
+             },
+             else => return error.NoConstant,
+         }
+     }
+
+     /// Determine whether the cursor points at a symbol
+     /// A symbol starts with a letter and only has symbol characters after that point
+     /// There is an exception that there are 30 special symbols of the form xH, xF, and xB where x is a single decimal digit
+     /// Move the cursor past the symbol and return its name
+     /// Returns `error.NoSymbol` (cursor unchanged) when neither form matches.
+     fn identifySymbol(self: *Parser) ![]const u8 {
+         const start = self.ch_pos;
+         const first = self.getByte(start);
+
+         // `isLetter` already accepts '_', so no separate underscore test is needed.
+         if (isLetter(first)) {
+             self.ch_pos += 1;
+             while (isSymbolChar(self.getByte(self.ch_pos))) {
+                 self.ch_pos += 1;
+             }
+             return self.input[start..self.ch_pos];
+         }
+
+         if (isDecimal(first)) {
+             const suffix = self.getByte(start + 1);
+             if (suffix == 'H' or suffix == 'F' or suffix == 'B') {
+                 self.ch_pos += 2;
+                 return self.input[start..self.ch_pos];
+             }
+         }
+
+         return error.NoSymbol;
+     }
+
+     /// Determine whether the cursor points at a valid opcode or pseudo operation
+     /// An opcode consists solely of symbol characters (letters and numbers in fact)
+     /// Move the cursor past the run of symbol characters and return the result
+     /// of `opcodes.parseOp` for that run
+     fn identifyOperation(self: *Parser) !opcodes.Operation {
+         const start = self.ch_pos;
+         while (isSymbolChar(self.getByte(self.ch_pos))) {
+             self.ch_pos += 1;
+         }
+         const end = self.ch_pos;
+
+         return opcodes.parseOp(self.allocator, self.input[start..end]);
+     }
+
+     /// Create a parser over `input` with the cursor and location at 0 and an
+     /// empty symbol table and object buffer. `input` is not copied, so it must
+     /// outlive the parser. Call `deinit` when done.
+     pub fn init(allocator: std.mem.Allocator, input: []const u8) Parser {
+         return Parser{
+             .allocator = allocator,
+             .input = input,
+             .location = 0,
+             .ch_pos = 0,
+             .symbols = std.StringHashMap(SymbolValue).init(allocator),
+             .object = std.ArrayList(u8).init(allocator),
+         };
+     }
+
+     /// Release the symbol table and the object buffer.
+     pub fn deinit(self: *Parser) void {
+         self.symbols.deinit();
+         self.object.deinit();
+     }
+ };
+
+ // Every ASCII letter, digit and the underscore must count as a symbol character.
+ test "normal ascii characters are recognized as symbol chars" {
+     const accepted = "qwertyuiopasdfghjklzxcvbnm1234567890QWERTYUIOPASDFGHJKLZXCVBNM_";
+
+     var idx: usize = 0;
+     while (idx < accepted.len) : (idx += 1) {
+         try std.testing.expect(Parser.isSymbolChar(accepted[idx]));
+     }
+ }
+
+ // The string is walked byte by byte, so this checks that every byte of the
+ // UTF-8 encoding of these characters counts as a symbol character.
+ test "large unicode characters are recognized as symbol chars" {
+     const cuneiform = "𒀀𒀁𒀂𒀃𒀄𒀅𒀆𒀇𒀈𒀉𒀊𒀋𒀌𒀍𒀎𒀏𒀐𒀑𒀒𒀓𒀔𒀕𒀖𒀗𒀘𒀙𒀚𒀛𒀜𒀝𒀞𒀟𒀠𒀡𒀢𒀣𒀤𒀥𒀦𒀧𒀨𒀩𒀪𒀫𒀬𒀭𒀮𒀯𒀰𒀱𒈷𒌄";
+
+     var idx: usize = 0;
+     while (idx < cuneiform.len) : (idx += 1) {
+         try std.testing.expect(Parser.isSymbolChar(cuneiform[idx]));
+     }
+ }
+
+ // Whitespace, newlines and ASCII punctuation must not count as symbol characters.
+ test "non-symbol characters are detected" {
+     const rejected = "\n\r \t!@#$%^&*()-=+[]{}\\|;:'\"/?,.<>`~";
+
+     var idx: usize = 0;
+     while (idx < rejected.len) : (idx += 1) {
+         const is_symbol_char = Parser.isSymbolChar(rejected[idx]);
+         try std.testing.expect(!is_symbol_char);
+     }
+ }
+
+ // `identifySymbol` must return the longest symbol at the start of the input
+ // and stop at the first non-symbol byte; the xH/xF/xB local symbols are
+ // accepted as well.
+ test "symbols are identified" {
+     const test_cases = [_][]const u8{
+         "_asdf$%@",
+         "ASFLKJ3332__q5 ;asdf;lk",
+         "asdf𒀤𒀥𒀦\nalsfkd",
+         "2H",
+         "5F",
+         "0B",
+     };
+
+     const expected = [_][]const u8{
+         "_asdf",
+         "ASFLKJ3332__q5",
+         "asdf𒀤𒀥𒀦",
+         "2H",
+         "5F",
+         "0B",
+     };
+
+     // Iterating both arrays together keeps the loop in step with the tables
+     // (a length mismatch is rejected) instead of relying on a hard-coded count.
+     for (test_cases, expected) |case, want| {
+         var parser = Parser.init(std.testing.allocator, case);
+         // Deferred so the parser is released even when an expectation fails.
+         defer parser.deinit();
+
+         const symbol = try parser.identifySymbol();
+         try std.testing.expectEqualStrings(want, symbol);
+     }
+ }
+
+ // Input that does not start with a symbol must yield `error.NoSymbol`, even
+ // when a symbol follows later in the input.
+ test "no symbols are found successfully" {
+     const test_cases = [_][]const u8{
+         " _asdf",
+         ";ASFLKJ3332__q5",
+         "\nasdf𒀤𒀥𒀦",
+     };
+
+     for (test_cases) |case| {
+         var parser = Parser.init(std.testing.allocator, case);
+         // Deferred so the parser is released even when the expectation fails.
+         defer parser.deinit();
+
+         try std.testing.expectError(error.NoSymbol, parser.identifySymbol());
+     }
+ }
+
+ // `identifyOperation` must recognize the mnemonic or pseudo operation at the
+ // start of the input and ignore whatever follows the run of symbol characters.
+ test "opcodes are identified" {
+     const test_cases = [_][]const u8{
+         "2ADDU%aldfk",
+         "GO ",
+         "ADD\taksfdjas",
+         "GREG\n",
+         "IS",
+     };
+
+     const expected = [_]opcodes.Operation{
+         opcodes.Operation{ .opcode = opcodes.Opcode._2ADDU },
+         opcodes.Operation{ .opcode = opcodes.Opcode.GO },
+         opcodes.Operation{ .opcode = opcodes.Opcode.ADD },
+         opcodes.Operation{ .pseudo_op = opcodes.PseudoOp.GREG },
+         opcodes.Operation{ .pseudo_op = opcodes.PseudoOp.IS },
+     };
+
+     // Iterating both arrays together keeps the loop in step with the tables
+     // instead of relying on a hard-coded count.
+     for (test_cases, expected) |case, want| {
+         var parser = Parser.init(std.testing.allocator, case);
+         // Deferred so the parser is released even when an expectation fails.
+         defer parser.deinit();
+
+         const op = try parser.identifyOperation();
+         try std.testing.expectEqual(want, op);
+     }
+ }
+
+ // Input that does not start with a known mnemonic or pseudo operation must
+ // yield `error.NoOpcode`.
+ test "no opcodes are found successfully" {
+     const test_cases = [_][]const u8{
+         " _asdf",
+         ";ASFLKJ3332__q5",
+         "\nasdf𒀤𒀥𒀦",
+         "asdfklajsdfl",
+     };
+
+     for (test_cases) |case| {
+         var parser = Parser.init(std.testing.allocator, case);
+         // Deferred so the parser is released even when the expectation fails.
+         defer parser.deinit();
+
+         try std.testing.expectError(error.NoOpcode, parser.identifyOperation());
+     }
+ }
+
+ // `identifyDecimal` must parse the leading run of decimal digits and stop at
+ // the first non-digit byte.
+ test "decimals are recognized" {
+     const test_cases = [_][]const u8{
+         "012314aslkfdj",
+         "1234567890 43",
+         "1234567891234567889\n123124",
+     };
+
+     const expected = [_]u64{
+         12314,
+         1234567890,
+         1234567891234567889,
+     };
+
+     // Iterating both arrays together keeps the loop in step with the tables
+     // instead of relying on a hard-coded count.
+     for (test_cases, expected) |case, want| {
+         var parser = Parser.init(std.testing.allocator, case);
+         // Deferred so the parser is released even when an expectation fails.
+         defer parser.deinit();
+
+         const value = try parser.identifyDecimal();
+         try std.testing.expectEqual(want, value);
+     }
+ }
+
+ // Empty input, input that does not start with a digit, and values too large
+ // for a u64 must all yield `error.NoDecimal`.
+ test "malformed decimals are not recognized" {
+     const test_cases = [_][]const u8{
+         "",
+         "asdf123",
+         " 123",
+         "12345678901234567890123456789012345678901234567890",
+     };
+
+     for (test_cases) |case| {
+         var parser = Parser.init(std.testing.allocator, case);
+         // Deferred so the parser is released even when the expectation fails.
+         defer parser.deinit();
+
+         try std.testing.expectError(error.NoDecimal, parser.identifyDecimal());
+     }
+ }
+
+ // `identifyHexadecimal` must parse the hex digits after a leading `#` and stop
+ // at the first byte that is not a hex digit.
+ test "hexadecimals are recognized" {
+     const test_cases = [_][]const u8{
+         "#012314saslkfdj",
+         "#1234567890abcdef 43",
+         "#1234567891\n123124",
+     };
+
+     const expected = [_]u64{
+         0x12314,
+         0x1234567890abcdef,
+         0x1234567891,
+     };
+
+     // Iterating both arrays together keeps the loop in step with the tables
+     // instead of relying on a hard-coded count.
+     for (test_cases, expected) |case, want| {
+         var parser = Parser.init(std.testing.allocator, case);
+         // Deferred so the parser is released even when an expectation fails.
+         defer parser.deinit();
+
+         const value = try parser.identifyHexadecimal();
+         try std.testing.expectEqual(want, value);
+     }
+ }
+
+ // A missing `#`, a `#` without digits, and values too large for a u64 must
+ // all yield `error.NoHexadecimal`.
+ test "malformed hexadecimals are not recognized" {
+     const test_cases = [_][]const u8{
+         "",
+         "sasdf123",
+         " 123",
+         "#12345678901234567890123456789012345678901234567890",
+         "#",
+     };
+
+     for (test_cases) |case| {
+         var parser = Parser.init(std.testing.allocator, case);
+         // Deferred so the parser is released even when the expectation fails.
+         defer parser.deinit();
+
+         try std.testing.expectError(error.NoHexadecimal, parser.identifyHexadecimal());
+     }
+ }
+
+ // A single code point between single quotes must be returned as its UTF-8
+ // bytes, including a multi-byte character.
+ test "characters are recognized" {
+     const test_cases = [_][]const u8{
+         "'a'",
+         "'1'",
+         "'𒀤'",
+     };
+
+     const expected = [_][]const u8{
+         "a",
+         "1",
+         "𒀤",
+     };
+
+     // Iterating both arrays together keeps the loop in step with the tables
+     // instead of relying on a hard-coded count.
+     for (test_cases, expected) |case, want| {
+         var parser = Parser.init(std.testing.allocator, case);
+         // Deferred so the parser is released even when an expectation fails.
+         defer parser.deinit();
+
+         const char = try parser.identifyChar();
+         try std.testing.expectEqualStrings(want, char);
+     }
+ }
+
+ // Quoted text holding more than one character, an empty pair of quotes and a
+ // lone opening quote must all yield `error.NoChar`.
+ test "invalid unicode sequences are not characters" {
+     const test_cases = [_][]const u8{
+         "'asdf'",
+         "'asdfg'",
+         "'as'",
+         "''",
+         "'",
+     };
+
+     for (test_cases) |case| {
+         var parser = Parser.init(std.testing.allocator, case);
+         // Deferred so the parser is released even when the expectation fails.
+         defer parser.deinit();
+
+         try std.testing.expectError(error.NoChar, parser.identifyChar());
+     }
+ }
+
+ // The contents between double quotes must be returned unchanged, including
+ // single quotes, punctuation and multi-byte characters.
+ test "strings are recognized" {
+     const test_cases = [_][]const u8{
+         "\" \"",
+         "\"aslkdfjlaskdfj lkasjflkasjdflaksjfd''12309)($)(#$[[]𒀤\"",
+     };
+
+     const expected = [_][]const u8{
+         " ",
+         "aslkdfjlaskdfj lkasjflkasjdflaksjfd''12309)($)(#$[[]𒀤",
+     };
+
+     // Iterating both arrays together keeps the loop in step with the tables
+     // instead of relying on a hard-coded count.
+     for (test_cases, expected) |case, want| {
+         var parser = Parser.init(std.testing.allocator, case);
+         // Deferred so the parser is released even when an expectation fails.
+         defer parser.deinit();
+
+         const string = try parser.identifyString();
+         try std.testing.expectEqualStrings(want, string);
+     }
+ }
+
+ // An empty string, a lone opening quote and a string containing a newline
+ // must all yield `error.NoString`.
+ test "invalid strings are not recognized" {
+     const test_cases = [_][]const u8{
+         "\"\"",
+         "\"",
+         "\"\n\"",
+     };
+
+     for (test_cases) |case| {
+         var parser = Parser.init(std.testing.allocator, case);
+         // Deferred so the parser is released even when the expectation fails.
+         defer parser.deinit();
+
+         try std.testing.expectError(error.NoString, parser.identifyString());
+     }
+ }
+
+ // `identifyConstant` must dispatch on the first byte: decimal and hexadecimal
+ // literals come back as `.number`, quoted characters and strings as `.string`.
+ test "constants are recognized" {
+     const test_cases = [_][]const u8{
+         "1234567890 1234",
+         "#1234567890abcdef;%#*(",
+         "'a'uuuuuu",
+         "\"hello \"world",
+     };
+
+     const expected = [_]ConstantValue{
+         ConstantValue{ .number = 1234567890 },
+         ConstantValue{ .number = 0x1234567890abcdef },
+         ConstantValue{ .string = "a" },
+         ConstantValue{ .string = "hello " },
+     };
+
+     // Iterating both arrays together keeps the loop in step with the tables
+     // instead of relying on a hard-coded count.
+     for (test_cases, expected) |case, want| {
+         var parser = Parser.init(std.testing.allocator, case);
+         // The parser was previously never released in this test.
+         defer parser.deinit();
+
+         const constant = try parser.identifyConstant();
+         switch (constant) {
+             .number => |number| try std.testing.expectEqual(want.number, number),
+             .string => |string| try std.testing.expectEqualStrings(want.string, string),
+         }
+     }
+ }
diff --git a/src/root.zig b/src/root.zig
new file mode 100644
index 0000000..fc2c0dc
--- /dev/null
+++ b/src/root.zig
@@ -0,0 +1,10 @@
+const std = @import("std");
+const testing = std.testing;
+
+const opcodes = @import("opcodes.zig");
+const parser = @import("parser.zig");
+
+ test {
+ // Reference the imported files from a test block. Presumably this is what
+ // makes a test build of this root file also pick up the `test` blocks
+ // declared in opcodes.zig and parser.zig — TODO confirm.
+ _ = opcodes;
+ _ = parser;
+ }
diff --git a/src/temp.py b/src/temp.py
new file mode 100644
index 0000000..5577e9e
--- /dev/null
+++ b/src/temp.py
@@ -0,0 +1,153 @@
+x = [
+ "TRAP",
+ "FCMP",
+ "FUN",
+ "FEQL",
+ "FADD",
+ "FIX",
+ "FSUB",
+ "FIXU",
+ "FLOT",
+ "FLOTU",
+ "SFLOT",
+ "SFLOTU",
+ "FMUL",
+ "FCMPE",
+ "FUNE",
+ "FEQLE",
+ "FDIV",
+ "FSQRT",
+ "FREM",
+ "FINT",
+ "MUL",
+ "MULU",
+ "DIV",
+ "DIVU",
+ "ADD",
+ "ADDU",
+ "SUB",
+ "SUBU",
+ "2ADDU",
+ "4ADDU",
+ "8ADDU",
+ "16ADDU",
+ "CMP",
+ "CMPU",
+ "NEG",
+ "NEGU",
+ "SL",
+ "SLU",
+ "SR",
+ "SRU",
+ "BN",
+ "BZ",
+ "BP",
+ "BOD",
+ "BNN",
+ "BNZ",
+ "BNP",
+ "BEV",
+ "PBN",
+ "PBZ",
+ "PBP",
+ "PBOD",
+ "PBNN",
+ "PBNZ",
+ "PBNP",
+ "PBEV",
+ "CSN",
+ "CSZ",
+ "CSP",
+ "CSOD",
+ "CSNN",
+ "CSNZ",
+ "CSNP",
+ "CSEV",
+ "ZSN",
+ "ZSZ",
+ "ZSP",
+ "ZSOD",
+ "ZSNN",
+ "ZSNZ",
+ "ZSNP",
+ "ZSEV",
+ "LDB",
+ "LDBU",
+ "LDW",
+ "LDWU",
+ "LDT",
+ "LDTU",
+ "LDO",
+ "LDOU",
+ "LDSF",
+ "LDHT",
+ "CSWAP",
+ "LDUNC",
+ "LDVTS",
+ "PRELD",
+ "PREGO",
+ "GO",
+ "STB",
+ "STBU",
+ "STW",
+ "STWU",
+ "STT",
+ "STTU",
+ "STO",
+ "STOU",
+ "STSF",
+ "STHT",
+ "STCO",
+ "STUNC",
+ "SYNCD",
+ "PREST",
+ "SYNCID",
+ "PUSHGO",
+ "OR",
+ "ORN",
+ "NOR",
+ "XOR",
+ "AND",
+ "ANDN",
+ "NAND",
+ "NXOR",
+ "BDIF",
+ "WDIF",
+ "TDIF",
+ "ODIF",
+ "MUX",
+ "SADD",
+ "MOR",
+ "MXOR",
+ "SETH",
+ "SETMH",
+ "SETML",
+ "SETL",
+ "INCH",
+ "INCMH",
+ "INCML",
+ "INCL",
+ "ORH",
+ "ORMH",
+ "ORML",
+ "ORL",
+ "ANDNH",
+ "ANDNMH",
+ "ANDNML",
+ "ANDNL",
+ "JMP",
+ "PUSHJ",
+ "GETA",
+ "PUT",
+ "POP",
+ "RESUME",
+ "SAVE",
+ "UNSAVE",
+ "SYNC",
+ "SWYM",
+ "GET",
+ "TRIP",
+]
+
+for val in x:
+ print(f"Operation{{.opcode = Opcode.{val}}},")