Add specific error sets to the parser's identify functions

author: jjanzen <jjanzen@jjanzen.ca> 2025-02-27 14:06:30 -0600
committer: jjanzen <jjanzen@jjanzen.ca> 2025-02-27 14:06:30 -0600
commit: 49dc6a9b2b5a4d304c5caf637289c84ff6227e24 (patch)
tree: d518a3cb967060a61c509ca0590d4375f92b940e
parent: 29f04314658fd9b2a93ae78fa235e71cae0d700d (diff)
1 file changed, 119 insertions, 33 deletions
diff --git a/src/parser.zig b/src/parser.zig
index 1b35596..eaa9c51 100644
--- a/src/parser.zig
+++ b/src/parser.zig
@@ -84,24 +84,30 @@ pub const Parser = struct {
         }
     }
 
+    const DecimalError = std.fmt.ParseIntError;
+
     /// Determine whether the cursor points at a valid integer in base 10
     /// Move the cursor past the integer and return it
-    fn identifyDecimal(self: *Parser) !u64 {
+    fn identifyDecimal(self: *Parser) DecimalError!u64 {
         const start = self.ch_pos;
         while (isDecimal(self.getByte(self.ch_pos))) {
             self.ch_pos += 1;
         }
         const end = self.ch_pos;
 
-        return std.fmt.parseInt(u64, self.input[start..end], 10) catch return error.NoDecimal;
+        return std.fmt.parseInt(u64, self.input[start..end], 10);
     }
 
+    const HexadecimalError = std.fmt.ParseIntError || error{
+        WrongStartingCharacter,
+    };
+
     /// Determine whether the cursor points at a valid integer in base 16
     /// Base 16 is identified by a number starting with #
     /// Move the cursor past the integer and return it
-    fn identifyHexadecimal(self: *Parser) !u64 {
+    fn identifyHexadecimal(self: *Parser) HexadecimalError!u64 {
         if (self.getByte(self.ch_pos) != '#') {
-            return error.NoHexadecimal;
+            return HexadecimalError.WrongStartingCharacter;
         }
 
         self.ch_pos += 1;
@@ -112,14 +118,21 @@ pub const Parser = struct {
         }
         const end = self.ch_pos;
 
-        return std.fmt.parseInt(u64, self.input[start..end], 16) catch return error.NoHexadecimal;
+        return std.fmt.parseInt(u64, self.input[start..end], 16);
     }
 
+    const CharError = error{
+        NoStartingDelimiter,
+        NoEndingDelimiter,
+        NoChar,
+        InvalidUtf8,
+    };
+
     /// Determine whether the cursor points at a valid unicode character wrapped in single quotes
     /// Move the cursor past the closing quote and return the character
-    fn identifyChar(self: *Parser) !u21 {
+    fn identifyChar(self: *Parser) CharError!u21 {
         if (self.getByte(self.ch_pos) != '\'') {
-            return error.NoChar;
+            return CharError.NoStartingDelimiter;
         }
 
         self.ch_pos += 1;
@@ -128,63 +141,78 @@ pub const Parser = struct {
         while (self.getByte(self.ch_pos) != 0 and self.getByte(self.ch_pos) != '\'') {
             self.ch_pos += 1;
 
+            if (self.getByte(self.ch_pos) == ';' or self.getByte(self.ch_pos) == '\n') {
+                return CharError.NoEndingDelimiter;
+            }
             if (self.ch_pos - start > 4) {
-                return error.NoChar;
+                return CharError.NoEndingDelimiter;
             }
         }
         const end = self.ch_pos;
         self.ch_pos += 1;
 
         if (end <= start) {
-            return error.NoChar;
+            return CharError.NoChar;
         }
 
-        const view = std.unicode.Utf8View.init(self.input[start..end]) catch return error.NoChar;
+        const view = std.unicode.Utf8View.init(self.input[start..end]) catch return CharError.InvalidUtf8;
         var iter = view.iterator();
         var count: u8 = 0;
-        var character: u21 = undefined;
+        var character: u21 = 0;
         while (iter.nextCodepoint()) |u| {
             character = u;
             count += 1;
             if (count > 1) {
-                return error.NoChar;
+                return CharError.NoEndingDelimiter;
             }
         }
         if (count != 1) {
-            return error.NoChar;
+            return CharError.NoEndingDelimiter;
         }
 
         return character;
     }
 
+    const StringError = error{
+        NoStartingDelimiter,
+        NoEndingDelimiter,
+        NoString,
+    };
+
     /// Determine whether the cursor points at a valid string wrapped in double quotes
     /// Note that a string has at least one character in it and that it cannot have " or newlines in it
     /// Move the cursor past the string and return the string
-    fn identifyString(self: *Parser) ![]const u8 {
+    fn identifyString(self: *Parser) StringError![]const u8 {
         if (self.getByte(self.ch_pos) != '"') {
-            return error.NoString;
+            return StringError.NoStartingDelimiter;
         }
 
         self.ch_pos += 1;
         const start = self.ch_pos;
         while (self.getByte(self.ch_pos) != 0 and self.getByte(self.ch_pos) != '"') {
             if (self.getByte(self.ch_pos) == '\n') {
-                return error.NoString;
+                return StringError.NoEndingDelimiter;
             }
             self.ch_pos += 1;
         }
         const end = self.ch_pos;
         if (self.getByte(self.ch_pos) == '"') {
             self.ch_pos += 1;
+        } else {
+            return StringError.NoEndingDelimiter;
         }
 
         if (end <= start) {
-            return error.NoString;
+            return StringError.NoString;
         }
 
         return self.input[start..end];
     }
 
+    const ConstantError = DecimalError || HexadecimalError || CharError || StringError || error{
+        UnexpectedSymbol,
+    };
+
     /// Determine whether the cursor points at a valid constant
     /// The constant may be a string or a number
     /// Move the cursor past the constant and return it
@@ -206,15 +234,19 @@ pub const Parser = struct {
                 const string = try identifyString(self);
                 return ConstantValue{ .string = string };
             },
-            else => return error.NoConstant,
+            else => return ConstantError.UnexpectedSymbol,
         }
     }
 
+    const SymbolError = error{
+        UnexpectedSymbol,
+    };
+
     /// Determine whether the cursor points at a symbol
     /// A symbol starts with a letter and only has symbol characters after that point
     /// There is an exception that there are 30 special symbols of the form xH, xF, and xB where x is a single decimal digit
     /// Move the cursor past the symbol and return its name
-    fn identifySymbol(self: *Parser) ![]const u8 {
+    fn identifySymbol(self: *Parser) SymbolError![]const u8 {
         const start = self.ch_pos;
         if ((isLetter(self.getByte(self.ch_pos)) or self.getByte(self.ch_pos) == '_')) {
             self.ch_pos += 1;
@@ -234,7 +266,7 @@ pub const Parser = struct {
             return self.input[start..end];
         }
 
-        return error.NoSymbol;
+        return SymbolError.UnexpectedSymbol;
     }
 
     /// Get the number associated with a given symbol
@@ -580,7 +612,7 @@ test "no symbols are found successfully" {
     for (test_cases) |case| {
         var parser = Parser.init(std.testing.allocator, case);
         const symbol = parser.identifySymbol();
-        try std.testing.expectEqual(error.NoSymbol, symbol);
+        try std.testing.expectEqual(error.UnexpectedSymbol, symbol);
         parser.deinit();
     }
 }
@@ -655,10 +687,17 @@ test "malformed decimals are not recognized" {
         "12345678901234567890123456789012345678901234567890",
     };
 
-    for (test_cases) |case| {
-        var parser = Parser.init(std.testing.allocator, case);
+    const expected = [_]Parser.DecimalError{
+        Parser.DecimalError.InvalidCharacter,
+        Parser.DecimalError.InvalidCharacter,
+        Parser.DecimalError.InvalidCharacter,
+        Parser.DecimalError.Overflow,
+    };
+
+    for (0..test_cases.len) |i| {
+        var parser = Parser.init(std.testing.allocator, test_cases[i]);
         const symbol = parser.identifyDecimal();
-        try std.testing.expectEqual(error.NoDecimal, symbol);
+        try std.testing.expectEqual(expected[i], symbol);
         parser.deinit();
     }
 }
@@ -693,10 +732,18 @@ test "malformed hexadecimals are not recognized" {
         "#",
     };
 
-    for (test_cases) |case| {
-        var parser = Parser.init(std.testing.allocator, case);
+    const expected = [_]Parser.HexadecimalError{
+        Parser.HexadecimalError.WrongStartingCharacter,
+        Parser.HexadecimalError.WrongStartingCharacter,
+        Parser.HexadecimalError.WrongStartingCharacter,
+        Parser.HexadecimalError.Overflow,
+        Parser.HexadecimalError.InvalidCharacter,
+    };
+
+    for (0..test_cases.len) |i| {
+        var parser = Parser.init(std.testing.allocator, test_cases[i]);
         const symbol = parser.identifyHexadecimal();
-        try std.testing.expectEqual(error.NoHexadecimal, symbol);
+        try std.testing.expectEqual(expected[i], symbol);
         parser.deinit();
     }
 }
@@ -729,12 +776,22 @@ test "invalid unicode sequences are not characters" {
         "'as'",
         "''",
         "'",
+        "a",
     };
 
-    for (test_cases) |case| {
-        var parser = Parser.init(std.testing.allocator, case);
+    const expected = [_]Parser.CharError{
+        Parser.CharError.NoEndingDelimiter,
+        Parser.CharError.NoEndingDelimiter,
+        Parser.CharError.NoEndingDelimiter,
+        Parser.CharError.NoChar,
+        Parser.CharError.NoChar,
+        Parser.CharError.NoStartingDelimiter,
+    };
+
+    for (0..test_cases.len) |i| {
+        var parser = Parser.init(std.testing.allocator, test_cases[i]);
         const symbol = parser.identifyChar();
-        try std.testing.expectEqual(error.NoChar, symbol);
+        try std.testing.expectEqual(expected[i], symbol);
         parser.deinit();
     }
 }
@@ -763,12 +820,20 @@ test "invalid strings are not recognized" {
         "\"\"",
         "\"",
         "\"\n\"",
+        "asdf",
     };
 
-    for (test_cases) |case| {
-        var parser = Parser.init(std.testing.allocator, case);
+    const expected = [_]Parser.StringError{
+        Parser.StringError.NoString,
+        Parser.StringError.NoEndingDelimiter,
+        Parser.StringError.NoEndingDelimiter,
+        Parser.StringError.NoStartingDelimiter,
+    };
+
+    for (0..test_cases.len) |i| {
+        var parser = Parser.init(std.testing.allocator, test_cases[i]);
         const symbol = parser.identifyString();
-        try std.testing.expectEqual(error.NoString, symbol);
+        try std.testing.expectEqual(expected[i], symbol);
         parser.deinit();
     }
 }
@@ -799,6 +864,27 @@ test "constants are recognized" {
     }
 }
 
+test "invalid constants are recognized" {
+    const test_cases = [_][]const u8{
+        "$123",
+        " 123",
+        "'aa'",
+    };
+
+    const expected = [_]Parser.ConstantError{
+        Parser.ConstantError.UnexpectedSymbol,
+        Parser.ConstantError.UnexpectedSymbol,
+        Parser.ConstantError.NoEndingDelimiter,
+    };
+
+    for (0..test_cases.len) |i| {
+        var parser = Parser.init(std.testing.allocator, test_cases[i]);
+        defer parser.deinit();
+        const symbol = parser.identifyConstant();
+        try std.testing.expectEqual(expected[i], symbol);
+    }
+}
+
 test "basic primaries are identified" {
     const test_cases = [_][]const u8{
         "1234",
author	jjanzen <jjanzen@jjanzen.ca>	2025-02-27 14:06:30 -0600
committer	jjanzen <jjanzen@jjanzen.ca>	2025-02-27 14:06:30 -0600
commit	49dc6a9b2b5a4d304c5caf637289c84ff6227e24 (patch)
tree	d518a3cb967060a61c509ca0590d4375f92b940e
parent	29f04314658fd9b2a93ae78fa235e71cae0d700d (diff)