diff options
author | jjanzen <jjanzen@jjanzen.ca> | 2025-02-27 14:06:30 -0600 |
---|---|---|
committer | jjanzen <jjanzen@jjanzen.ca> | 2025-02-27 14:06:30 -0600 |
commit | 49dc6a9b2b5a4d304c5caf637289c84ff6227e24 (patch) | |
tree | d518a3cb967060a61c509ca0590d4375f92b940e | |
parent | 29f04314658fd9b2a93ae78fa235e71cae0d700d (diff) |
add some new errors
-rw-r--r-- | src/parser.zig | 152 |
1 files changed, 119 insertions, 33 deletions
diff --git a/src/parser.zig b/src/parser.zig index 1b35596..eaa9c51 100644 --- a/src/parser.zig +++ b/src/parser.zig @@ -84,24 +84,30 @@ pub const Parser = struct { } } + const DecimalError = std.fmt.ParseIntError; + /// Determine whether the cursor points at a valid integer in base 10 /// Move the cursor past the integer and return it - fn identifyDecimal(self: *Parser) !u64 { + fn identifyDecimal(self: *Parser) DecimalError!u64 { const start = self.ch_pos; while (isDecimal(self.getByte(self.ch_pos))) { self.ch_pos += 1; } const end = self.ch_pos; - return std.fmt.parseInt(u64, self.input[start..end], 10) catch return error.NoDecimal; + return std.fmt.parseInt(u64, self.input[start..end], 10); } + const HexadecimalError = std.fmt.ParseIntError || error{ + WrongStartingCharacter, + }; + /// Determine whether the cursor points at a valid integer in base 16 /// Base 16 is identified by a number starting with # /// Move the cursor past the integer and return it - fn identifyHexadecimal(self: *Parser) !u64 { + fn identifyHexadecimal(self: *Parser) HexadecimalError!u64 { if (self.getByte(self.ch_pos) != '#') { - return error.NoHexadecimal; + return HexadecimalError.WrongStartingCharacter; } self.ch_pos += 1; @@ -112,14 +118,21 @@ pub const Parser = struct { } const end = self.ch_pos; - return std.fmt.parseInt(u64, self.input[start..end], 16) catch return error.NoHexadecimal; + return std.fmt.parseInt(u64, self.input[start..end], 16); } + const CharError = error{ + NoStartingDelimiter, + NoEndingDelimiter, + NoChar, + InvalidUtf8, + }; + /// Determine whether the cursor points at a valid unicode character wrapped in single quotes /// Move the cursor past the closing quote and return the character - fn identifyChar(self: *Parser) !u21 { + fn identifyChar(self: *Parser) CharError!u21 { if (self.getByte(self.ch_pos) != '\'') { - return error.NoChar; + return CharError.NoStartingDelimiter; } self.ch_pos += 1; @@ -128,63 +141,78 @@ pub const Parser = struct { while (self.getByte(self.ch_pos) != 0 and self.getByte(self.ch_pos) != '\'') { self.ch_pos += 1; + if (self.getByte(self.ch_pos) == ';' or self.getByte(self.ch_pos) == '\n') { + return CharError.NoEndingDelimiter; + } if (self.ch_pos - start > 4) { - return error.NoChar; + return CharError.NoEndingDelimiter; } } const end = self.ch_pos; self.ch_pos += 1; if (end <= start) { - return error.NoChar; + return CharError.NoChar; } - const view = std.unicode.Utf8View.init(self.input[start..end]) catch return error.NoChar; + const view = std.unicode.Utf8View.init(self.input[start..end]) catch return CharError.InvalidUtf8; var iter = view.iterator(); var count: u8 = 0; - var character: u21 = undefined; + var character: u21 = 0; while (iter.nextCodepoint()) |u| { character = u; count += 1; if (count > 1) { - return error.NoChar; + return CharError.NoEndingDelimiter; } } if (count != 1) { - return error.NoChar; + return CharError.NoEndingDelimiter; } return character; } + const StringError = error{ + NoStartingDelimiter, + NoEndingDelimiter, + NoString, + }; + /// Determine whether the cursor points at a valid string wrapped in double quotes /// Note that a string has at least one character in it and that it cannot have " or newlines in it /// Move the cursor past the string and return the string - fn identifyString(self: *Parser) ![]const u8 { + fn identifyString(self: *Parser) StringError![]const u8 { if (self.getByte(self.ch_pos) != '"') { - return error.NoString; + return StringError.NoStartingDelimiter; } self.ch_pos += 1; const start = self.ch_pos; while (self.getByte(self.ch_pos) != 0 and self.getByte(self.ch_pos) != '"') { if (self.getByte(self.ch_pos) == '\n') { - return error.NoString; + return StringError.NoEndingDelimiter; } self.ch_pos += 1; } const end = self.ch_pos; if (self.getByte(self.ch_pos) == '"') { self.ch_pos += 1; + } else { + return StringError.NoEndingDelimiter; } if (end <= start) { - return error.NoString; + return StringError.NoString; } return self.input[start..end]; } + const ConstantError = DecimalError || HexadecimalError || CharError || StringError || error{ + UnexpectedSymbol, + }; + /// Determine whether the cursor points at a valid constant /// The constant may be a string or a number /// Move the cursor past the constant and return it @@ -206,15 +234,19 @@ pub const Parser = struct { const string = try identifyString(self); return ConstantValue{ .string = string }; }, - else => return error.NoConstant, + else => return ConstantError.UnexpectedSymbol, } } + const SymbolError = error{ + UnexpectedSymbol, + }; + /// Determine whether the cursor points at a symbol /// A symbol starts with a letter and only has symbol characters after that point /// There is an exception that there are 30 special symbols of the form xH, xF, and xB where x is a single decimal digit /// Move the cursor past the symbol and return its name - fn identifySymbol(self: *Parser) ![]const u8 { + fn identifySymbol(self: *Parser) SymbolError![]const u8 { const start = self.ch_pos; if ((isLetter(self.getByte(self.ch_pos)) or self.getByte(self.ch_pos) == '_')) { self.ch_pos += 1; @@ -234,7 +266,7 @@ pub const Parser = struct { return self.input[start..end]; } - return error.NoSymbol; + return SymbolError.UnexpectedSymbol; } /// Get the number associated with a given symbol @@ -580,7 +612,7 @@ test "no symbols are found successfully" { for (test_cases) |case| { var parser = Parser.init(std.testing.allocator, case); const symbol = parser.identifySymbol(); - try std.testing.expectEqual(error.NoSymbol, symbol); + try std.testing.expectEqual(error.UnexpectedSymbol, symbol); parser.deinit(); } } @@ -655,10 +687,17 @@ test "malformed decimals are not recognized" { "12345678901234567890123456789012345678901234567890", }; - for (test_cases) |case| { - var parser = Parser.init(std.testing.allocator, case); + const expected = [_]Parser.DecimalError{ + Parser.DecimalError.InvalidCharacter, + Parser.DecimalError.InvalidCharacter, + Parser.DecimalError.InvalidCharacter, + Parser.DecimalError.Overflow, + }; + + for (0..test_cases.len) |i| { + var parser = Parser.init(std.testing.allocator, test_cases[i]); const symbol = parser.identifyDecimal(); - try std.testing.expectEqual(error.NoDecimal, symbol); + try std.testing.expectEqual(expected[i], symbol); parser.deinit(); } } @@ -693,10 +732,18 @@ test "malformed hexadecimals are not recognized" { "#", }; - for (test_cases) |case| { - var parser = Parser.init(std.testing.allocator, case); + const expected = [_]Parser.HexadecimalError{ + Parser.HexadecimalError.WrongStartingCharacter, + Parser.HexadecimalError.WrongStartingCharacter, + Parser.HexadecimalError.WrongStartingCharacter, + Parser.HexadecimalError.Overflow, + Parser.HexadecimalError.InvalidCharacter, + }; + + for (0..test_cases.len) |i| { + var parser = Parser.init(std.testing.allocator, test_cases[i]); const symbol = parser.identifyHexadecimal(); - try std.testing.expectEqual(error.NoHexadecimal, symbol); + try std.testing.expectEqual(expected[i], symbol); parser.deinit(); } } @@ -729,12 +776,22 @@ test "invalid unicode sequences are not characters" { "'as'", "''", "'", + "a", }; - for (test_cases) |case| { - var parser = Parser.init(std.testing.allocator, case); + const expected = [_]Parser.CharError{ + Parser.CharError.NoEndingDelimiter, + Parser.CharError.NoEndingDelimiter, + Parser.CharError.NoEndingDelimiter, + Parser.CharError.NoChar, + Parser.CharError.NoChar, + Parser.CharError.NoStartingDelimiter, + }; + + for (0..test_cases.len) |i| { + var parser = Parser.init(std.testing.allocator, test_cases[i]); const symbol = parser.identifyChar(); - try std.testing.expectEqual(error.NoChar, symbol); + try std.testing.expectEqual(expected[i], symbol); parser.deinit(); } } @@ -763,12 +820,20 @@ test "invalid strings are not recognized" { "\"\"", "\"", "\"\n\"", + "asdf", }; - for (test_cases) |case| { - var parser = Parser.init(std.testing.allocator, case); + const expected = [_]Parser.StringError{ + Parser.StringError.NoString, + Parser.StringError.NoEndingDelimiter, + Parser.StringError.NoEndingDelimiter, + Parser.StringError.NoStartingDelimiter, + }; + + for (0..test_cases.len) |i| { + var parser = Parser.init(std.testing.allocator, test_cases[i]); const symbol = parser.identifyString(); - try std.testing.expectEqual(error.NoString, symbol); + try std.testing.expectEqual(expected[i], symbol); parser.deinit(); } } @@ -799,6 +864,27 @@ test "constants are recognized" { } } +test "invalid constants are recognized" { + const test_cases = [_][]const u8{ + "$123", + " 123", + "'aa'", + }; + + const expected = [_]Parser.ConstantError{ + Parser.ConstantError.UnexpectedSymbol, + Parser.ConstantError.UnexpectedSymbol, + Parser.ConstantError.NoEndingDelimiter, + }; + + for (0..test_cases.len) |i| { + var parser = Parser.init(std.testing.allocator, test_cases[i]); + defer parser.deinit(); + const symbol = parser.identifyConstant(); + try std.testing.expectEqual(expected[i], symbol); + } +} + test "basic primaries are identified" { const test_cases = [_][]const u8{ "1234", |