aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorjjanzen <jjanzen@jjanzen.ca>2025-02-27 14:06:30 -0600
committerjjanzen <jjanzen@jjanzen.ca>2025-02-27 14:06:30 -0600
commit49dc6a9b2b5a4d304c5caf637289c84ff6227e24 (patch)
treed518a3cb967060a61c509ca0590d4375f92b940e
parent29f04314658fd9b2a93ae78fa235e71cae0d700d (diff)
add some new errors
-rw-r--r--src/parser.zig152
1 files changed, 119 insertions, 33 deletions
diff --git a/src/parser.zig b/src/parser.zig
index 1b35596..eaa9c51 100644
--- a/src/parser.zig
+++ b/src/parser.zig
@@ -84,24 +84,30 @@ pub const Parser = struct {
}
}
+ const DecimalError = std.fmt.ParseIntError;
+
/// Determine whether the cursor points at a valid integer in base 10
/// Move the cursor past the integer and return it
/// The cursor is advanced over every byte accepted by `isDecimal` before parsing happens,
/// so it stays past those bytes even when an error is returned
/// With this change the error from `std.fmt.parseInt` is returned unchanged instead of
/// being collapsed into `error.NoDecimal`
- fn identifyDecimal(self: *Parser) !u64 {
+ fn identifyDecimal(self: *Parser) DecimalError!u64 {
const start = self.ch_pos;
// Scan forward for as long as the byte under the cursor is a decimal digit
while (isDecimal(self.getByte(self.ch_pos))) {
self.ch_pos += 1;
}
const end = self.ch_pos;
// Only the scanned digits input[start..end] are handed to the parser; when no digit was
// scanned this slice is empty and parseInt decides the outcome
- return std.fmt.parseInt(u64, self.input[start..end], 10) catch return error.NoDecimal;
+ return std.fmt.parseInt(u64, self.input[start..end], 10);
}
+ const HexadecimalError = std.fmt.ParseIntError || error{
+ WrongStartingCharacter,
+ };
+
/// Determine whether the cursor points at a valid integer in base 16
/// Base 16 is identified by a number starting with #
/// Move the cursor past the integer and return it
- fn identifyHexadecimal(self: *Parser) !u64 {
+ fn identifyHexadecimal(self: *Parser) HexadecimalError!u64 {
if (self.getByte(self.ch_pos) != '#') {
- return error.NoHexadecimal;
+ return HexadecimalError.WrongStartingCharacter;
}
self.ch_pos += 1;
@@ -112,14 +118,21 @@ pub const Parser = struct {
}
const end = self.ch_pos;
- return std.fmt.parseInt(u64, self.input[start..end], 16) catch return error.NoHexadecimal;
+ return std.fmt.parseInt(u64, self.input[start..end], 16);
}
+ const CharError = error{
+ NoStartingDelimiter,
+ NoEndingDelimiter,
+ NoChar,
+ InvalidUtf8,
+ };
+
/// Determine whether the cursor points at a valid unicode character wrapped in single quotes
/// Move the cursor past the closing quote and return the character
- fn identifyChar(self: *Parser) !u21 {
+ fn identifyChar(self: *Parser) CharError!u21 {
if (self.getByte(self.ch_pos) != '\'') {
- return error.NoChar;
+ return CharError.NoStartingDelimiter;
}
self.ch_pos += 1;
@@ -128,63 +141,78 @@ pub const Parser = struct {
while (self.getByte(self.ch_pos) != 0 and self.getByte(self.ch_pos) != '\'') {
self.ch_pos += 1;
+ if (self.getByte(self.ch_pos) == ';' or self.getByte(self.ch_pos) == '\n') {
+ return CharError.NoEndingDelimiter;
+ }
if (self.ch_pos - start > 4) {
- return error.NoChar;
+ return CharError.NoEndingDelimiter;
}
}
const end = self.ch_pos;
self.ch_pos += 1;
if (end <= start) {
- return error.NoChar;
+ return CharError.NoChar;
}
- const view = std.unicode.Utf8View.init(self.input[start..end]) catch return error.NoChar;
+ const view = std.unicode.Utf8View.init(self.input[start..end]) catch return CharError.InvalidUtf8;
var iter = view.iterator();
var count: u8 = 0;
- var character: u21 = undefined;
+ var character: u21 = 0;
while (iter.nextCodepoint()) |u| {
character = u;
count += 1;
if (count > 1) {
- return error.NoChar;
+ return CharError.NoEndingDelimiter;
}
}
if (count != 1) {
- return error.NoChar;
+ return CharError.NoEndingDelimiter;
}
return character;
}
+ const StringError = error{
+ NoStartingDelimiter,
+ NoEndingDelimiter,
+ NoString,
+ };
+
/// Determine whether the cursor points at a valid string wrapped in double quotes
/// Note that a string has at least one character in it and that it cannot have " or newlines in it
/// Move the cursor past the string and return the string
/// The returned slice is a view into `self.input` that excludes both quotes
/// Errors after this change:
/// - NoStartingDelimiter: the byte under the cursor is not "
/// - NoEndingDelimiter: a newline was met before the closing ", or the scan stopped on a 0 byte
/// - NoString: the quotes are present but nothing is between them
/// The cursor is not restored when an error is returned
- fn identifyString(self: *Parser) ![]const u8 {
+ fn identifyString(self: *Parser) StringError![]const u8 {
if (self.getByte(self.ch_pos) != '"') {
- return error.NoString;
+ return StringError.NoStartingDelimiter;
}
// Skip the opening quote; the content starts right after it
self.ch_pos += 1;
const start = self.ch_pos;
// Scan until a closing quote or a 0 byte
// NOTE(review): presumably getByte yields 0 past the end of the input; confirm against getByte
while (self.getByte(self.ch_pos) != 0 and self.getByte(self.ch_pos) != '"') {
if (self.getByte(self.ch_pos) == '\n') {
- return error.NoString;
+ return StringError.NoEndingDelimiter;
}
self.ch_pos += 1;
}
const end = self.ch_pos;
// Consume the closing quote; the added else branch rejects input where the scan stopped
// on a 0 byte instead, which the removed code let fall through
if (self.getByte(self.ch_pos) == '"') {
self.ch_pos += 1;
+ } else {
+ return StringError.NoEndingDelimiter;
}
// An empty pair of quotes is rejected only after the closing quote has been consumed
if (end <= start) {
- return error.NoString;
+ return StringError.NoString;
}
return self.input[start..end];
}
+ const ConstantError = DecimalError || HexadecimalError || CharError || StringError || error{
+ UnexpectedSymbol,
+ };
+
/// Determine whether the cursor points at a valid constant
/// The constant may be a string or a number
/// Move the cursor past the constant and return it
@@ -206,15 +234,19 @@ pub const Parser = struct {
const string = try identifyString(self);
return ConstantValue{ .string = string };
},
- else => return error.NoConstant,
+ else => return ConstantError.UnexpectedSymbol,
}
}
+ const SymbolError = error{
+ UnexpectedSymbol,
+ };
+
/// Determine whether the cursor points at a symbol
/// A symbol starts with a letter and only has symbol characters after that point
/// There is an exception that there are 30 special symbols of the form xH, xF, and xB where x is a single decimal digit
/// Move the cursor past the symbol and return its name
- fn identifySymbol(self: *Parser) ![]const u8 {
+ fn identifySymbol(self: *Parser) SymbolError![]const u8 {
const start = self.ch_pos;
if ((isLetter(self.getByte(self.ch_pos)) or self.getByte(self.ch_pos) == '_')) {
self.ch_pos += 1;
@@ -234,7 +266,7 @@ pub const Parser = struct {
return self.input[start..end];
}
- return error.NoSymbol;
+ return SymbolError.UnexpectedSymbol;
}
/// Get the number associated with a given symbol
@@ -580,7 +612,7 @@ test "no symbols are found successfully" {
for (test_cases) |case| {
var parser = Parser.init(std.testing.allocator, case);
const symbol = parser.identifySymbol();
- try std.testing.expectEqual(error.NoSymbol, symbol);
+ try std.testing.expectEqual(error.UnexpectedSymbol, symbol);
parser.deinit();
}
}
@@ -655,10 +687,17 @@ test "malformed decimals are not recognized" {
"12345678901234567890123456789012345678901234567890",
};
- for (test_cases) |case| {
- var parser = Parser.init(std.testing.allocator, case);
+ const expected = [_]Parser.DecimalError{
+ Parser.DecimalError.InvalidCharacter,
+ Parser.DecimalError.InvalidCharacter,
+ Parser.DecimalError.InvalidCharacter,
+ Parser.DecimalError.Overflow,
+ };
+
+ for (0..test_cases.len) |i| {
+ var parser = Parser.init(std.testing.allocator, test_cases[i]);
const symbol = parser.identifyDecimal();
- try std.testing.expectEqual(error.NoDecimal, symbol);
+ try std.testing.expectEqual(expected[i], symbol);
parser.deinit();
}
}
@@ -693,10 +732,18 @@ test "malformed hexadecimals are not recognized" {
"#",
};
- for (test_cases) |case| {
- var parser = Parser.init(std.testing.allocator, case);
+ const expected = [_]Parser.HexadecimalError{
+ Parser.HexadecimalError.WrongStartingCharacter,
+ Parser.HexadecimalError.WrongStartingCharacter,
+ Parser.HexadecimalError.WrongStartingCharacter,
+ Parser.HexadecimalError.Overflow,
+ Parser.HexadecimalError.InvalidCharacter,
+ };
+
+ for (0..test_cases.len) |i| {
+ var parser = Parser.init(std.testing.allocator, test_cases[i]);
const symbol = parser.identifyHexadecimal();
- try std.testing.expectEqual(error.NoHexadecimal, symbol);
+ try std.testing.expectEqual(expected[i], symbol);
parser.deinit();
}
}
@@ -729,12 +776,22 @@ test "invalid unicode sequences are not characters" {
"'as'",
"''",
"'",
+ "a",
};
- for (test_cases) |case| {
- var parser = Parser.init(std.testing.allocator, case);
+ const expected = [_]Parser.CharError{
+ Parser.CharError.NoEndingDelimiter,
+ Parser.CharError.NoEndingDelimiter,
+ Parser.CharError.NoEndingDelimiter,
+ Parser.CharError.NoChar,
+ Parser.CharError.NoChar,
+ Parser.CharError.NoStartingDelimiter,
+ };
+
+ for (0..test_cases.len) |i| {
+ var parser = Parser.init(std.testing.allocator, test_cases[i]);
const symbol = parser.identifyChar();
- try std.testing.expectEqual(error.NoChar, symbol);
+ try std.testing.expectEqual(expected[i], symbol);
parser.deinit();
}
}
@@ -763,12 +820,20 @@ test "invalid strings are not recognized" {
"\"\"",
"\"",
"\"\n\"",
+ "asdf",
};
- for (test_cases) |case| {
- var parser = Parser.init(std.testing.allocator, case);
+ const expected = [_]Parser.StringError{
+ Parser.StringError.NoString,
+ Parser.StringError.NoEndingDelimiter,
+ Parser.StringError.NoEndingDelimiter,
+ Parser.StringError.NoStartingDelimiter,
+ };
+
+ for (0..test_cases.len) |i| {
+ var parser = Parser.init(std.testing.allocator, test_cases[i]);
const symbol = parser.identifyString();
- try std.testing.expectEqual(error.NoString, symbol);
+ try std.testing.expectEqual(expected[i], symbol);
parser.deinit();
}
}
@@ -799,6 +864,27 @@ test "constants are recognized" {
}
}
// Checks that identifyConstant reports a specific ConstantError for each malformed input
// test_cases[i] is paired by index with expected[i], so the two tables must stay the same length
+test "invalid constants are recognized" {
+ const test_cases = [_][]const u8{
+ "$123",
+ " 123",
+ "'aa'",
+ };
+
+ const expected = [_]Parser.ConstantError{
+ Parser.ConstantError.UnexpectedSymbol,
+ Parser.ConstantError.UnexpectedSymbol,
// NOTE(review): "'aa'" presumably reaches identifyChar and fails on its more-than-one-codepoint check; confirm against identifyConstant
+ Parser.ConstantError.NoEndingDelimiter,
+ };
+
+ for (0..test_cases.len) |i| {
+ var parser = Parser.init(std.testing.allocator, test_cases[i]);
// The defer sits inside the loop body, so each parser is released at the end of its own iteration
+ defer parser.deinit();
+ const symbol = parser.identifyConstant();
+ try std.testing.expectEqual(expected[i], symbol);
+ }
+}
+
test "basic primaries are identified" {
const test_cases = [_][]const u8{
"1234",