add some new errors

This commit is contained in:
jjanzen 2025-02-27 14:06:30 -06:00
parent 29f0431465
commit 49dc6a9b2b

View file

@ -84,24 +84,30 @@ pub const Parser = struct {
} }
} }
/// Error set of `identifyDecimal`: an alias for the error set of `std.fmt.parseInt`.
const DecimalError = std.fmt.ParseIntError;
/// Determine whether the cursor points at a valid integer in base 10 /// Determine whether the cursor points at a valid integer in base 10
/// Move the cursor past the integer and return it /// Move the cursor past the integer and return it
fn identifyDecimal(self: *Parser) !u64 { fn identifyDecimal(self: *Parser) DecimalError!u64 {
const start = self.ch_pos; const start = self.ch_pos;
// Advance the cursor over the run of bytes that `isDecimal` accepts.
while (isDecimal(self.getByte(self.ch_pos))) { while (isDecimal(self.getByte(self.ch_pos))) {
self.ch_pos += 1; self.ch_pos += 1;
} }
const end = self.ch_pos; const end = self.ch_pos;
// Old column: every parseInt failure is collapsed into error.NoDecimal.
// New column: parseInt's own error is returned unchanged.
return std.fmt.parseInt(u64, self.input[start..end], 10) catch return error.NoDecimal; return std.fmt.parseInt(u64, self.input[start..end], 10);
} }
/// Error set of `identifyHexadecimal`: the members of `std.fmt.ParseIntError`
/// merged with one parser-specific error.
const HexadecimalError = std.fmt.ParseIntError || error{
/// The byte at the cursor is not '#'.
WrongStartingCharacter,
};
/// Determine whether the cursor points at a valid integer in base 16 /// Determine whether the cursor points at a valid integer in base 16
/// Base 16 is identified by a number starting with # /// Base 16 is identified by a number starting with #
/// Move the cursor past the integer and return it /// Move the cursor past the integer and return it
fn identifyHexadecimal(self: *Parser) !u64 { fn identifyHexadecimal(self: *Parser) HexadecimalError!u64 {
if (self.getByte(self.ch_pos) != '#') { if (self.getByte(self.ch_pos) != '#') {
return error.NoHexadecimal; return HexadecimalError.WrongStartingCharacter;
} }
self.ch_pos += 1; self.ch_pos += 1;
@ -112,14 +118,21 @@ pub const Parser = struct {
} }
const end = self.ch_pos; const end = self.ch_pos;
return std.fmt.parseInt(u64, self.input[start..end], 16) catch return error.NoHexadecimal; return std.fmt.parseInt(u64, self.input[start..end], 16);
} }
/// Error set of `identifyChar`.
const CharError = error{
/// The byte at the cursor is not a single quote.
NoStartingDelimiter,
/// Returned when the scan meets ';' or a newline, when the cursor advances more than
/// 4 bytes past `start`, or when the quoted bytes do not decode to exactly one code point.
NoEndingDelimiter,
/// The scan consumed nothing before stopping (`end <= start`).
NoChar,
/// `std.unicode.Utf8View.init` rejected the bytes between the quotes.
InvalidUtf8,
};
/// Determine whether the cursor points at a valid unicode character wrapped in single quotes /// Determine whether the cursor points at a valid unicode character wrapped in single quotes
/// Move the cursor past the closing quote and return the character /// Move the cursor past the closing quote and return the character
fn identifyChar(self: *Parser) !u21 { fn identifyChar(self: *Parser) CharError!u21 {
if (self.getByte(self.ch_pos) != '\'') { if (self.getByte(self.ch_pos) != '\'') {
return error.NoChar; return CharError.NoStartingDelimiter;
} }
self.ch_pos += 1; self.ch_pos += 1;
@ -128,63 +141,78 @@ pub const Parser = struct {
while (self.getByte(self.ch_pos) != 0 and self.getByte(self.ch_pos) != '\'') { while (self.getByte(self.ch_pos) != 0 and self.getByte(self.ch_pos) != '\'') {
self.ch_pos += 1; self.ch_pos += 1;
if (self.getByte(self.ch_pos) == ';' or self.getByte(self.ch_pos) == '\n') {
return CharError.NoEndingDelimiter;
}
if (self.ch_pos - start > 4) { if (self.ch_pos - start > 4) {
return error.NoChar; return CharError.NoEndingDelimiter;
} }
} }
const end = self.ch_pos; const end = self.ch_pos;
self.ch_pos += 1; self.ch_pos += 1;
if (end <= start) { if (end <= start) {
return error.NoChar; return CharError.NoChar;
} }
const view = std.unicode.Utf8View.init(self.input[start..end]) catch return error.NoChar; const view = std.unicode.Utf8View.init(self.input[start..end]) catch return CharError.InvalidUtf8;
var iter = view.iterator(); var iter = view.iterator();
var count: u8 = 0; var count: u8 = 0;
var character: u21 = undefined; var character: u21 = 0;
while (iter.nextCodepoint()) |u| { while (iter.nextCodepoint()) |u| {
character = u; character = u;
count += 1; count += 1;
if (count > 1) { if (count > 1) {
return error.NoChar; return CharError.NoEndingDelimiter;
} }
} }
if (count != 1) { if (count != 1) {
return error.NoChar; return CharError.NoEndingDelimiter;
} }
return character; return character;
} }
/// Error set of `identifyString`.
const StringError = error{
/// The byte at the cursor is not a double quote.
NoStartingDelimiter,
/// A newline was met inside the string, or the scan stopped on something other than
/// a closing double quote.
NoEndingDelimiter,
/// Nothing lies between the quotes (`end <= start`).
NoString,
};
/// Determine whether the cursor points at a valid string wrapped in double quotes /// Determine whether the cursor points at a valid string wrapped in double quotes
/// Note that a string has at least one character in it and that it cannot have " or newlines in it /// Note that a string has at least one character in it and that it cannot have " or newlines in it
/// Move the cursor past the string and return the string /// Move the cursor past the string and return the string
fn identifyString(self: *Parser) ![]const u8 { fn identifyString(self: *Parser) StringError![]const u8 {
// The string must open with a double quote at the cursor.
if (self.getByte(self.ch_pos) != '"') { if (self.getByte(self.ch_pos) != '"') {
return error.NoString; return StringError.NoStartingDelimiter;
} }
// Step over the opening quote; `start` marks the first content byte.
self.ch_pos += 1; self.ch_pos += 1;
const start = self.ch_pos; const start = self.ch_pos;
// Scan until getByte yields 0 or the closing quote.
// NOTE(review): 0 presumably signals end of input — confirm against getByte.
while (self.getByte(self.ch_pos) != 0 and self.getByte(self.ch_pos) != '"') { while (self.getByte(self.ch_pos) != 0 and self.getByte(self.ch_pos) != '"') {
// A newline inside the quotes is rejected.
if (self.getByte(self.ch_pos) == '\n') { if (self.getByte(self.ch_pos) == '\n') {
return error.NoString; return StringError.NoEndingDelimiter;
} }
self.ch_pos += 1; self.ch_pos += 1;
} }
const end = self.ch_pos; const end = self.ch_pos;
// Consume the closing quote. Only the new column has the else branch, which reports a
// missing closing quote; the old column fell through to the emptiness check below.
if (self.getByte(self.ch_pos) == '"') { if (self.getByte(self.ch_pos) == '"') {
self.ch_pos += 1; self.ch_pos += 1;
} else {
return StringError.NoEndingDelimiter;
} }
// An empty pair of quotes is not a string.
if (end <= start) { if (end <= start) {
return error.NoString; return StringError.NoString;
} }
// The result is a slice of the parser's input, excluding both quotes.
return self.input[start..end]; return self.input[start..end];
} }
/// Merge of the decimal, hexadecimal, character and string error sets with one extra member.
const ConstantError = DecimalError || HexadecimalError || CharError || StringError || error{
/// Returned from the fallback (`else`) arm when no constant form matches.
UnexpectedSymbol,
};
/// Determine whether the cursor points at a valid constant /// Determine whether the cursor points at a valid constant
/// The constant may be a string or a number /// The constant may be a string or a number
/// Move the cursor past the constant and return it /// Move the cursor past the constant and return it
@ -206,15 +234,19 @@ pub const Parser = struct {
const string = try identifyString(self); const string = try identifyString(self);
return ConstantValue{ .string = string }; return ConstantValue{ .string = string };
}, },
else => return error.NoConstant, else => return ConstantError.UnexpectedSymbol,
} }
} }
/// Error set of `identifySymbol`.
const SymbolError = error{
/// Returned when `identifySymbol` reaches its end without having returned a symbol name.
UnexpectedSymbol,
};
/// Determine whether the cursor points at a symbol /// Determine whether the cursor points at a symbol
/// A symbol starts with a letter and only has symbol characters after that point /// A symbol starts with a letter and only has symbol characters after that point
/// As an exception, the 30 special symbols of the form xH, xF, and xB, where x is a single decimal digit, are also symbols /// As an exception, the 30 special symbols of the form xH, xF, and xB, where x is a single decimal digit, are also symbols
/// Move the cursor past the symbol and return its name /// Move the cursor past the symbol and return its name
fn identifySymbol(self: *Parser) ![]const u8 { fn identifySymbol(self: *Parser) SymbolError![]const u8 {
const start = self.ch_pos; const start = self.ch_pos;
if ((isLetter(self.getByte(self.ch_pos)) or self.getByte(self.ch_pos) == '_')) { if ((isLetter(self.getByte(self.ch_pos)) or self.getByte(self.ch_pos) == '_')) {
self.ch_pos += 1; self.ch_pos += 1;
@ -234,7 +266,7 @@ pub const Parser = struct {
return self.input[start..end]; return self.input[start..end];
} }
return error.NoSymbol; return SymbolError.UnexpectedSymbol;
} }
/// Get the number associated with a given symbol /// Get the number associated with a given symbol
@ -580,7 +612,7 @@ test "no symbols are found successfully" {
for (test_cases) |case| { for (test_cases) |case| {
var parser = Parser.init(std.testing.allocator, case); var parser = Parser.init(std.testing.allocator, case);
const symbol = parser.identifySymbol(); const symbol = parser.identifySymbol();
try std.testing.expectEqual(error.NoSymbol, symbol); try std.testing.expectEqual(error.UnexpectedSymbol, symbol);
parser.deinit(); parser.deinit();
} }
} }
@ -655,10 +687,17 @@ test "malformed decimals are not recognized" {
"12345678901234567890123456789012345678901234567890", "12345678901234567890123456789012345678901234567890",
}; };
for (test_cases) |case| { const expected = [_]Parser.DecimalError{
var parser = Parser.init(std.testing.allocator, case); Parser.DecimalError.InvalidCharacter,
Parser.DecimalError.InvalidCharacter,
Parser.DecimalError.InvalidCharacter,
Parser.DecimalError.Overflow,
};
for (0..test_cases.len) |i| {
var parser = Parser.init(std.testing.allocator, test_cases[i]);
const symbol = parser.identifyDecimal(); const symbol = parser.identifyDecimal();
try std.testing.expectEqual(error.NoDecimal, symbol); try std.testing.expectEqual(expected[i], symbol);
parser.deinit(); parser.deinit();
} }
} }
@ -693,10 +732,18 @@ test "malformed hexadecimals are not recognized" {
"#", "#",
}; };
for (test_cases) |case| { const expected = [_]Parser.HexadecimalError{
var parser = Parser.init(std.testing.allocator, case); Parser.HexadecimalError.WrongStartingCharacter,
Parser.HexadecimalError.WrongStartingCharacter,
Parser.HexadecimalError.WrongStartingCharacter,
Parser.HexadecimalError.Overflow,
Parser.HexadecimalError.InvalidCharacter,
};
for (0..test_cases.len) |i| {
var parser = Parser.init(std.testing.allocator, test_cases[i]);
const symbol = parser.identifyHexadecimal(); const symbol = parser.identifyHexadecimal();
try std.testing.expectEqual(error.NoHexadecimal, symbol); try std.testing.expectEqual(expected[i], symbol);
parser.deinit(); parser.deinit();
} }
} }
@ -729,12 +776,22 @@ test "invalid unicode sequences are not characters" {
"'as'", "'as'",
"''", "''",
"'", "'",
"a",
}; };
for (test_cases) |case| { const expected = [_]Parser.CharError{
var parser = Parser.init(std.testing.allocator, case); Parser.CharError.NoEndingDelimiter,
Parser.CharError.NoEndingDelimiter,
Parser.CharError.NoEndingDelimiter,
Parser.CharError.NoChar,
Parser.CharError.NoChar,
Parser.CharError.NoStartingDelimiter,
};
for (0..test_cases.len) |i| {
var parser = Parser.init(std.testing.allocator, test_cases[i]);
const symbol = parser.identifyChar(); const symbol = parser.identifyChar();
try std.testing.expectEqual(error.NoChar, symbol); try std.testing.expectEqual(expected[i], symbol);
parser.deinit(); parser.deinit();
} }
} }
@ -763,12 +820,20 @@ test "invalid strings are not recognized" {
"\"\"", "\"\"",
"\"", "\"",
"\"\n\"", "\"\n\"",
"asdf",
}; };
for (test_cases) |case| { const expected = [_]Parser.StringError{
var parser = Parser.init(std.testing.allocator, case); Parser.StringError.NoString,
Parser.StringError.NoEndingDelimiter,
Parser.StringError.NoEndingDelimiter,
Parser.StringError.NoStartingDelimiter,
};
for (0..test_cases.len) |i| {
var parser = Parser.init(std.testing.allocator, test_cases[i]);
const symbol = parser.identifyString(); const symbol = parser.identifyString();
try std.testing.expectEqual(error.NoString, symbol); try std.testing.expectEqual(expected[i], symbol);
parser.deinit(); parser.deinit();
} }
} }
@ -799,6 +864,27 @@ test "constants are recognized" {
} }
} }
// Checks that identifyConstant yields the specific ConstantError member listed for each malformed input.
test "invalid constants are recognized" {
// `test_cases` and `expected` are parallel arrays: expected[i] belongs to test_cases[i].
const test_cases = [_][]const u8{
"$123",
" 123",
"'aa'",
};
const expected = [_]Parser.ConstantError{
Parser.ConstantError.UnexpectedSymbol,
Parser.ConstantError.UnexpectedSymbol,
Parser.ConstantError.NoEndingDelimiter,
};
for (0..test_cases.len) |i| {
var parser = Parser.init(std.testing.allocator, test_cases[i]);
// Deferred inside the loop body, so each parser is released at the end of its own iteration.
defer parser.deinit();
// `symbol` holds the error union returned by identifyConstant; it is compared without unwrapping.
const symbol = parser.identifyConstant();
try std.testing.expectEqual(expected[i], symbol);
}
}
test "basic primaries are identified" { test "basic primaries are identified" {
const test_cases = [_][]const u8{ const test_cases = [_][]const u8{
"1234", "1234",