add some new errors
This commit is contained in:
parent
29f0431465
commit
49dc6a9b2b
1 changed file with 119 additions and 33 deletions
152
src/parser.zig
152
src/parser.zig
|
@ -84,24 +84,30 @@ pub const Parser = struct {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const DecimalError = std.fmt.ParseIntError;
|
||||||
|
|
||||||
/// Determine whether the cursor points at a valid integer in base 10
|
/// Determine whether the cursor points at a valid integer in base 10
|
||||||
/// Move the cursor past the integer and return it
|
/// Move the cursor past the integer and return it
|
||||||
fn identifyDecimal(self: *Parser) !u64 {
|
fn identifyDecimal(self: *Parser) DecimalError!u64 {
|
||||||
const start = self.ch_pos;
|
const start = self.ch_pos;
|
||||||
while (isDecimal(self.getByte(self.ch_pos))) {
|
while (isDecimal(self.getByte(self.ch_pos))) {
|
||||||
self.ch_pos += 1;
|
self.ch_pos += 1;
|
||||||
}
|
}
|
||||||
const end = self.ch_pos;
|
const end = self.ch_pos;
|
||||||
|
|
||||||
return std.fmt.parseInt(u64, self.input[start..end], 10) catch return error.NoDecimal;
|
return std.fmt.parseInt(u64, self.input[start..end], 10);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const HexadecimalError = std.fmt.ParseIntError || error{
|
||||||
|
WrongStartingCharacter,
|
||||||
|
};
|
||||||
|
|
||||||
/// Determine whether the cursor points at a valid integer in base 16
|
/// Determine whether the cursor points at a valid integer in base 16
|
||||||
/// Base 16 is identified by a number starting with #
|
/// Base 16 is identified by a number starting with #
|
||||||
/// Move the cursor past the integer and return it
|
/// Move the cursor past the integer and return it
|
||||||
fn identifyHexadecimal(self: *Parser) !u64 {
|
fn identifyHexadecimal(self: *Parser) HexadecimalError!u64 {
|
||||||
if (self.getByte(self.ch_pos) != '#') {
|
if (self.getByte(self.ch_pos) != '#') {
|
||||||
return error.NoHexadecimal;
|
return HexadecimalError.WrongStartingCharacter;
|
||||||
}
|
}
|
||||||
|
|
||||||
self.ch_pos += 1;
|
self.ch_pos += 1;
|
||||||
|
@ -112,14 +118,21 @@ pub const Parser = struct {
|
||||||
}
|
}
|
||||||
const end = self.ch_pos;
|
const end = self.ch_pos;
|
||||||
|
|
||||||
return std.fmt.parseInt(u64, self.input[start..end], 16) catch return error.NoHexadecimal;
|
return std.fmt.parseInt(u64, self.input[start..end], 16);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const CharError = error{
|
||||||
|
NoStartingDelimiter,
|
||||||
|
NoEndingDelimiter,
|
||||||
|
NoChar,
|
||||||
|
InvalidUtf8,
|
||||||
|
};
|
||||||
|
|
||||||
/// Determine whether the cursor points at a valid unicode character wrapped in single quotes
|
/// Determine whether the cursor points at a valid unicode character wrapped in single quotes
|
||||||
/// Move the cursor past the closing quote and return the character
|
/// Move the cursor past the closing quote and return the character
|
||||||
fn identifyChar(self: *Parser) !u21 {
|
fn identifyChar(self: *Parser) CharError!u21 {
|
||||||
if (self.getByte(self.ch_pos) != '\'') {
|
if (self.getByte(self.ch_pos) != '\'') {
|
||||||
return error.NoChar;
|
return CharError.NoStartingDelimiter;
|
||||||
}
|
}
|
||||||
|
|
||||||
self.ch_pos += 1;
|
self.ch_pos += 1;
|
||||||
|
@ -128,63 +141,78 @@ pub const Parser = struct {
|
||||||
while (self.getByte(self.ch_pos) != 0 and self.getByte(self.ch_pos) != '\'') {
|
while (self.getByte(self.ch_pos) != 0 and self.getByte(self.ch_pos) != '\'') {
|
||||||
self.ch_pos += 1;
|
self.ch_pos += 1;
|
||||||
|
|
||||||
|
if (self.getByte(self.ch_pos) == ';' or self.getByte(self.ch_pos) == '\n') {
|
||||||
|
return CharError.NoEndingDelimiter;
|
||||||
|
}
|
||||||
if (self.ch_pos - start > 4) {
|
if (self.ch_pos - start > 4) {
|
||||||
return error.NoChar;
|
return CharError.NoEndingDelimiter;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
const end = self.ch_pos;
|
const end = self.ch_pos;
|
||||||
self.ch_pos += 1;
|
self.ch_pos += 1;
|
||||||
|
|
||||||
if (end <= start) {
|
if (end <= start) {
|
||||||
return error.NoChar;
|
return CharError.NoChar;
|
||||||
}
|
}
|
||||||
|
|
||||||
const view = std.unicode.Utf8View.init(self.input[start..end]) catch return error.NoChar;
|
const view = std.unicode.Utf8View.init(self.input[start..end]) catch return CharError.InvalidUtf8;
|
||||||
var iter = view.iterator();
|
var iter = view.iterator();
|
||||||
var count: u8 = 0;
|
var count: u8 = 0;
|
||||||
var character: u21 = undefined;
|
var character: u21 = 0;
|
||||||
while (iter.nextCodepoint()) |u| {
|
while (iter.nextCodepoint()) |u| {
|
||||||
character = u;
|
character = u;
|
||||||
count += 1;
|
count += 1;
|
||||||
if (count > 1) {
|
if (count > 1) {
|
||||||
return error.NoChar;
|
return CharError.NoEndingDelimiter;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (count != 1) {
|
if (count != 1) {
|
||||||
return error.NoChar;
|
return CharError.NoEndingDelimiter;
|
||||||
}
|
}
|
||||||
|
|
||||||
return character;
|
return character;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const StringError = error{
|
||||||
|
NoStartingDelimiter,
|
||||||
|
NoEndingDelimiter,
|
||||||
|
NoString,
|
||||||
|
};
|
||||||
|
|
||||||
/// Determine whether the cursor points at a valid string wrapped in double quotes
|
/// Determine whether the cursor points at a valid string wrapped in double quotes
|
||||||
/// Note that a string has at least one character in it and that it cannot have " or newlines in it
|
/// Note that a string has at least one character in it and that it cannot have " or newlines in it
|
||||||
/// Move the cursor past the string and return the string
|
/// Move the cursor past the string and return the string
|
||||||
fn identifyString(self: *Parser) ![]const u8 {
|
fn identifyString(self: *Parser) StringError![]const u8 {
|
||||||
if (self.getByte(self.ch_pos) != '"') {
|
if (self.getByte(self.ch_pos) != '"') {
|
||||||
return error.NoString;
|
return StringError.NoStartingDelimiter;
|
||||||
}
|
}
|
||||||
|
|
||||||
self.ch_pos += 1;
|
self.ch_pos += 1;
|
||||||
const start = self.ch_pos;
|
const start = self.ch_pos;
|
||||||
while (self.getByte(self.ch_pos) != 0 and self.getByte(self.ch_pos) != '"') {
|
while (self.getByte(self.ch_pos) != 0 and self.getByte(self.ch_pos) != '"') {
|
||||||
if (self.getByte(self.ch_pos) == '\n') {
|
if (self.getByte(self.ch_pos) == '\n') {
|
||||||
return error.NoString;
|
return StringError.NoEndingDelimiter;
|
||||||
}
|
}
|
||||||
self.ch_pos += 1;
|
self.ch_pos += 1;
|
||||||
}
|
}
|
||||||
const end = self.ch_pos;
|
const end = self.ch_pos;
|
||||||
if (self.getByte(self.ch_pos) == '"') {
|
if (self.getByte(self.ch_pos) == '"') {
|
||||||
self.ch_pos += 1;
|
self.ch_pos += 1;
|
||||||
|
} else {
|
||||||
|
return StringError.NoEndingDelimiter;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (end <= start) {
|
if (end <= start) {
|
||||||
return error.NoString;
|
return StringError.NoString;
|
||||||
}
|
}
|
||||||
|
|
||||||
return self.input[start..end];
|
return self.input[start..end];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const ConstantError = DecimalError || HexadecimalError || CharError || StringError || error{
|
||||||
|
UnexpectedSymbol,
|
||||||
|
};
|
||||||
|
|
||||||
/// Determine whether the cursor points at a valid constant
|
/// Determine whether the cursor points at a valid constant
|
||||||
/// The constant may be a string or a number
|
/// The constant may be a string or a number
|
||||||
/// Move the cursor past the constant and return it
|
/// Move the cursor past the constant and return it
|
||||||
|
@ -206,15 +234,19 @@ pub const Parser = struct {
|
||||||
const string = try identifyString(self);
|
const string = try identifyString(self);
|
||||||
return ConstantValue{ .string = string };
|
return ConstantValue{ .string = string };
|
||||||
},
|
},
|
||||||
else => return error.NoConstant,
|
else => return ConstantError.UnexpectedSymbol,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const SymbolError = error{
|
||||||
|
UnexpectedSymbol,
|
||||||
|
};
|
||||||
|
|
||||||
/// Determine whether the cursor points at a symbol
|
/// Determine whether the cursor points at a symbol
|
||||||
/// A symbol starts with a letter and only has symbol characters after that point
|
/// A symbol starts with a letter and only has symbol characters after that point
|
||||||
/// There is an exception that there are 30 special symbols of the form xH, xF, and xB where x is a single decimal digit
|
/// There is an exception that there are 30 special symbols of the form xH, xF, and xB where x is a single decimal digit
|
||||||
/// Move the cursor past the symbol and return its name
|
/// Move the cursor past the symbol and return its name
|
||||||
fn identifySymbol(self: *Parser) ![]const u8 {
|
fn identifySymbol(self: *Parser) SymbolError![]const u8 {
|
||||||
const start = self.ch_pos;
|
const start = self.ch_pos;
|
||||||
if ((isLetter(self.getByte(self.ch_pos)) or self.getByte(self.ch_pos) == '_')) {
|
if ((isLetter(self.getByte(self.ch_pos)) or self.getByte(self.ch_pos) == '_')) {
|
||||||
self.ch_pos += 1;
|
self.ch_pos += 1;
|
||||||
|
@ -234,7 +266,7 @@ pub const Parser = struct {
|
||||||
return self.input[start..end];
|
return self.input[start..end];
|
||||||
}
|
}
|
||||||
|
|
||||||
return error.NoSymbol;
|
return SymbolError.UnexpectedSymbol;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get the number associated with a given symbol
|
/// Get the number associated with a given symbol
|
||||||
|
@ -580,7 +612,7 @@ test "no symbols are found successfully" {
|
||||||
for (test_cases) |case| {
|
for (test_cases) |case| {
|
||||||
var parser = Parser.init(std.testing.allocator, case);
|
var parser = Parser.init(std.testing.allocator, case);
|
||||||
const symbol = parser.identifySymbol();
|
const symbol = parser.identifySymbol();
|
||||||
try std.testing.expectEqual(error.NoSymbol, symbol);
|
try std.testing.expectEqual(error.UnexpectedSymbol, symbol);
|
||||||
parser.deinit();
|
parser.deinit();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -655,10 +687,17 @@ test "malformed decimals are not recognized" {
|
||||||
"12345678901234567890123456789012345678901234567890",
|
"12345678901234567890123456789012345678901234567890",
|
||||||
};
|
};
|
||||||
|
|
||||||
for (test_cases) |case| {
|
const expected = [_]Parser.DecimalError{
|
||||||
var parser = Parser.init(std.testing.allocator, case);
|
Parser.DecimalError.InvalidCharacter,
|
||||||
|
Parser.DecimalError.InvalidCharacter,
|
||||||
|
Parser.DecimalError.InvalidCharacter,
|
||||||
|
Parser.DecimalError.Overflow,
|
||||||
|
};
|
||||||
|
|
||||||
|
for (0..test_cases.len) |i| {
|
||||||
|
var parser = Parser.init(std.testing.allocator, test_cases[i]);
|
||||||
const symbol = parser.identifyDecimal();
|
const symbol = parser.identifyDecimal();
|
||||||
try std.testing.expectEqual(error.NoDecimal, symbol);
|
try std.testing.expectEqual(expected[i], symbol);
|
||||||
parser.deinit();
|
parser.deinit();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -693,10 +732,18 @@ test "malformed hexadecimals are not recognized" {
|
||||||
"#",
|
"#",
|
||||||
};
|
};
|
||||||
|
|
||||||
for (test_cases) |case| {
|
const expected = [_]Parser.HexadecimalError{
|
||||||
var parser = Parser.init(std.testing.allocator, case);
|
Parser.HexadecimalError.WrongStartingCharacter,
|
||||||
|
Parser.HexadecimalError.WrongStartingCharacter,
|
||||||
|
Parser.HexadecimalError.WrongStartingCharacter,
|
||||||
|
Parser.HexadecimalError.Overflow,
|
||||||
|
Parser.HexadecimalError.InvalidCharacter,
|
||||||
|
};
|
||||||
|
|
||||||
|
for (0..test_cases.len) |i| {
|
||||||
|
var parser = Parser.init(std.testing.allocator, test_cases[i]);
|
||||||
const symbol = parser.identifyHexadecimal();
|
const symbol = parser.identifyHexadecimal();
|
||||||
try std.testing.expectEqual(error.NoHexadecimal, symbol);
|
try std.testing.expectEqual(expected[i], symbol);
|
||||||
parser.deinit();
|
parser.deinit();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -729,12 +776,22 @@ test "invalid unicode sequences are not characters" {
|
||||||
"'as'",
|
"'as'",
|
||||||
"''",
|
"''",
|
||||||
"'",
|
"'",
|
||||||
|
"a",
|
||||||
};
|
};
|
||||||
|
|
||||||
for (test_cases) |case| {
|
const expected = [_]Parser.CharError{
|
||||||
var parser = Parser.init(std.testing.allocator, case);
|
Parser.CharError.NoEndingDelimiter,
|
||||||
|
Parser.CharError.NoEndingDelimiter,
|
||||||
|
Parser.CharError.NoEndingDelimiter,
|
||||||
|
Parser.CharError.NoChar,
|
||||||
|
Parser.CharError.NoChar,
|
||||||
|
Parser.CharError.NoStartingDelimiter,
|
||||||
|
};
|
||||||
|
|
||||||
|
for (0..test_cases.len) |i| {
|
||||||
|
var parser = Parser.init(std.testing.allocator, test_cases[i]);
|
||||||
const symbol = parser.identifyChar();
|
const symbol = parser.identifyChar();
|
||||||
try std.testing.expectEqual(error.NoChar, symbol);
|
try std.testing.expectEqual(expected[i], symbol);
|
||||||
parser.deinit();
|
parser.deinit();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -763,12 +820,20 @@ test "invalid strings are not recognized" {
|
||||||
"\"\"",
|
"\"\"",
|
||||||
"\"",
|
"\"",
|
||||||
"\"\n\"",
|
"\"\n\"",
|
||||||
|
"asdf",
|
||||||
};
|
};
|
||||||
|
|
||||||
for (test_cases) |case| {
|
const expected = [_]Parser.StringError{
|
||||||
var parser = Parser.init(std.testing.allocator, case);
|
Parser.StringError.NoString,
|
||||||
|
Parser.StringError.NoEndingDelimiter,
|
||||||
|
Parser.StringError.NoEndingDelimiter,
|
||||||
|
Parser.StringError.NoStartingDelimiter,
|
||||||
|
};
|
||||||
|
|
||||||
|
for (0..test_cases.len) |i| {
|
||||||
|
var parser = Parser.init(std.testing.allocator, test_cases[i]);
|
||||||
const symbol = parser.identifyString();
|
const symbol = parser.identifyString();
|
||||||
try std.testing.expectEqual(error.NoString, symbol);
|
try std.testing.expectEqual(expected[i], symbol);
|
||||||
parser.deinit();
|
parser.deinit();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -799,6 +864,27 @@ test "constants are recognized" {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
test "invalid constants are recognized" {
|
||||||
|
const test_cases = [_][]const u8{
|
||||||
|
"$123",
|
||||||
|
" 123",
|
||||||
|
"'aa'",
|
||||||
|
};
|
||||||
|
|
||||||
|
const expected = [_]Parser.ConstantError{
|
||||||
|
Parser.ConstantError.UnexpectedSymbol,
|
||||||
|
Parser.ConstantError.UnexpectedSymbol,
|
||||||
|
Parser.ConstantError.NoEndingDelimiter,
|
||||||
|
};
|
||||||
|
|
||||||
|
for (0..test_cases.len) |i| {
|
||||||
|
var parser = Parser.init(std.testing.allocator, test_cases[i]);
|
||||||
|
defer parser.deinit();
|
||||||
|
const symbol = parser.identifyConstant();
|
||||||
|
try std.testing.expectEqual(expected[i], symbol);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
test "basic primaries are identified" {
|
test "basic primaries are identified" {
|
||||||
const test_cases = [_][]const u8{
|
const test_cases = [_][]const u8{
|
||||||
"1234",
|
"1234",
|
||||||
|
|
Loading…
Add table
Reference in a new issue