fully working lexer

This commit is contained in:
jjanzen 2025-03-22 14:05:43 -05:00
parent 0d6277a3a7
commit 2ef4a21175
2 changed files with 77 additions and 5 deletions

View file

@ -18,16 +18,21 @@ data Token = TokLPar
isWhitespace :: Char -> Bool isWhitespace :: Char -> Bool
isWhitespace x = x == ' ' || x == '\t' || x == '\n' || x == '\r' isWhitespace x = x == ' ' || x == '\t' || x == '\n' || x == '\r'
data StrState = StrStart skipComment :: String -> String
| StrInside skipComment "" = ""
| StrEscape skipComment (x:xs) | x == '\n' = xs
| otherwise = skipComment xs
identifySymbol :: String -> (String, String) identifySymbol :: String -> (String, String)
identifySymbol "" = ("", "") identifySymbol "" = ("", "")
identifySymbol (x:xs) | isWhitespace x = (x:xs, "") identifySymbol (x:xs) | isWhitespace x || x == '(' || x == ')' || x == ';' = (x:xs, "")
| otherwise = case identifySymbol xs of | otherwise = case identifySymbol xs of
(ys, zs) -> (ys, x:zs) (ys, zs) -> (ys, x:zs)
data StrState = StrStart
| StrInside
| StrEscape
identifyString :: String -> StrState -> Maybe (String, String) identifyString :: String -> StrState -> Maybe (String, String)
identifyString ('"':xs) StrStart = case identifyString xs StrInside of identifyString ('"':xs) StrStart = case identifyString xs StrInside of
Nothing -> Nothing Nothing -> Nothing
@ -140,6 +145,7 @@ identifyNum (x:xs) NumExponentialComponent | isDigit x = case identifyNum xs Num
lexStr :: String -> [Token] lexStr :: String -> [Token]
lexStr "" = [] lexStr "" = []
lexStr (';':xs) = lexStr (skipComment xs)
lexStr ('(':xs) = TokLPar : lexStr xs lexStr ('(':xs) = TokLPar : lexStr xs
lexStr (')':xs) = TokRPar : lexStr xs lexStr (')':xs) = TokRPar : lexStr xs
lexStr ('"':xs) = case identifyString ('"':xs) StrStart lexStr ('"':xs) = case identifyString ('"':xs) StrStart

View file

@ -80,6 +80,71 @@ testUnclosedStringsAreSymbols = TestCase (assertEqual
[Lib.TokSymbol "\"hello", Lib.TokSymbol "world"] [Lib.TokSymbol "\"hello", Lib.TokSymbol "world"]
(lexStr "\"hello world")) (lexStr "\"hello world"))
testLexComplexProgram :: Test
testLexComplexProgram = TestCase (assertEqual
"lex an entire program"
[
Lib.TokLPar,
Lib.TokSymbol "defun",
Lib.TokSymbol "pomodoro-toggle-state",
Lib.TokLPar,
Lib.TokRPar,
Lib.TokStr "Restart the pomodoro timer and switch to the other state.",
Lib.TokLPar,
Lib.TokSymbol "interactive",
Lib.TokRPar,
Lib.TokLPar,
Lib.TokSymbol "cond",
Lib.TokLPar,
Lib.TokLPar,
Lib.TokSymbol "equal",
Lib.TokSymbol "pomodoro-state",
Lib.TokQuote,
Lib.TokSymbol "working",
Lib.TokRPar,
Lib.TokLPar,
Lib.TokSymbol "setq",
Lib.TokSymbol "pomodoro-state",
Lib.TokQuote,
Lib.TokSymbol "break",
Lib.TokRPar,
Lib.TokRPar,
Lib.TokLPar,
Lib.TokLPar,
Lib.TokSymbol "equal",
Lib.TokSymbol "pomodoro-state",
Lib.TokQuote,
Lib.TokSymbol "break",
Lib.TokRPar,
Lib.TokLPar,
Lib.TokSymbol "setq",
Lib.TokSymbol "pomodoro-state",
Lib.TokQuote,
Lib.TokSymbol "working",
Lib.TokRPar,
Lib.TokRPar,
Lib.TokLPar,
Lib.TokSymbol "t",
Lib.TokLPar,
Lib.TokSymbol "user-error",
Lib.TokStr "Invalid pomodoro state",
Lib.TokRPar,
Lib.TokRPar,
Lib.TokRPar,
Lib.TokLPar,
Lib.TokSymbol "setq",
Lib.TokSymbol "pomodoro-start-time",
Lib.TokLPar,
Lib.TokSymbol "floor",
Lib.TokLPar,
Lib.TokSymbol "float-time",
Lib.TokRPar,
Lib.TokRPar,
Lib.TokRPar,
Lib.TokRPar
]
(lexStr ";; This is a comment\n(defun pomodoro-toggle-state () ; This is another comment\n\"Restart the pomodoro timer and switch to the other state.\"\n(interactive; a third comment\n)\n(cond ((equal pomodoro-state 'working)\n(setq pomodoro-state 'break))\n((equal pomodoro-state 'break)\n(setq pomodoro-state 'working))\n(t\n(user-error \"Invalid pomodoro state\")))\n(setq pomodoro-start-time (floor (float-time))))"))
tests :: Test tests :: Test
tests = TestList [ tests = TestList [
TestLabel "test paretheses are detected" testPars, TestLabel "test paretheses are detected" testPars,
@ -96,7 +161,8 @@ tests = TestList [
TestLabel "test negative numbers are recognized" testNegativeNumber, TestLabel "test negative numbers are recognized" testNegativeNumber,
TestLabel "test malformed numbers count as symbols" testMalformedNumbersAreSymbols, TestLabel "test malformed numbers count as symbols" testMalformedNumbersAreSymbols,
TestLabel "test strings are recognized" testStringsAreRecognized, TestLabel "test strings are recognized" testStringsAreRecognized,
TestLabel "test unclosed strings are symbols" testUnclosedStringsAreSymbols TestLabel "test unclosed strings are symbols" testUnclosedStringsAreSymbols,
TestLabel "test lexing a complete program" testLexComplexProgram
] ]
main :: IO () main :: IO ()