fully working lexer

This commit is contained in:
jjanzen 2025-03-22 14:05:43 -05:00
parent 0d6277a3a7
commit 2ef4a21175
2 changed files with 77 additions and 5 deletions

View file

@ -18,16 +18,21 @@ data Token = TokLPar
isWhitespace :: Char -> Bool
isWhitespace x = x == ' ' || x == '\t' || x == '\n' || x == '\r'
data StrState = StrStart
| StrInside
| StrEscape
skipComment :: String -> String
skipComment "" = ""
skipComment (x:xs) | x == '\n' = xs
| otherwise = skipComment xs
identifySymbol :: String -> (String, String)
identifySymbol "" = ("", "")
identifySymbol (x:xs) | isWhitespace x = (x:xs, "")
identifySymbol (x:xs) | isWhitespace x || x == '(' || x == ')' || x == ';' = (x:xs, "")
| otherwise = case identifySymbol xs of
(ys, zs) -> (ys, x:zs)
data StrState = StrStart
| StrInside
| StrEscape
identifyString :: String -> StrState -> Maybe (String, String)
identifyString ('"':xs) StrStart = case identifyString xs StrInside of
Nothing -> Nothing
@ -140,6 +145,7 @@ identifyNum (x:xs) NumExponentialComponent | isDigit x = case identifyNum xs Num
lexStr :: String -> [Token]
lexStr "" = []
lexStr (';':xs) = lexStr (skipComment xs)
lexStr ('(':xs) = TokLPar : lexStr xs
lexStr (')':xs) = TokRPar : lexStr xs
lexStr ('"':xs) = case identifyString ('"':xs) StrStart

View file

@ -80,6 +80,71 @@ testUnclosedStringsAreSymbols = TestCase (assertEqual
[Lib.TokSymbol "\"hello", Lib.TokSymbol "world"]
(lexStr "\"hello world"))
testLexComplexProgram :: Test
testLexComplexProgram = TestCase (assertEqual
"lex an entire program"
[
Lib.TokLPar,
Lib.TokSymbol "defun",
Lib.TokSymbol "pomodoro-toggle-state",
Lib.TokLPar,
Lib.TokRPar,
Lib.TokStr "Restart the pomodoro timer and switch to the other state.",
Lib.TokLPar,
Lib.TokSymbol "interactive",
Lib.TokRPar,
Lib.TokLPar,
Lib.TokSymbol "cond",
Lib.TokLPar,
Lib.TokLPar,
Lib.TokSymbol "equal",
Lib.TokSymbol "pomodoro-state",
Lib.TokQuote,
Lib.TokSymbol "working",
Lib.TokRPar,
Lib.TokLPar,
Lib.TokSymbol "setq",
Lib.TokSymbol "pomodoro-state",
Lib.TokQuote,
Lib.TokSymbol "break",
Lib.TokRPar,
Lib.TokRPar,
Lib.TokLPar,
Lib.TokLPar,
Lib.TokSymbol "equal",
Lib.TokSymbol "pomodoro-state",
Lib.TokQuote,
Lib.TokSymbol "break",
Lib.TokRPar,
Lib.TokLPar,
Lib.TokSymbol "setq",
Lib.TokSymbol "pomodoro-state",
Lib.TokQuote,
Lib.TokSymbol "working",
Lib.TokRPar,
Lib.TokRPar,
Lib.TokLPar,
Lib.TokSymbol "t",
Lib.TokLPar,
Lib.TokSymbol "user-error",
Lib.TokStr "Invalid pomodoro state",
Lib.TokRPar,
Lib.TokRPar,
Lib.TokRPar,
Lib.TokLPar,
Lib.TokSymbol "setq",
Lib.TokSymbol "pomodoro-start-time",
Lib.TokLPar,
Lib.TokSymbol "floor",
Lib.TokLPar,
Lib.TokSymbol "float-time",
Lib.TokRPar,
Lib.TokRPar,
Lib.TokRPar,
Lib.TokRPar
]
(lexStr ";; This is a comment\n(defun pomodoro-toggle-state () ; This is another comment\n\"Restart the pomodoro timer and switch to the other state.\"\n(interactive; a third comment\n)\n(cond ((equal pomodoro-state 'working)\n(setq pomodoro-state 'break))\n((equal pomodoro-state 'break)\n(setq pomodoro-state 'working))\n(t\n(user-error \"Invalid pomodoro state\")))\n(setq pomodoro-start-time (floor (float-time))))"))
tests :: Test
tests = TestList [
TestLabel "test paretheses are detected" testPars,
@ -96,7 +161,8 @@ tests = TestList [
TestLabel "test negative numbers are recognized" testNegativeNumber,
TestLabel "test malformed numbers count as symbols" testMalformedNumbersAreSymbols,
TestLabel "test strings are recognized" testStringsAreRecognized,
TestLabel "test unclosed strings are symbols" testUnclosedStringsAreSymbols
TestLabel "test unclosed strings are symbols" testUnclosedStringsAreSymbols,
TestLabel "test lexing a complete program" testLexComplexProgram
]
main :: IO ()