From 2ef4a21175eab8d24ff4be04269efe8455a163ce Mon Sep 17 00:00:00 2001 From: jjanzen Date: Sat, 22 Mar 2025 14:05:43 -0500 Subject: [PATCH] fully working lexer --- src/Lexer.hs | 14 +++++++---- test/Main.hs | 68 +++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 77 insertions(+), 5 deletions(-) diff --git a/src/Lexer.hs b/src/Lexer.hs index 35159a2..98f06a6 100644 --- a/src/Lexer.hs +++ b/src/Lexer.hs @@ -18,16 +18,21 @@ data Token = TokLPar isWhitespace :: Char -> Bool isWhitespace x = x == ' ' || x == '\t' || x == '\n' || x == '\r' -data StrState = StrStart - | StrInside - | StrEscape +skipComment :: String -> String +skipComment "" = "" +skipComment (x:xs) | x == '\n' = xs + | otherwise = skipComment xs identifySymbol :: String -> (String, String) identifySymbol "" = ("", "") -identifySymbol (x:xs) | isWhitespace x = (x:xs, "") +identifySymbol (x:xs) | isWhitespace x || x == '(' || x == ')' || x == ';' = (x:xs, "") | otherwise = case identifySymbol xs of (ys, zs) -> (ys, x:zs) +data StrState = StrStart + | StrInside + | StrEscape + identifyString :: String -> StrState -> Maybe (String, String) identifyString ('"':xs) StrStart = case identifyString xs StrInside of Nothing -> Nothing @@ -140,6 +145,7 @@ identifyNum (x:xs) NumExponentialComponent | isDigit x = case identifyNum xs Num lexStr :: String -> [Token] lexStr "" = [] +lexStr (';':xs) = lexStr (skipComment xs) lexStr ('(':xs) = TokLPar : lexStr xs lexStr (')':xs) = TokRPar : lexStr xs lexStr ('"':xs) = case identifyString ('"':xs) StrStart diff --git a/test/Main.hs b/test/Main.hs index 5d4c8de..b4ea2b2 100644 --- a/test/Main.hs +++ b/test/Main.hs @@ -80,6 +80,71 @@ testUnclosedStringsAreSymbols = TestCase (assertEqual [Lib.TokSymbol "\"hello", Lib.TokSymbol "world"] (lexStr "\"hello world")) +testLexComplexProgram :: Test +testLexComplexProgram = TestCase (assertEqual + "lex an entire program" + [ + Lib.TokLPar, + Lib.TokSymbol "defun", + Lib.TokSymbol "pomodoro-toggle-state", + Lib.TokLPar, + Lib.TokRPar, + Lib.TokStr "Restart the pomodoro timer and switch to the other state.", + Lib.TokLPar, + Lib.TokSymbol "interactive", + Lib.TokRPar, + Lib.TokLPar, + Lib.TokSymbol "cond", + Lib.TokLPar, + Lib.TokLPar, + Lib.TokSymbol "equal", + Lib.TokSymbol "pomodoro-state", + Lib.TokQuote, + Lib.TokSymbol "working", + Lib.TokRPar, + Lib.TokLPar, + Lib.TokSymbol "setq", + Lib.TokSymbol "pomodoro-state", + Lib.TokQuote, + Lib.TokSymbol "break", + Lib.TokRPar, + Lib.TokRPar, + Lib.TokLPar, + Lib.TokLPar, + Lib.TokSymbol "equal", + Lib.TokSymbol "pomodoro-state", + Lib.TokQuote, + Lib.TokSymbol "break", + Lib.TokRPar, + Lib.TokLPar, + Lib.TokSymbol "setq", + Lib.TokSymbol "pomodoro-state", + Lib.TokQuote, + Lib.TokSymbol "working", + Lib.TokRPar, + Lib.TokRPar, + Lib.TokLPar, + Lib.TokSymbol "t", + Lib.TokLPar, + Lib.TokSymbol "user-error", + Lib.TokStr "Invalid pomodoro state", + Lib.TokRPar, + Lib.TokRPar, + Lib.TokRPar, + Lib.TokLPar, + Lib.TokSymbol "setq", + Lib.TokSymbol "pomodoro-start-time", + Lib.TokLPar, + Lib.TokSymbol "floor", + Lib.TokLPar, + Lib.TokSymbol "float-time", + Lib.TokRPar, + Lib.TokRPar, + Lib.TokRPar, + Lib.TokRPar + ] + (lexStr ";; This is a comment\n(defun pomodoro-toggle-state () ; This is another comment\n\"Restart the pomodoro timer and switch to the other state.\"\n(interactive; a third comment\n)\n(cond ((equal pomodoro-state 'working)\n(setq pomodoro-state 'break))\n((equal pomodoro-state 'break)\n(setq pomodoro-state 'working))\n(t\n(user-error \"Invalid pomodoro state\")))\n(setq pomodoro-start-time (floor (float-time))))")) + tests :: Test tests = TestList [ TestLabel "test paretheses are detected" testPars, @@ -96,7 +161,8 @@ tests = TestList [ TestLabel "test negative numbers are recognized" testNegativeNumber, TestLabel "test malformed numbers count as symbols" testMalformedNumbersAreSymbols, TestLabel "test strings are recognized" testStringsAreRecognized, - TestLabel "test unclosed strings are symbols" testUnclosedStringsAreSymbols + TestLabel "test unclosed strings are symbols" testUnclosedStringsAreSymbols, + TestLabel "test lexing a complete program" testLexComplexProgram ] main :: IO ()