From 742df026380628466e1bd357f2102cd7afe0f988 Mon Sep 17 00:00:00 2001 From: jjanzen Date: Sat, 22 Mar 2025 00:36:36 -0500 Subject: [PATCH] refactor project --- lisp-interpreter.cabal | 7 +- src/Lexer.hs | 157 ++++++++++++++++++++++++++++++++++++++++ src/Lib.hs | 161 ++--------------------------------------- test/Main.hs | 29 ++++++-- 4 files changed, 190 insertions(+), 164 deletions(-) create mode 100644 src/Lexer.hs diff --git a/lisp-interpreter.cabal b/lisp-interpreter.cabal index f157fa1..8b30b96 100644 --- a/lisp-interpreter.cabal +++ b/lisp-interpreter.cabal @@ -62,13 +62,15 @@ library exposed-modules: Lib -- Modules included in this library but not exported. - -- other-modules: + other-modules: + Lexer -- LANGUAGE extensions used by modules in this package. -- other-extensions: -- Other library packages from which modules are imported. - build-depends: base ^>=4.18.2.1 + build-depends: + base ^>=4.18.2.1 -- Directories containing source files. hs-source-dirs: src @@ -125,4 +127,5 @@ test-suite lisp-interpreter-test -- Test dependencies. build-depends: base ^>=4.18.2.1, + HUnit, lisp-interpreter diff --git a/src/Lexer.hs b/src/Lexer.hs new file mode 100644 index 0000000..f622664 --- /dev/null +++ b/src/Lexer.hs @@ -0,0 +1,157 @@ +module Lexer( + Token (..), + lexStr + )where + +import Data.Char + +data Token = TokLPar + | TokRPar + | TokQuote + | TokNumber Double + | TokStr String + | TokCharacter Char + | TokSymbol String + | TokErr String + deriving Eq + +data StrState = StrStart + | StrInside + | StrEscape + +identifyString :: String -> StrState -> (String, Maybe String) +identifyString ('"':xs) StrStart = identifyString xs StrInside +identifyString xs StrStart = (xs, Nothing) -- will never occur +identifyString "" _ = ("", Nothing) -- early end of file +identifyString ('\\':xs) StrInside = identifyString xs StrEscape +identifyString ('"':xs) StrInside = (xs, Just "") +identifyString ('n':xs) StrEscape = case identifyString xs StrInside + of (ys, Just b) -> (ys, Just ('\n':b)) + y -> y +identifyString ('\\':xs) StrEscape = case identifyString xs StrInside + of (ys, Just b) -> (ys, Just ('\\':b)) + y -> y +identifyString ('/':xs) StrEscape = case identifyString xs StrInside + of (ys, Just b) -> (ys, Just ('/':b)) + y -> y +identifyString ('"':xs) StrEscape = case identifyString xs StrInside + of (ys, Just b) -> (ys, Just ('"':b)) + y -> y +identifyString ('b':xs) StrEscape = case identifyString xs StrInside + of (ys, Just b) -> (ys, Just ('\b':b)) + y -> y +identifyString ('f':xs) StrEscape = case identifyString xs StrInside + of (ys, Just b) -> (ys, Just ('\f':b)) + y -> y +identifyString ('r':xs) StrEscape = case identifyString xs StrInside + of (ys, Just b) -> (ys, Just ('\r':b)) + y -> y +identifyString ('t':xs) StrEscape = case identifyString xs StrInside + of (ys, Just b) -> (ys, Just ('\t':b)) + y -> y +identifyString ('u':w:x:y:z:xs) StrEscape | isHexDigit w && + isHexDigit x && + isHexDigit y && + isHexDigit z = + case identifyString xs StrInside + of (ys, Just b) -> (ys, Just (chr (read ('0':'x':w:x:y:z:"")):b)) + a -> a +identifyString (x:xs) StrEscape = (x:xs, Nothing) +identifyString (x:xs) StrInside = case identifyString xs StrInside + of (ys, Nothing) -> (ys, Nothing) + (ys, Just b) -> (ys, Just (x:b)) + +data NumState = NumStart + | NumNeg + | NumIntegralComponent + | NumFractionalComponent + | NumExponentialComponent + +identifyNum :: String -> NumState -> (String, Maybe String) +identifyNum "" NumStart = ("", Nothing) +identifyNum ('-':xs) NumStart = identifyNum xs NumNeg +identifyNum ('0':'.':x:xs) NumStart | isDigit x = case identifyNum (x:xs) NumFractionalComponent + of (ys, Just zs) -> (ys, Just ('0':'.':zs)) + y -> y + | otherwise = (x:xs, Nothing) +identifyNum (x:xs) NumStart | x == '0' = (x:xs, Nothing) + | isDigit x = identifyNum (x:xs) NumIntegralComponent + | otherwise = (x:xs, Nothing) +identifyNum "" NumNeg = ("", Nothing) +identifyNum ('0':'.':x:xs) NumNeg | isDigit x = case identifyNum (x:xs) NumFractionalComponent + of (ys, Just zs) -> (ys, Just ('-':'0':'.':zs)) + y -> y +identifyNum (x:xs) NumNeg | x == '0' = (x:xs, Nothing) + | isDigit x = case identifyNum (x:xs) NumIntegralComponent + of (ys, Just zs) -> (ys, Just ('-':zs)) + y -> y + | otherwise = (x:xs, Nothing) +identifyNum ('.':x:xs) NumIntegralComponent | isDigit x = case identifyNum (x:xs) NumFractionalComponent + of (ys, Just zs) -> (ys, Just ('.':zs)) + y -> y + | otherwise = ('.':x:xs, Just "") +identifyNum (w:x:y:xs) NumIntegralComponent | (w == 'e' || w == 'E') && + (x == '+' || x == '-') && + isDigit y = + case identifyNum (y:xs) NumExponentialComponent + of (ys, Just zs) -> (ys, Just (w:x:zs)) + z -> z + | (w == 'e' || w == 'E') && + isDigit x = + case identifyNum (x:y:xs) NumExponentialComponent + of (ys, Just zs) -> (ys, Just (w:zs)) + z -> z +identifyNum (x:y:"") NumIntegralComponent | (x == 'e' || x == 'E') && + isDigit y = ("", Just (x:y:"")) + | isDigit x && isDigit y = ("", Just (x:y:"")) + | isDigit x = (y:"", Just (x:"")) + | otherwise = (x:y:"", Just "") +identifyNum (x:xs) NumIntegralComponent | isDigit x = case identifyNum xs NumIntegralComponent + of (ys, Just zs) -> (ys, Just (x:zs)) + y -> y + | otherwise = (x:xs, Just "") +identifyNum (w:x:y:xs) NumFractionalComponent | (w == 'e' || w == 'E') && + (x == '+' || x == '-') && + isDigit y = + case identifyNum (y:xs) NumExponentialComponent + of (ys, Just zs) -> (ys, Just (w:x:zs)) + z -> z + | (w == 'e' || w == 'E') && + isDigit x = + case identifyNum (x:y:xs) NumExponentialComponent + of (ys, Just zs) -> (ys, Just (w:zs)) + z -> z +identifyNum (x:y:"") NumFractionalComponent | (x == 'e' || x == 'E') && + isDigit y = ("", Just (x:y:"")) + | isDigit x && isDigit y = ("", Just (x:y:"")) + | isDigit x = (y:"", Just (x:"")) + | otherwise = (x:y:"", Just "") +identifyNum (x:xs) NumFractionalComponent | isDigit x = case identifyNum xs NumFractionalComponent + of (ys, Just zs) -> (ys, Just (x:zs)) + y -> y + | otherwise = (x:xs, Just "") +identifyNum (x:xs) NumExponentialComponent | isDigit x = case identifyNum xs NumExponentialComponent + of (ys, Just zs) -> (ys, Just (x:zs)) + y -> y + | otherwise = (x:xs, Just "") +identifyNum [] _ = ("", Just "") + +lexStr :: String -> [Token] +lexStr "" = [] +lexStr ('(':xs) = TokLPar : lexStr xs +lexStr (')':xs) = TokRPar : lexStr xs +lexStr ('"':xs) = case identifyString ('"':xs) StrStart + of (ys, Nothing) -> TokErr "End of file reached before matching \"" :lexStr ys + (ys, Just b) -> TokStr b:lexStr ys +lexStr ('\'':"") = [TokErr "End of file reached before matching '"] +lexStr ('\'':'(':xs) = TokQuote : lexStr ('(':xs) +lexStr ('\'':_:"") = [TokErr "End of file reached before matching '"] +lexStr ('\'':x:'\'':xs) = TokCharacter x : lexStr xs +lexStr (' ':xs) = lexStr xs +lexStr ('\t':xs) = lexStr xs +lexStr ('\n':xs) = lexStr xs +lexStr ('\r':xs) = lexStr xs +lexStr (x:xs) | isDigit x || x == '-' = case identifyNum (x:xs) NumStart + of (ys, Nothing) -> TokErr "Failed to parse number" : lexStr ys + (ys, Just b) -> TokNumber (read b) : lexStr ys +lexStr _ = [] diff --git a/src/Lib.hs b/src/Lib.hs index 8f7785c..18016f8 100644 --- a/src/Lib.hs +++ b/src/Lib.hs @@ -1,157 +1,10 @@ -module Lib where +module Lib( + Lexer.Token(..), + Lexer.lexStr, + someFunc + )where -import Data.Char +import Lexer someFunc :: IO () -someFunc = putStrLn "someFunc" - -data Token = LPar - | RPar - | Quote - | Number Double - | Str String - | Character Char - | Symbol String - | Err String - deriving Eq - -data StrState = StrStart - | StrInside - | StrEscape - -identifyString :: String -> StrState -> (String, Maybe String) -identifyString ('"':xs) StrStart = identifyString xs StrInside -identifyString xs StrStart = (xs, Nothing) -- will never occur -identifyString "" _ = ("", Nothing) -- early end of file -identifyString ('\\':xs) StrInside = identifyString xs StrEscape -identifyString ('"':xs) StrInside = (xs, Just "") -identifyString ('n':xs) StrEscape = case identifyString xs StrInside - of (ys, Just b) -> (ys, Just ('\n':b)) - y -> y -identifyString ('\\':xs) StrEscape = case identifyString xs StrInside - of (ys, Just b) -> (ys, Just ('\\':b)) - y -> y -identifyString ('/':xs) StrEscape = case identifyString xs StrInside - of (ys, Just b) -> (ys, Just ('/':b)) - y -> y -identifyString ('"':xs) StrEscape = case identifyString xs StrInside - of (ys, Just b) -> (ys, Just ('"':b)) - y -> y -identifyString ('b':xs) StrEscape = case identifyString xs StrInside - of (ys, Just b) -> (ys, Just ('\b':b)) - y -> y -identifyString ('f':xs) StrEscape = case identifyString xs StrInside - of (ys, Just b) -> (ys, Just ('\f':b)) - y -> y -identifyString ('r':xs) StrEscape = case identifyString xs StrInside - of (ys, Just b) -> (ys, Just ('\r':b)) - y -> y -identifyString ('t':xs) StrEscape = case identifyString xs StrInside - of (ys, Just b) -> (ys, Just ('\t':b)) - y -> y -identifyString ('u':w:x:y:z:xs) StrEscape | isHexDigit w && - isHexDigit x && - isHexDigit y && - isHexDigit z = - case identifyString xs StrInside - of (ys, Just b) -> (ys, Just (chr (read ('0':'x':w:x:y:z:"")):b)) - a -> a -identifyString (x:xs) StrEscape = (x:xs, Nothing) -identifyString (x:xs) StrInside = case identifyString xs StrInside - of (ys, Nothing) -> (ys, Nothing) - (ys, Just b) -> (ys, Just (x:b)) - -data NumState = NumStart - | NumNeg - | NumIntegralComponent - | NumFractionalComponent - | NumExponentialComponent - -identifyNum :: String -> NumState -> (String, Maybe String) -identifyNum "" NumStart = ("", Nothing) -identifyNum ('-':xs) NumStart = identifyNum xs NumNeg -identifyNum ('0':'.':x:xs) NumStart | isDigit x = case identifyNum (x:xs) NumFractionalComponent - of (ys, Just zs) -> (ys, Just ('0':'.':zs)) - y -> y - | otherwise = (x:xs, Nothing) -identifyNum (x:xs) NumStart | x == '0' = (x:xs, Nothing) - | isDigit x = identifyNum (x:xs) NumIntegralComponent - | otherwise = (x:xs, Nothing) -identifyNum "" NumNeg = ("", Nothing) -identifyNum ('0':'.':x:xs) NumNeg | isDigit x = case identifyNum (x:xs) NumFractionalComponent - of (ys, Just zs) -> (ys, Just ('-':'0':'.':zs)) - y -> y -identifyNum (x:xs) NumNeg | x == '0' = (x:xs, Nothing) - | isDigit x = case identifyNum (x:xs) NumIntegralComponent - of (ys, Just zs) -> (ys, Just ('-':zs)) - y -> y - | otherwise = (x:xs, Nothing) -identifyNum ('.':x:xs) NumIntegralComponent | isDigit x = case identifyNum (x:xs) NumFractionalComponent - of (ys, Just zs) -> (ys, Just ('.':zs)) - y -> y - | otherwise = ('.':x:xs, Just "") -identifyNum (w:x:y:xs) NumIntegralComponent | (w == 'e' || w == 'E') && - (x == '+' || x == '-') && - isDigit y = - case identifyNum (y:xs) NumExponentialComponent - of (ys, Just zs) -> (ys, Just (w:x:zs)) - z -> z - | (w == 'e' || w == 'E') && - isDigit x = - case identifyNum (x:y:xs) NumExponentialComponent - of (ys, Just zs) -> (ys, Just (w:zs)) - z -> z -identifyNum (x:y:"") NumIntegralComponent | (x == 'e' || x == 'E') && - isDigit y = ("", Just (x:y:"")) - | isDigit x && isDigit y = ("", Just (x:y:"")) - | isDigit x = (y:"", Just (x:"")) - | otherwise = (x:y:"", Just "") -identifyNum (x:xs) NumIntegralComponent | isDigit x = case identifyNum xs NumIntegralComponent - of (ys, Just zs) -> (ys, Just (x:zs)) - y -> y - | otherwise = (x:xs, Just "") -identifyNum (w:x:y:xs) NumFractionalComponent | (w == 'e' || w == 'E') && - (x == '+' || x == '-') && - isDigit y = - case identifyNum (y:xs) NumExponentialComponent - of (ys, Just zs) -> (ys, Just (w:x:zs)) - z -> z - | (w == 'e' || w == 'E') && - isDigit x = - case identifyNum (x:y:xs) NumExponentialComponent - of (ys, Just zs) -> (ys, Just (w:zs)) - z -> z -identifyNum (x:y:"") NumFractionalComponent | (x == 'e' || x == 'E') && - isDigit y = ("", Just (x:y:"")) - | isDigit x && isDigit y = ("", Just (x:y:"")) - | isDigit x = (y:"", Just (x:"")) - | otherwise = (x:y:"", Just "") -identifyNum (x:xs) NumFractionalComponent | isDigit x = case identifyNum xs NumFractionalComponent - of (ys, Just zs) -> (ys, Just (x:zs)) - y -> y - | otherwise = (x:xs, Just "") -identifyNum (x:xs) NumExponentialComponent | isDigit x = case identifyNum xs NumExponentialComponent - of (ys, Just zs) -> (ys, Just (x:zs)) - y -> y - | otherwise = (x:xs, Just "") -identifyNum [] _ = ("", Just "") - -lexStr :: String -> [Token] -lexStr "" = [] -lexStr ('(':xs) = LPar : lexStr xs -lexStr (')':xs) = RPar : lexStr xs -lexStr ('"':xs) = case identifyString ('"':xs) StrStart - of (ys, Nothing) -> Err "End of file reached before matching \"" :lexStr ys - (ys, Just b) -> Str b:lexStr ys -lexStr ('\'':"") = [Err "End of file reached before matching '"] -lexStr ('\'':'(':xs) = Quote : lexStr ('(':xs) -lexStr ('\'':_:"") = [Err "End of file reached before matching '"] -lexStr ('\'':x:'\'':xs) = Character x : lexStr xs -lexStr (' ':xs) = lexStr xs -lexStr ('\t':xs) = lexStr xs -lexStr ('\n':xs) = lexStr xs -lexStr ('\r':xs) = lexStr xs -lexStr (x:xs) | isDigit x || x == '-' = case identifyNum (x:xs) NumStart - of (ys, Nothing) -> Err "Failed to parse number" : lexStr ys - (ys, Just b) -> Number (read b) : lexStr ys -lexStr _ = [] +someFunc = putStrLn "Hello, world" diff --git a/test/Main.hs b/test/Main.hs index c4afb18..5483b06 100644 --- a/test/Main.hs +++ b/test/Main.hs @@ -1,20 +1,33 @@ module Main (main) where -import System.Exit (exitFailure) +import System.Exit (exitFailure, exitSuccess) import Control.Monad +import Test.HUnit + import Lib +test1 :: Test +test1 = TestCase (assertEqual "2==2" (2.0 :: Double) (2 :: Double)) + +tests :: Test +tests = TestList [ + TestLabel "test 1" test1 + ] + main :: IO () main = do when (lexStr "'('a' -1.01e12 \r \"\\uabcdasdf\\b\\f\\r\\n\\t\\\\\\/\" )\"asdf" /= [ - Quote, - LPar, - Character 'a', - Number (-1.01e12), - Str "\43981asdf\b\f\r\n\t\\/", - RPar, - Err "End of file reached before matching \"" + Lib.TokQuote, + Lib.TokLPar, + Lib.TokCharacter 'a', + Lib.TokNumber (-1.01e12), + Lib.TokStr "\43981asdf\b\f\r\n\t\\/", + Lib.TokRPar, + Lib.TokErr "End of file reached before matching \"" ]) exitFailure + c <- runTestTT tests + putStrLn (showCounts c) + exitSuccess