refactor strings
This commit is contained in:
parent
f16db81fd6
commit
4508879586
2 changed files with 86 additions and 45 deletions
102
src/Lexer.hs
102
src/Lexer.hs
|
@ -15,51 +15,62 @@ data Token = TokLPar
|
|||
| TokErr String
|
||||
deriving (Eq, Show)
|
||||
|
||||
isWhitespace :: Char -> Bool
|
||||
isWhitespace x = x == ' ' || x == '\t' || x == '\n' || x == '\r'
|
||||
|
||||
data StrState = StrStart
|
||||
| StrInside
|
||||
| StrEscape
|
||||
|
||||
identifyString :: String -> StrState -> (String, Maybe String)
|
||||
identifyString ('"':xs) StrStart = identifyString xs StrInside
|
||||
identifyString xs StrStart = (xs, Nothing) -- will never occur
|
||||
identifyString "" _ = ("", Nothing) -- early end of file
|
||||
identifySymbol :: String -> (String, String)
|
||||
identifySymbol "" = ("", "")
|
||||
identifySymbol (x:xs) | isWhitespace x = (x:xs, "")
|
||||
| otherwise = case identifySymbol xs of
|
||||
(ys, zs) -> (ys, x:zs)
|
||||
|
||||
identifyString :: String -> StrState -> Maybe (String, String)
|
||||
identifyString ('"':xs) StrStart = case identifyString xs StrInside of
|
||||
Nothing -> Nothing
|
||||
y -> y
|
||||
identifyString _ StrStart = Nothing -- should never occur unless not called on a string
|
||||
identifyString "" _ = Nothing -- early end of file
|
||||
identifyString ('\\':xs) StrInside = identifyString xs StrEscape
|
||||
identifyString ('"':xs) StrInside = (xs, Just "")
|
||||
identifyString ('n':xs) StrEscape = case identifyString xs StrInside
|
||||
of (ys, Just b) -> (ys, Just ('\n':b))
|
||||
y -> y
|
||||
identifyString ('\\':xs) StrEscape = case identifyString xs StrInside
|
||||
of (ys, Just b) -> (ys, Just ('\\':b))
|
||||
y -> y
|
||||
identifyString ('/':xs) StrEscape = case identifyString xs StrInside
|
||||
of (ys, Just b) -> (ys, Just ('/':b))
|
||||
y -> y
|
||||
identifyString ('"':xs) StrEscape = case identifyString xs StrInside
|
||||
of (ys, Just b) -> (ys, Just ('"':b))
|
||||
y -> y
|
||||
identifyString ('b':xs) StrEscape = case identifyString xs StrInside
|
||||
of (ys, Just b) -> (ys, Just ('\b':b))
|
||||
y -> y
|
||||
identifyString ('f':xs) StrEscape = case identifyString xs StrInside
|
||||
of (ys, Just b) -> (ys, Just ('\f':b))
|
||||
y -> y
|
||||
identifyString ('r':xs) StrEscape = case identifyString xs StrInside
|
||||
of (ys, Just b) -> (ys, Just ('\r':b))
|
||||
y -> y
|
||||
identifyString ('t':xs) StrEscape = case identifyString xs StrInside
|
||||
of (ys, Just b) -> (ys, Just ('\t':b))
|
||||
y -> y
|
||||
identifyString ('"':xs) StrInside = Just (xs, "")
|
||||
identifyString ('n':xs) StrEscape = case identifyString xs StrInside of
|
||||
Just (ys, b) -> Just (ys, '\n':b)
|
||||
y -> y
|
||||
identifyString ('\\':xs) StrEscape = case identifyString xs StrInside of
|
||||
Just (ys, b) -> Just (ys, '\\':b)
|
||||
y -> y
|
||||
identifyString ('/':xs) StrEscape = case identifyString xs StrInside of
|
||||
Just (ys, b) -> Just (ys, '/':b)
|
||||
y -> y
|
||||
identifyString ('"':xs) StrEscape = case identifyString xs StrInside of
|
||||
Just (ys, b) -> Just (ys, '"':b)
|
||||
y -> y
|
||||
identifyString ('b':xs) StrEscape = case identifyString xs StrInside of
|
||||
Just (ys, b) -> Just (ys, '\b':b)
|
||||
y -> y
|
||||
identifyString ('f':xs) StrEscape = case identifyString xs StrInside of
|
||||
Just (ys, b) -> Just (ys, '\f':b)
|
||||
y -> y
|
||||
identifyString ('r':xs) StrEscape = case identifyString xs StrInside of
|
||||
Just (ys, b) -> Just (ys, '\r':b)
|
||||
y -> y
|
||||
identifyString ('t':xs) StrEscape = case identifyString xs StrInside of
|
||||
Just (ys, b) -> Just (ys, '\t':b)
|
||||
y -> y
|
||||
identifyString ('u':w:x:y:z:xs) StrEscape | isHexDigit w &&
|
||||
isHexDigit x &&
|
||||
isHexDigit y &&
|
||||
isHexDigit z =
|
||||
case identifyString xs StrInside
|
||||
of (ys, Just b) -> (ys, Just (chr (read ('0':'x':w:x:y:z:"")):b))
|
||||
a -> a
|
||||
identifyString (x:xs) StrEscape = (x:xs, Nothing)
|
||||
identifyString (x:xs) StrInside = case identifyString xs StrInside
|
||||
of (ys, Nothing) -> (ys, Nothing)
|
||||
(ys, Just b) -> (ys, Just (x:b))
|
||||
case identifyString xs StrInside of
|
||||
Just (ys, b) -> Just (ys, chr (read ('0':'x':w:x:y:z:"")):b)
|
||||
a -> a
|
||||
identifyString _ StrEscape = Nothing
|
||||
identifyString (x:xs) StrInside = case identifyString xs StrInside of
|
||||
Just (ys, b) -> Just (ys, x:b)
|
||||
y -> y
|
||||
|
||||
data NumState = NumStart
|
||||
| NumNeg
|
||||
|
@ -141,16 +152,21 @@ lexStr "" = []
|
|||
lexStr ('(':xs) = TokLPar : lexStr xs
|
||||
lexStr (')':xs) = TokRPar : lexStr xs
|
||||
lexStr ('"':xs) = case identifyString ('"':xs) StrStart
|
||||
of (ys, Nothing) -> TokErr "End of file reached before matching \"" :lexStr ys
|
||||
(ys, Just b) -> TokStr b:lexStr ys
|
||||
lexStr ('\'':"") = [TokErr "End of file reached before matching '"]
|
||||
of Nothing -> case identifySymbol ('"':xs) of (rest, symb) -> TokSymbol symb : lexStr rest
|
||||
Just (ys, b) -> TokStr b:lexStr ys
|
||||
lexStr ('\'':"") = [TokSymbol "'"]
|
||||
lexStr ('\'':'(':xs) = TokQuote : lexStr ('(':xs)
|
||||
lexStr ('\'':x:'\'':xs) = TokCharacter x : lexStr xs
|
||||
lexStr ('\'':x:xs) | isWhitespace x = TokSymbol "'" : lexStr (x:xs)
|
||||
| otherwise = case identifySymbol (x:xs) of
|
||||
(rest, symb) -> TokQuote : TokSymbol symb : lexStr rest
|
||||
lexStr (' ':xs) = lexStr xs
|
||||
lexStr ('\t':xs) = lexStr xs
|
||||
lexStr ('\n':xs) = lexStr xs
|
||||
lexStr ('\r':xs) = lexStr xs
|
||||
lexStr (x:xs) | isDigit x || x == '-' = case identifyNum (x:xs) NumStart
|
||||
of (ys, Nothing) -> TokErr "Failed to parse number" : lexStr ys
|
||||
(ys, Just b) -> TokNumber (read b) : lexStr ys
|
||||
lexStr _ = []
|
||||
lexStr (x:xs) | isDigit x ||
|
||||
x == '-' =
|
||||
case identifyNum (x:xs) NumStart of
|
||||
(ys, Nothing) -> TokErr "Failed to parse number" : lexStr ys
|
||||
(ys, Just b) -> TokNumber (read b) : lexStr ys
|
||||
lexStr xs = case identifySymbol xs of (ys, zs) -> TokSymbol zs : lexStr ys
|
||||
|
|
29
test/Main.hs
29
test/Main.hs
|
@ -8,16 +8,31 @@ testPars = TestCase (assertEqual "() -> [TokLPar,TokRPar]"
|
|||
[Lib.TokLPar, Lib.TokRPar]
|
||||
(lexStr "()"))
|
||||
|
||||
testSymbol :: Test
|
||||
testSymbol = TestCase (assertEqual "hello world -> [TokSymbol \"hello\", TokSymbol \"world\"]"
|
||||
[Lib.TokSymbol "hello", Lib.TokSymbol "world"]
|
||||
(lexStr "hello world"))
|
||||
|
||||
testQuotedPars :: Test
|
||||
testQuotedPars = TestCase (assertEqual "'() -> [TokQuote,TokLPar,TokRPar]"
|
||||
[Lib.TokQuote, Lib.TokLPar, Lib.TokRPar]
|
||||
(lexStr "'()"))
|
||||
|
||||
testLoneQuoteIsSymbol :: Test
|
||||
testLoneQuoteIsSymbol = TestCase (assertEqual "' ' -> [TokSymbol \"'\", TokSymbol \"'\"]"
|
||||
[Lib.TokSymbol "'", Lib.TokSymbol "'"]
|
||||
(lexStr "' '"))
|
||||
|
||||
testCharacter :: Test
|
||||
testCharacter = TestCase (assertEqual "'a' -> [TokCharacter 'a']"
|
||||
[Lib.TokCharacter 'a']
|
||||
(lexStr "'a'"))
|
||||
|
||||
testQuotedSymbol :: Test
|
||||
testQuotedSymbol = TestCase (assertEqual "'hello -> [TokQuote,TokSymbol \"hello\"]"
|
||||
[Lib.TokQuote, Lib.TokSymbol "hello"]
|
||||
(lexStr "'hello"))
|
||||
|
||||
testUnicodeCharacter :: Test
|
||||
testUnicodeCharacter = TestCase (assertEqual "'📎' -> [TokCharacter '📎']"
|
||||
[Lib.TokCharacter '📎']
|
||||
|
@ -50,22 +65,32 @@ testNegativeNumber = TestCase (assertEqual "-42.03e-12 -> [TokNumber -42.03e-12]
|
|||
|
||||
testStringsAreRecognized :: Test
|
||||
testStringsAreRecognized = TestCase (assertEqual
|
||||
"\"\\uabcdasdf\\b\\f\\r\\n\\t\\\\\\/\" -> [Lib.TokStr \"\43981asdf\b\f\r\n\t\\/\""
|
||||
"can strings be recognized?"
|
||||
[Lib.TokStr "\43981asdf\b\f\r\n\t\\/"]
|
||||
(lexStr "\"\\uabcdasdf\\b\\f\\r\\n\\t\\\\\\/\""))
|
||||
|
||||
testUnclosedStringsAreSymbols :: Test
|
||||
testUnclosedStringsAreSymbols = TestCase (assertEqual
|
||||
"this would make for a terrible symbol, but it can hypothetically start with a double quote"
|
||||
[Lib.TokSymbol "\"hello", Lib.TokSymbol "world"]
|
||||
(lexStr "\"hello world"))
|
||||
|
||||
tests :: Test
|
||||
tests = TestList [
|
||||
TestLabel "test paretheses are detected" testPars,
|
||||
TestLabel "test symbols are recognized" testSymbol,
|
||||
TestLabel "test quoted parentheses are detected" testQuotedPars,
|
||||
TestLabel "test single quotes without symbols or characters are symbols themselves" testLoneQuoteIsSymbol,
|
||||
TestLabel "test characters are recognized" testCharacter,
|
||||
TestLabel "test quoted symbols are recognized" testQuotedSymbol,
|
||||
TestLabel "test unicode characters are recognized" testUnicodeCharacter,
|
||||
TestLabel "test simple integers are recognized" testSimpleIntegers,
|
||||
TestLabel "test fractional numbers are recognized" testFractionalNumber,
|
||||
TestLabel "test integers with exponential component are recognized" testExponentialInteger,
|
||||
TestLabel "test exponential number is recognized" testExponential,
|
||||
TestLabel "test negative numbers are recognized" testNegativeNumber,
|
||||
TestLabel "test strings are recognized" testStringsAreRecognized
|
||||
TestLabel "test strings are recognized" testStringsAreRecognized,
|
||||
TestLabel "test unclosed strings are symbols" testUnclosedStringsAreSymbols
|
||||
]
|
||||
|
||||
main :: IO ()
|
||||
|
|
Loading…
Add table
Reference in a new issue