refactor number recognition
This commit is contained in:
parent
d12ca569e9
commit
0d6277a3a7
2 changed files with 84 additions and 87 deletions
131
src/Lexer.hs
131
src/Lexer.hs
|
@ -60,13 +60,14 @@ identifyString ('r':xs) StrEscape = case identifyString xs StrInside of
|
|||
identifyString ('t':xs) StrEscape = case identifyString xs StrInside of
|
||||
Just (ys, b) -> Just (ys, '\t':b)
|
||||
y -> y
|
||||
identifyString ('u':w:x:y:z:xs) StrEscape | isHexDigit w &&
|
||||
isHexDigit x &&
|
||||
isHexDigit y &&
|
||||
isHexDigit z =
|
||||
case identifyString xs StrInside of
|
||||
Just (ys, b) -> Just (ys, chr (read ('0':'x':w:x:y:z:"")):b)
|
||||
a -> a
|
||||
identifyString ('u':w:x:y:z:xs) StrEscape
|
||||
| isHexDigit w &&
|
||||
isHexDigit x &&
|
||||
isHexDigit y &&
|
||||
isHexDigit z =
|
||||
case identifyString xs StrInside of
|
||||
Just (ys, b) -> Just (ys, chr (read ('0':'x':w:x:y:z:"")):b)
|
||||
a -> a
|
||||
identifyString _ StrEscape = Nothing
|
||||
identifyString (x:xs) StrInside = case identifyString xs StrInside of
|
||||
Just (ys, b) -> Just (ys, x:b)
|
||||
|
@ -78,74 +79,64 @@ data NumState = NumStart
|
|||
| NumFractionalComponent
|
||||
| NumExponentialComponent
|
||||
|
||||
identifyNum :: String -> NumState -> (String, Maybe String)
|
||||
identifyNum "" NumStart = ("", Nothing)
|
||||
identifyNum ('-':xs) NumStart = identifyNum xs NumNeg
|
||||
identifyNum ('0':'.':x:xs) NumStart | isDigit x = case identifyNum (x:xs) NumFractionalComponent
|
||||
of (ys, Just zs) -> (ys, Just ('0':'.':zs))
|
||||
y -> y
|
||||
| otherwise = (x:xs, Nothing)
|
||||
identifyNum (x:xs) NumStart | x == '0' = (x:xs, Nothing)
|
||||
identifyNum :: String -> NumState -> Maybe (String, String)
|
||||
identifyNum "" _ = Nothing
|
||||
identifyNum (x:"") _ | isDigit x = Just ("", x:"")
|
||||
identifyNum (x:y:xs) _ | isDigit x && isWhitespace y = Just (y:xs, x:"")
|
||||
identifyNum ('-':xs) NumStart = case identifyNum xs NumNeg of
|
||||
Just (ys, zs) -> Just (ys, '-':zs)
|
||||
y -> y
|
||||
identifyNum ('0':'.':x:xs) NumStart | isDigit x = case identifyNum (x:xs) NumFractionalComponent of
|
||||
Just (ys, zs) -> Just (ys, '0':'.':zs)
|
||||
y -> y
|
||||
identifyNum (x:xs) NumStart | x == '0' = Nothing
|
||||
| isDigit x = identifyNum (x:xs) NumIntegralComponent
|
||||
| otherwise = (x:xs, Nothing)
|
||||
identifyNum "" NumNeg = ("", Nothing)
|
||||
identifyNum ('0':'.':x:xs) NumNeg | isDigit x = case identifyNum (x:xs) NumFractionalComponent
|
||||
of (ys, Just zs) -> (ys, Just ('-':'0':'.':zs))
|
||||
y -> y
|
||||
identifyNum (x:xs) NumNeg | x == '0' = (x:xs, Nothing)
|
||||
| isDigit x = case identifyNum (x:xs) NumIntegralComponent
|
||||
of (ys, Just zs) -> (ys, Just ('-':zs))
|
||||
y -> y
|
||||
| otherwise = (x:xs, Nothing)
|
||||
identifyNum ('.':x:xs) NumIntegralComponent | isDigit x = case identifyNum (x:xs) NumFractionalComponent
|
||||
of (ys, Just zs) -> (ys, Just ('.':zs))
|
||||
y -> y
|
||||
| otherwise = ('.':x:xs, Just "")
|
||||
| otherwise = Nothing
|
||||
identifyNum ('0':'.':x:xs) NumNeg | isDigit x = case identifyNum (x:xs) NumFractionalComponent of
|
||||
Just (ys, zs) -> Just (ys, '0':'.':zs)
|
||||
y -> y
|
||||
identifyNum (x:xs) NumNeg | x == '0' = Nothing
|
||||
| isDigit x = identifyNum (x:xs) NumIntegralComponent
|
||||
| otherwise = Nothing
|
||||
identifyNum ('.':x:xs) NumIntegralComponent
|
||||
| isDigit x = case identifyNum (x:xs) NumFractionalComponent of
|
||||
Just (ys, zs) -> Just (ys, '.':zs)
|
||||
y -> y
|
||||
| otherwise = Nothing
|
||||
identifyNum (w:x:y:xs) NumIntegralComponent | (w == 'e' || w == 'E') &&
|
||||
(x == '+' || x == '-') &&
|
||||
isDigit y =
|
||||
case identifyNum (y:xs) NumExponentialComponent
|
||||
of (ys, Just zs) -> (ys, Just (w:x:zs))
|
||||
z -> z
|
||||
| (w == 'e' || w == 'E') &&
|
||||
isDigit x =
|
||||
case identifyNum (x:y:xs) NumExponentialComponent
|
||||
of (ys, Just zs) -> (ys, Just (w:zs))
|
||||
z -> z
|
||||
identifyNum (x:y:"") NumIntegralComponent | (x == 'e' || x == 'E') &&
|
||||
isDigit y = ("", Just (x:y:""))
|
||||
| isDigit x && isDigit y = ("", Just (x:y:""))
|
||||
| isDigit x = (y:"", Just (x:""))
|
||||
| otherwise = (x:y:"", Just "")
|
||||
identifyNum (x:xs) NumIntegralComponent | isDigit x = case identifyNum xs NumIntegralComponent
|
||||
of (ys, Just zs) -> (ys, Just (x:zs))
|
||||
y -> y
|
||||
| otherwise = (x:xs, Just "")
|
||||
case identifyNum (y:xs) NumExponentialComponent of
|
||||
Just (ys, zs) -> Just (ys, w:x:zs)
|
||||
z -> z
|
||||
identifyNum (x:y:xs) NumIntegralComponent | (x == 'e' || x == 'E') &&
|
||||
isDigit y =
|
||||
case identifyNum (y:xs) NumExponentialComponent of
|
||||
Just (ys, zs) -> Just (ys, x:zs)
|
||||
z -> z
|
||||
identifyNum (x:xs) NumIntegralComponent | isDigit x = case identifyNum xs NumIntegralComponent of
|
||||
Just (ys, zs) -> Just (ys, x:zs)
|
||||
y -> y
|
||||
| otherwise = Nothing
|
||||
identifyNum (w:x:y:xs) NumFractionalComponent | (w == 'e' || w == 'E') &&
|
||||
(x == '+' || x == '-') &&
|
||||
isDigit y =
|
||||
case identifyNum (y:xs) NumExponentialComponent
|
||||
of (ys, Just zs) -> (ys, Just (w:x:zs))
|
||||
z -> z
|
||||
| (w == 'e' || w == 'E') &&
|
||||
isDigit x =
|
||||
case identifyNum (x:y:xs) NumExponentialComponent
|
||||
of (ys, Just zs) -> (ys, Just (w:zs))
|
||||
z -> z
|
||||
identifyNum (x:y:"") NumFractionalComponent | (x == 'e' || x == 'E') &&
|
||||
isDigit y = ("", Just (x:y:""))
|
||||
| isDigit x && isDigit y = ("", Just (x:y:""))
|
||||
| isDigit x = (y:"", Just (x:""))
|
||||
| otherwise = (x:y:"", Just "")
|
||||
identifyNum (x:xs) NumFractionalComponent | isDigit x = case identifyNum xs NumFractionalComponent
|
||||
of (ys, Just zs) -> (ys, Just (x:zs))
|
||||
y -> y
|
||||
| otherwise = (x:xs, Just "")
|
||||
identifyNum (x:xs) NumExponentialComponent | isDigit x = case identifyNum xs NumExponentialComponent
|
||||
of (ys, Just zs) -> (ys, Just (x:zs))
|
||||
y -> y
|
||||
| otherwise = (x:xs, Just "")
|
||||
identifyNum [] _ = ("", Just "")
|
||||
case identifyNum (y:xs) NumExponentialComponent of
|
||||
Just (ys, zs) -> Just (ys, w:x:zs)
|
||||
z -> z
|
||||
identifyNum (x:y:xs) NumFractionalComponent | (x == 'e' || x == 'E') &&
|
||||
isDigit y =
|
||||
case identifyNum (y:xs) NumExponentialComponent of
|
||||
Just (ys, zs) -> Just (ys, x:zs)
|
||||
z -> z
|
||||
identifyNum (x:xs) NumFractionalComponent | isDigit x = case identifyNum xs NumFractionalComponent of
|
||||
Just (ys, zs) -> Just (ys, x:zs)
|
||||
y -> y
|
||||
| otherwise = Nothing
|
||||
identifyNum (x:xs) NumExponentialComponent | isDigit x = case identifyNum xs NumExponentialComponent of
|
||||
Just (ys, zs) -> Just (ys, x:zs)
|
||||
y -> y
|
||||
| otherwise = Nothing
|
||||
|
||||
lexStr :: String -> [Token]
|
||||
lexStr "" = []
|
||||
|
@ -167,6 +158,6 @@ lexStr ('\r':xs) = lexStr xs
|
|||
lexStr (x:xs) | isDigit x ||
|
||||
x == '-' =
|
||||
case identifyNum (x:xs) NumStart of
|
||||
(ys, Nothing) -> TokErr "Failed to parse number" : lexStr ys
|
||||
(ys, Just b) -> TokNumber (read b) : lexStr ys
|
||||
Nothing -> case identifySymbol (x:xs) of (rest, symb) -> TokSymbol symb : lexStr rest
|
||||
Just (ys, b) -> TokNumber (read b) : lexStr ys
|
||||
lexStr xs = case identifySymbol xs of (ys, zs) -> TokSymbol zs : lexStr ys
|
||||
|
|
40
test/Main.hs
40
test/Main.hs
|
@ -4,68 +4,73 @@ import Test.HUnit
|
|||
import Lib
|
||||
|
||||
testPars :: Test
|
||||
testPars = TestCase (assertEqual "() -> [TokLPar,TokRPar]"
|
||||
testPars = TestCase (assertEqual "recognize parentheses"
|
||||
[Lib.TokLPar, Lib.TokRPar]
|
||||
(lexStr "()"))
|
||||
|
||||
testSymbol :: Test
|
||||
testSymbol = TestCase (assertEqual "hello world -> [TokSymbol \"hello\", TokSymbol \"world\"]"
|
||||
testSymbol = TestCase (assertEqual "recognize symbols"
|
||||
[Lib.TokSymbol "hello", Lib.TokSymbol "world"]
|
||||
(lexStr "hello world"))
|
||||
|
||||
testQuotedPars :: Test
|
||||
testQuotedPars = TestCase (assertEqual "'() -> [TokQuote,TokLPar,TokRPar]"
|
||||
testQuotedPars = TestCase (assertEqual "recognize quoted parentheses"
|
||||
[Lib.TokQuote, Lib.TokLPar, Lib.TokRPar]
|
||||
(lexStr "'()"))
|
||||
|
||||
testLoneQuoteIsSymbol :: Test
|
||||
testLoneQuoteIsSymbol = TestCase (assertEqual "' ' -> [TokSymbol \"'\", TokSymbol \"'\"]"
|
||||
testLoneQuoteIsSymbol = TestCase (assertEqual "a quote on its own is just a symbol"
|
||||
[Lib.TokSymbol "'", Lib.TokSymbol "'"]
|
||||
(lexStr "' '"))
|
||||
|
||||
testCharacter :: Test
|
||||
testCharacter = TestCase (assertEqual "'a' -> [TokCharacter 'a']"
|
||||
testCharacter = TestCase (assertEqual "recognize characters"
|
||||
[Lib.TokCharacter 'a']
|
||||
(lexStr "'a'"))
|
||||
|
||||
testQuotedSymbol :: Test
|
||||
testQuotedSymbol = TestCase (assertEqual "'hello -> [TokQuote,TokSymbol \"hello\"]"
|
||||
testQuotedSymbol = TestCase (assertEqual "recognize quoted symbols"
|
||||
[Lib.TokQuote, Lib.TokSymbol "hello"]
|
||||
(lexStr "'hello"))
|
||||
|
||||
testUnicodeCharacter :: Test
|
||||
testUnicodeCharacter = TestCase (assertEqual "'📎' -> [TokCharacter '📎']"
|
||||
testUnicodeCharacter = TestCase (assertEqual "recognize unicode characters"
|
||||
[Lib.TokCharacter '📎']
|
||||
(lexStr "'📎'"))
|
||||
|
||||
testSimpleIntegers :: Test
|
||||
testSimpleIntegers = TestCase (assertEqual "42 -> [TokNumber 42]"
|
||||
testSimpleIntegers = TestCase (assertEqual "recognize basic integers"
|
||||
[Lib.TokNumber 42]
|
||||
(lexStr "42"))
|
||||
(lexStr "42\t"))
|
||||
|
||||
testFractionalNumber :: Test
|
||||
testFractionalNumber = TestCase (assertEqual "42.123 -> [TokNumber 42.123]"
|
||||
testFractionalNumber = TestCase (assertEqual "recognize numbers with fractional components"
|
||||
[Lib.TokNumber 42.123]
|
||||
(lexStr "42.123"))
|
||||
(lexStr "42.123 "))
|
||||
|
||||
testExponentialInteger :: Test
|
||||
testExponentialInteger = TestCase (assertEqual "42e123 -> [TokNumber 42e123]"
|
||||
testExponentialInteger = TestCase (assertEqual "recognize integers with exponents"
|
||||
[Lib.TokNumber 42e123]
|
||||
(lexStr "42e123"))
|
||||
(lexStr "42e123\n"))
|
||||
|
||||
testExponential :: Test
|
||||
testExponential = TestCase (assertEqual "42.03E+12 -> [TokNumber 42.03e12]"
|
||||
testExponential = TestCase (assertEqual "recognize fractional numbers with exponents"
|
||||
[Lib.TokNumber 42.03e12]
|
||||
(lexStr "42.03E+12"))
|
||||
(lexStr "42.03E+12\r"))
|
||||
|
||||
testNegativeNumber :: Test
|
||||
testNegativeNumber = TestCase (assertEqual "-42.03e-12 -> [TokNumber -42.03e-12]"
|
||||
testNegativeNumber = TestCase (assertEqual "recognize negative numbers"
|
||||
[Lib.TokNumber (-42.03e-12)]
|
||||
(lexStr "-42.03e-12"))
|
||||
|
||||
testMalformedNumbersAreSymbols :: Test
|
||||
testMalformedNumbersAreSymbols = TestCase (assertEqual "recognize symbols that arise from invalid numbers"
|
||||
[Lib.TokSymbol "-", Lib.TokSymbol "123abc", Lib.TokSymbol "-123.01f23"]
|
||||
(lexStr "- 123abc -123.01f23"))
|
||||
|
||||
testStringsAreRecognized :: Test
|
||||
testStringsAreRecognized = TestCase (assertEqual
|
||||
"can strings be recognized?"
|
||||
"recognize strings"
|
||||
[Lib.TokStr "\43981asdf\b\f\r\n\t\\/"]
|
||||
(lexStr "\"\\uabcdasdf\\b\\f\\r\\n\\t\\\\\\/\""))
|
||||
|
||||
|
@ -89,6 +94,7 @@ tests = TestList [
|
|||
TestLabel "test integers with exponential component are recognized" testExponentialInteger,
|
||||
TestLabel "test exponential number is recognized" testExponential,
|
||||
TestLabel "test negative numbers are recognized" testNegativeNumber,
|
||||
TestLabel "test malformed numbers count as symbols" testMalformedNumbersAreSymbols,
|
||||
TestLabel "test strings are recognized" testStringsAreRecognized,
|
||||
TestLabel "test unclosed strings are symbols" testUnclosedStringsAreSymbols
|
||||
]
|
||||
|
|
Loading…
Add table
Reference in a new issue