refactor number recognition
This commit is contained in:
parent
d12ca569e9
commit
0d6277a3a7
2 changed files with 84 additions and 87 deletions
99
src/Lexer.hs
99
src/Lexer.hs
|
@ -60,7 +60,8 @@ identifyString ('r':xs) StrEscape = case identifyString xs StrInside of
|
||||||
identifyString ('t':xs) StrEscape = case identifyString xs StrInside of
|
identifyString ('t':xs) StrEscape = case identifyString xs StrInside of
|
||||||
Just (ys, b) -> Just (ys, '\t':b)
|
Just (ys, b) -> Just (ys, '\t':b)
|
||||||
y -> y
|
y -> y
|
||||||
identifyString ('u':w:x:y:z:xs) StrEscape | isHexDigit w &&
|
identifyString ('u':w:x:y:z:xs) StrEscape
|
||||||
|
| isHexDigit w &&
|
||||||
isHexDigit x &&
|
isHexDigit x &&
|
||||||
isHexDigit y &&
|
isHexDigit y &&
|
||||||
isHexDigit z =
|
isHexDigit z =
|
||||||
|
@ -78,74 +79,64 @@ data NumState = NumStart
|
||||||
| NumFractionalComponent
|
| NumFractionalComponent
|
||||||
| NumExponentialComponent
|
| NumExponentialComponent
|
||||||
|
|
||||||
identifyNum :: String -> NumState -> (String, Maybe String)
|
identifyNum :: String -> NumState -> Maybe (String, String)
|
||||||
identifyNum "" NumStart = ("", Nothing)
|
identifyNum "" _ = Nothing
|
||||||
identifyNum ('-':xs) NumStart = identifyNum xs NumNeg
|
identifyNum (x:"") _ | isDigit x = Just ("", x:"")
|
||||||
identifyNum ('0':'.':x:xs) NumStart | isDigit x = case identifyNum (x:xs) NumFractionalComponent
|
identifyNum (x:y:xs) _ | isDigit x && isWhitespace y = Just (y:xs, x:"")
|
||||||
of (ys, Just zs) -> (ys, Just ('0':'.':zs))
|
identifyNum ('-':xs) NumStart = case identifyNum xs NumNeg of
|
||||||
|
Just (ys, zs) -> Just (ys, '-':zs)
|
||||||
y -> y
|
y -> y
|
||||||
| otherwise = (x:xs, Nothing)
|
identifyNum ('0':'.':x:xs) NumStart | isDigit x = case identifyNum (x:xs) NumFractionalComponent of
|
||||||
identifyNum (x:xs) NumStart | x == '0' = (x:xs, Nothing)
|
Just (ys, zs) -> Just (ys, '0':'.':zs)
|
||||||
|
y -> y
|
||||||
|
identifyNum (x:xs) NumStart | x == '0' = Nothing
|
||||||
| isDigit x = identifyNum (x:xs) NumIntegralComponent
|
| isDigit x = identifyNum (x:xs) NumIntegralComponent
|
||||||
| otherwise = (x:xs, Nothing)
|
| otherwise = Nothing
|
||||||
identifyNum "" NumNeg = ("", Nothing)
|
identifyNum ('0':'.':x:xs) NumNeg | isDigit x = case identifyNum (x:xs) NumFractionalComponent of
|
||||||
identifyNum ('0':'.':x:xs) NumNeg | isDigit x = case identifyNum (x:xs) NumFractionalComponent
|
Just (ys, zs) -> Just (ys, '0':'.':zs)
|
||||||
of (ys, Just zs) -> (ys, Just ('-':'0':'.':zs))
|
|
||||||
y -> y
|
y -> y
|
||||||
identifyNum (x:xs) NumNeg | x == '0' = (x:xs, Nothing)
|
identifyNum (x:xs) NumNeg | x == '0' = Nothing
|
||||||
| isDigit x = case identifyNum (x:xs) NumIntegralComponent
|
| isDigit x = identifyNum (x:xs) NumIntegralComponent
|
||||||
of (ys, Just zs) -> (ys, Just ('-':zs))
|
| otherwise = Nothing
|
||||||
|
identifyNum ('.':x:xs) NumIntegralComponent
|
||||||
|
| isDigit x = case identifyNum (x:xs) NumFractionalComponent of
|
||||||
|
Just (ys, zs) -> Just (ys, '.':zs)
|
||||||
y -> y
|
y -> y
|
||||||
| otherwise = (x:xs, Nothing)
|
| otherwise = Nothing
|
||||||
identifyNum ('.':x:xs) NumIntegralComponent | isDigit x = case identifyNum (x:xs) NumFractionalComponent
|
|
||||||
of (ys, Just zs) -> (ys, Just ('.':zs))
|
|
||||||
y -> y
|
|
||||||
| otherwise = ('.':x:xs, Just "")
|
|
||||||
identifyNum (w:x:y:xs) NumIntegralComponent | (w == 'e' || w == 'E') &&
|
identifyNum (w:x:y:xs) NumIntegralComponent | (w == 'e' || w == 'E') &&
|
||||||
(x == '+' || x == '-') &&
|
(x == '+' || x == '-') &&
|
||||||
isDigit y =
|
isDigit y =
|
||||||
case identifyNum (y:xs) NumExponentialComponent
|
case identifyNum (y:xs) NumExponentialComponent of
|
||||||
of (ys, Just zs) -> (ys, Just (w:x:zs))
|
Just (ys, zs) -> Just (ys, w:x:zs)
|
||||||
z -> z
|
z -> z
|
||||||
| (w == 'e' || w == 'E') &&
|
identifyNum (x:y:xs) NumIntegralComponent | (x == 'e' || x == 'E') &&
|
||||||
isDigit x =
|
isDigit y =
|
||||||
case identifyNum (x:y:xs) NumExponentialComponent
|
case identifyNum (y:xs) NumExponentialComponent of
|
||||||
of (ys, Just zs) -> (ys, Just (w:zs))
|
Just (ys, zs) -> Just (ys, x:zs)
|
||||||
z -> z
|
z -> z
|
||||||
identifyNum (x:y:"") NumIntegralComponent | (x == 'e' || x == 'E') &&
|
identifyNum (x:xs) NumIntegralComponent | isDigit x = case identifyNum xs NumIntegralComponent of
|
||||||
isDigit y = ("", Just (x:y:""))
|
Just (ys, zs) -> Just (ys, x:zs)
|
||||||
| isDigit x && isDigit y = ("", Just (x:y:""))
|
|
||||||
| isDigit x = (y:"", Just (x:""))
|
|
||||||
| otherwise = (x:y:"", Just "")
|
|
||||||
identifyNum (x:xs) NumIntegralComponent | isDigit x = case identifyNum xs NumIntegralComponent
|
|
||||||
of (ys, Just zs) -> (ys, Just (x:zs))
|
|
||||||
y -> y
|
y -> y
|
||||||
| otherwise = (x:xs, Just "")
|
| otherwise = Nothing
|
||||||
identifyNum (w:x:y:xs) NumFractionalComponent | (w == 'e' || w == 'E') &&
|
identifyNum (w:x:y:xs) NumFractionalComponent | (w == 'e' || w == 'E') &&
|
||||||
(x == '+' || x == '-') &&
|
(x == '+' || x == '-') &&
|
||||||
isDigit y =
|
isDigit y =
|
||||||
case identifyNum (y:xs) NumExponentialComponent
|
case identifyNum (y:xs) NumExponentialComponent of
|
||||||
of (ys, Just zs) -> (ys, Just (w:x:zs))
|
Just (ys, zs) -> Just (ys, w:x:zs)
|
||||||
z -> z
|
z -> z
|
||||||
| (w == 'e' || w == 'E') &&
|
identifyNum (x:y:xs) NumFractionalComponent | (x == 'e' || x == 'E') &&
|
||||||
isDigit x =
|
isDigit y =
|
||||||
case identifyNum (x:y:xs) NumExponentialComponent
|
case identifyNum (y:xs) NumExponentialComponent of
|
||||||
of (ys, Just zs) -> (ys, Just (w:zs))
|
Just (ys, zs) -> Just (ys, x:zs)
|
||||||
z -> z
|
z -> z
|
||||||
identifyNum (x:y:"") NumFractionalComponent | (x == 'e' || x == 'E') &&
|
identifyNum (x:xs) NumFractionalComponent | isDigit x = case identifyNum xs NumFractionalComponent of
|
||||||
isDigit y = ("", Just (x:y:""))
|
Just (ys, zs) -> Just (ys, x:zs)
|
||||||
| isDigit x && isDigit y = ("", Just (x:y:""))
|
|
||||||
| isDigit x = (y:"", Just (x:""))
|
|
||||||
| otherwise = (x:y:"", Just "")
|
|
||||||
identifyNum (x:xs) NumFractionalComponent | isDigit x = case identifyNum xs NumFractionalComponent
|
|
||||||
of (ys, Just zs) -> (ys, Just (x:zs))
|
|
||||||
y -> y
|
y -> y
|
||||||
| otherwise = (x:xs, Just "")
|
| otherwise = Nothing
|
||||||
identifyNum (x:xs) NumExponentialComponent | isDigit x = case identifyNum xs NumExponentialComponent
|
identifyNum (x:xs) NumExponentialComponent | isDigit x = case identifyNum xs NumExponentialComponent of
|
||||||
of (ys, Just zs) -> (ys, Just (x:zs))
|
Just (ys, zs) -> Just (ys, x:zs)
|
||||||
y -> y
|
y -> y
|
||||||
| otherwise = (x:xs, Just "")
|
| otherwise = Nothing
|
||||||
identifyNum [] _ = ("", Just "")
|
|
||||||
|
|
||||||
lexStr :: String -> [Token]
|
lexStr :: String -> [Token]
|
||||||
lexStr "" = []
|
lexStr "" = []
|
||||||
|
@ -167,6 +158,6 @@ lexStr ('\r':xs) = lexStr xs
|
||||||
lexStr (x:xs) | isDigit x ||
|
lexStr (x:xs) | isDigit x ||
|
||||||
x == '-' =
|
x == '-' =
|
||||||
case identifyNum (x:xs) NumStart of
|
case identifyNum (x:xs) NumStart of
|
||||||
(ys, Nothing) -> TokErr "Failed to parse number" : lexStr ys
|
Nothing -> case identifySymbol (x:xs) of (rest, symb) -> TokSymbol symb : lexStr rest
|
||||||
(ys, Just b) -> TokNumber (read b) : lexStr ys
|
Just (ys, b) -> TokNumber (read b) : lexStr ys
|
||||||
lexStr xs = case identifySymbol xs of (ys, zs) -> TokSymbol zs : lexStr ys
|
lexStr xs = case identifySymbol xs of (ys, zs) -> TokSymbol zs : lexStr ys
|
||||||
|
|
40
test/Main.hs
40
test/Main.hs
|
@ -4,68 +4,73 @@ import Test.HUnit
|
||||||
import Lib
|
import Lib
|
||||||
|
|
||||||
testPars :: Test
|
testPars :: Test
|
||||||
testPars = TestCase (assertEqual "() -> [TokLPar,TokRPar]"
|
testPars = TestCase (assertEqual "recognize parentheses"
|
||||||
[Lib.TokLPar, Lib.TokRPar]
|
[Lib.TokLPar, Lib.TokRPar]
|
||||||
(lexStr "()"))
|
(lexStr "()"))
|
||||||
|
|
||||||
testSymbol :: Test
|
testSymbol :: Test
|
||||||
testSymbol = TestCase (assertEqual "hello world -> [TokSymbol \"hello\", TokSymbol \"world\"]"
|
testSymbol = TestCase (assertEqual "recognize symbols"
|
||||||
[Lib.TokSymbol "hello", Lib.TokSymbol "world"]
|
[Lib.TokSymbol "hello", Lib.TokSymbol "world"]
|
||||||
(lexStr "hello world"))
|
(lexStr "hello world"))
|
||||||
|
|
||||||
testQuotedPars :: Test
|
testQuotedPars :: Test
|
||||||
testQuotedPars = TestCase (assertEqual "'() -> [TokQuote,TokLPar,TokRPar]"
|
testQuotedPars = TestCase (assertEqual "recognize quoted parentheses"
|
||||||
[Lib.TokQuote, Lib.TokLPar, Lib.TokRPar]
|
[Lib.TokQuote, Lib.TokLPar, Lib.TokRPar]
|
||||||
(lexStr "'()"))
|
(lexStr "'()"))
|
||||||
|
|
||||||
testLoneQuoteIsSymbol :: Test
|
testLoneQuoteIsSymbol :: Test
|
||||||
testLoneQuoteIsSymbol = TestCase (assertEqual "' ' -> [TokSymbol \"'\", TokSymbol \"'\"]"
|
testLoneQuoteIsSymbol = TestCase (assertEqual "a quote on its own is just a symbol"
|
||||||
[Lib.TokSymbol "'", Lib.TokSymbol "'"]
|
[Lib.TokSymbol "'", Lib.TokSymbol "'"]
|
||||||
(lexStr "' '"))
|
(lexStr "' '"))
|
||||||
|
|
||||||
testCharacter :: Test
|
testCharacter :: Test
|
||||||
testCharacter = TestCase (assertEqual "'a' -> [TokCharacter 'a']"
|
testCharacter = TestCase (assertEqual "recognize characters"
|
||||||
[Lib.TokCharacter 'a']
|
[Lib.TokCharacter 'a']
|
||||||
(lexStr "'a'"))
|
(lexStr "'a'"))
|
||||||
|
|
||||||
testQuotedSymbol :: Test
|
testQuotedSymbol :: Test
|
||||||
testQuotedSymbol = TestCase (assertEqual "'hello -> [TokQuote,TokSymbol \"hello\"]"
|
testQuotedSymbol = TestCase (assertEqual "recognize quoted symbols"
|
||||||
[Lib.TokQuote, Lib.TokSymbol "hello"]
|
[Lib.TokQuote, Lib.TokSymbol "hello"]
|
||||||
(lexStr "'hello"))
|
(lexStr "'hello"))
|
||||||
|
|
||||||
testUnicodeCharacter :: Test
|
testUnicodeCharacter :: Test
|
||||||
testUnicodeCharacter = TestCase (assertEqual "'📎' -> [TokCharacter '📎']"
|
testUnicodeCharacter = TestCase (assertEqual "recognize unicode characters"
|
||||||
[Lib.TokCharacter '📎']
|
[Lib.TokCharacter '📎']
|
||||||
(lexStr "'📎'"))
|
(lexStr "'📎'"))
|
||||||
|
|
||||||
testSimpleIntegers :: Test
|
testSimpleIntegers :: Test
|
||||||
testSimpleIntegers = TestCase (assertEqual "42 -> [TokNumber 42]"
|
testSimpleIntegers = TestCase (assertEqual "recognize basic integers"
|
||||||
[Lib.TokNumber 42]
|
[Lib.TokNumber 42]
|
||||||
(lexStr "42"))
|
(lexStr "42\t"))
|
||||||
|
|
||||||
testFractionalNumber :: Test
|
testFractionalNumber :: Test
|
||||||
testFractionalNumber = TestCase (assertEqual "42.123 -> [TokNumber 42.123]"
|
testFractionalNumber = TestCase (assertEqual "recognize numbers with fractional components"
|
||||||
[Lib.TokNumber 42.123]
|
[Lib.TokNumber 42.123]
|
||||||
(lexStr "42.123"))
|
(lexStr "42.123 "))
|
||||||
|
|
||||||
testExponentialInteger :: Test
|
testExponentialInteger :: Test
|
||||||
testExponentialInteger = TestCase (assertEqual "42e123 -> [TokNumber 42e123]"
|
testExponentialInteger = TestCase (assertEqual "recognize integers with exponents"
|
||||||
[Lib.TokNumber 42e123]
|
[Lib.TokNumber 42e123]
|
||||||
(lexStr "42e123"))
|
(lexStr "42e123\n"))
|
||||||
|
|
||||||
testExponential :: Test
|
testExponential :: Test
|
||||||
testExponential = TestCase (assertEqual "42.03E+12 -> [TokNumber 42.03e12]"
|
testExponential = TestCase (assertEqual "recognize fractional numbers with exponents"
|
||||||
[Lib.TokNumber 42.03e12]
|
[Lib.TokNumber 42.03e12]
|
||||||
(lexStr "42.03E+12"))
|
(lexStr "42.03E+12\r"))
|
||||||
|
|
||||||
testNegativeNumber :: Test
|
testNegativeNumber :: Test
|
||||||
testNegativeNumber = TestCase (assertEqual "-42.03e-12 -> [TokNumber -42.03e-12]"
|
testNegativeNumber = TestCase (assertEqual "recognize negative numbers"
|
||||||
[Lib.TokNumber (-42.03e-12)]
|
[Lib.TokNumber (-42.03e-12)]
|
||||||
(lexStr "-42.03e-12"))
|
(lexStr "-42.03e-12"))
|
||||||
|
|
||||||
|
testMalformedNumbersAreSymbols :: Test
|
||||||
|
testMalformedNumbersAreSymbols = TestCase (assertEqual "recognize symbols that arise from invalid numbers"
|
||||||
|
[Lib.TokSymbol "-", Lib.TokSymbol "123abc", Lib.TokSymbol "-123.01f23"]
|
||||||
|
(lexStr "- 123abc -123.01f23"))
|
||||||
|
|
||||||
testStringsAreRecognized :: Test
|
testStringsAreRecognized :: Test
|
||||||
testStringsAreRecognized = TestCase (assertEqual
|
testStringsAreRecognized = TestCase (assertEqual
|
||||||
"can strings be recognized?"
|
"recognize strings"
|
||||||
[Lib.TokStr "\43981asdf\b\f\r\n\t\\/"]
|
[Lib.TokStr "\43981asdf\b\f\r\n\t\\/"]
|
||||||
(lexStr "\"\\uabcdasdf\\b\\f\\r\\n\\t\\\\\\/\""))
|
(lexStr "\"\\uabcdasdf\\b\\f\\r\\n\\t\\\\\\/\""))
|
||||||
|
|
||||||
|
@ -89,6 +94,7 @@ tests = TestList [
|
||||||
TestLabel "test integers with exponential component are recognized" testExponentialInteger,
|
TestLabel "test integers with exponential component are recognized" testExponentialInteger,
|
||||||
TestLabel "test exponential number is recognized" testExponential,
|
TestLabel "test exponential number is recognized" testExponential,
|
||||||
TestLabel "test negative numbers are recognized" testNegativeNumber,
|
TestLabel "test negative numbers are recognized" testNegativeNumber,
|
||||||
|
TestLabel "test malformed numbers count as symbols" testMalformedNumbersAreSymbols,
|
||||||
TestLabel "test strings are recognized" testStringsAreRecognized,
|
TestLabel "test strings are recognized" testStringsAreRecognized,
|
||||||
TestLabel "test unclosed strings are symbols" testUnclosedStringsAreSymbols
|
TestLabel "test unclosed strings are symbols" testUnclosedStringsAreSymbols
|
||||||
]
|
]
|
||||||
|
|
Loading…
Add table
Reference in a new issue