refactor number recognition

This commit is contained in:
jjanzen 2025-03-22 13:17:12 -05:00
parent d12ca569e9
commit 0d6277a3a7
2 changed files with 84 additions and 87 deletions

View file

@ -60,13 +60,14 @@ identifyString ('r':xs) StrEscape = case identifyString xs StrInside of
identifyString ('t':xs) StrEscape = case identifyString xs StrInside of
Just (ys, b) -> Just (ys, '\t':b)
y -> y
identifyString ('u':w:x:y:z:xs) StrEscape | isHexDigit w &&
isHexDigit x &&
isHexDigit y &&
isHexDigit z =
case identifyString xs StrInside of
Just (ys, b) -> Just (ys, chr (read ('0':'x':w:x:y:z:"")):b)
a -> a
identifyString ('u':w:x:y:z:xs) StrEscape
| isHexDigit w &&
isHexDigit x &&
isHexDigit y &&
isHexDigit z =
case identifyString xs StrInside of
Just (ys, b) -> Just (ys, chr (read ('0':'x':w:x:y:z:"")):b)
a -> a
identifyString _ StrEscape = Nothing
identifyString (x:xs) StrInside = case identifyString xs StrInside of
Just (ys, b) -> Just (ys, x:b)
@ -78,74 +79,64 @@ data NumState = NumStart
| NumFractionalComponent
| NumExponentialComponent
identifyNum :: String -> NumState -> (String, Maybe String)
identifyNum "" NumStart = ("", Nothing)
identifyNum ('-':xs) NumStart = identifyNum xs NumNeg
identifyNum ('0':'.':x:xs) NumStart | isDigit x = case identifyNum (x:xs) NumFractionalComponent
of (ys, Just zs) -> (ys, Just ('0':'.':zs))
y -> y
| otherwise = (x:xs, Nothing)
identifyNum (x:xs) NumStart | x == '0' = (x:xs, Nothing)
identifyNum :: String -> NumState -> Maybe (String, String)
identifyNum "" _ = Nothing
identifyNum (x:"") _ | isDigit x = Just ("", x:"")
identifyNum (x:y:xs) _ | isDigit x && isWhitespace y = Just (y:xs, x:"")
identifyNum ('-':xs) NumStart = case identifyNum xs NumNeg of
Just (ys, zs) -> Just (ys, '-':zs)
y -> y
identifyNum ('0':'.':x:xs) NumStart | isDigit x = case identifyNum (x:xs) NumFractionalComponent of
Just (ys, zs) -> Just (ys, '0':'.':zs)
y -> y
identifyNum (x:xs) NumStart | x == '0' = Nothing
| isDigit x = identifyNum (x:xs) NumIntegralComponent
| otherwise = (x:xs, Nothing)
identifyNum "" NumNeg = ("", Nothing)
identifyNum ('0':'.':x:xs) NumNeg | isDigit x = case identifyNum (x:xs) NumFractionalComponent
of (ys, Just zs) -> (ys, Just ('-':'0':'.':zs))
y -> y
identifyNum (x:xs) NumNeg | x == '0' = (x:xs, Nothing)
| isDigit x = case identifyNum (x:xs) NumIntegralComponent
of (ys, Just zs) -> (ys, Just ('-':zs))
y -> y
| otherwise = (x:xs, Nothing)
identifyNum ('.':x:xs) NumIntegralComponent | isDigit x = case identifyNum (x:xs) NumFractionalComponent
of (ys, Just zs) -> (ys, Just ('.':zs))
y -> y
| otherwise = ('.':x:xs, Just "")
| otherwise = Nothing
identifyNum ('0':'.':x:xs) NumNeg | isDigit x = case identifyNum (x:xs) NumFractionalComponent of
Just (ys, zs) -> Just (ys, '0':'.':zs)
y -> y
identifyNum (x:xs) NumNeg | x == '0' = Nothing
| isDigit x = identifyNum (x:xs) NumIntegralComponent
| otherwise = Nothing
identifyNum ('.':x:xs) NumIntegralComponent
| isDigit x = case identifyNum (x:xs) NumFractionalComponent of
Just (ys, zs) -> Just (ys, '.':zs)
y -> y
| otherwise = Nothing
identifyNum (w:x:y:xs) NumIntegralComponent | (w == 'e' || w == 'E') &&
(x == '+' || x == '-') &&
isDigit y =
case identifyNum (y:xs) NumExponentialComponent
of (ys, Just zs) -> (ys, Just (w:x:zs))
z -> z
| (w == 'e' || w == 'E') &&
isDigit x =
case identifyNum (x:y:xs) NumExponentialComponent
of (ys, Just zs) -> (ys, Just (w:zs))
z -> z
identifyNum (x:y:"") NumIntegralComponent | (x == 'e' || x == 'E') &&
isDigit y = ("", Just (x:y:""))
| isDigit x && isDigit y = ("", Just (x:y:""))
| isDigit x = (y:"", Just (x:""))
| otherwise = (x:y:"", Just "")
identifyNum (x:xs) NumIntegralComponent | isDigit x = case identifyNum xs NumIntegralComponent
of (ys, Just zs) -> (ys, Just (x:zs))
y -> y
| otherwise = (x:xs, Just "")
case identifyNum (y:xs) NumExponentialComponent of
Just (ys, zs) -> Just (ys, w:x:zs)
z -> z
identifyNum (x:y:xs) NumIntegralComponent | (x == 'e' || x == 'E') &&
isDigit y =
case identifyNum (y:xs) NumExponentialComponent of
Just (ys, zs) -> Just (ys, x:zs)
z -> z
identifyNum (x:xs) NumIntegralComponent | isDigit x = case identifyNum xs NumIntegralComponent of
Just (ys, zs) -> Just (ys, x:zs)
y -> y
| otherwise = Nothing
identifyNum (w:x:y:xs) NumFractionalComponent | (w == 'e' || w == 'E') &&
(x == '+' || x == '-') &&
isDigit y =
case identifyNum (y:xs) NumExponentialComponent
of (ys, Just zs) -> (ys, Just (w:x:zs))
z -> z
| (w == 'e' || w == 'E') &&
isDigit x =
case identifyNum (x:y:xs) NumExponentialComponent
of (ys, Just zs) -> (ys, Just (w:zs))
z -> z
identifyNum (x:y:"") NumFractionalComponent | (x == 'e' || x == 'E') &&
isDigit y = ("", Just (x:y:""))
| isDigit x && isDigit y = ("", Just (x:y:""))
| isDigit x = (y:"", Just (x:""))
| otherwise = (x:y:"", Just "")
identifyNum (x:xs) NumFractionalComponent | isDigit x = case identifyNum xs NumFractionalComponent
of (ys, Just zs) -> (ys, Just (x:zs))
y -> y
| otherwise = (x:xs, Just "")
identifyNum (x:xs) NumExponentialComponent | isDigit x = case identifyNum xs NumExponentialComponent
of (ys, Just zs) -> (ys, Just (x:zs))
y -> y
| otherwise = (x:xs, Just "")
identifyNum [] _ = ("", Just "")
case identifyNum (y:xs) NumExponentialComponent of
Just (ys, zs) -> Just (ys, w:x:zs)
z -> z
identifyNum (x:y:xs) NumFractionalComponent | (x == 'e' || x == 'E') &&
isDigit y =
case identifyNum (y:xs) NumExponentialComponent of
Just (ys, zs) -> Just (ys, x:zs)
z -> z
identifyNum (x:xs) NumFractionalComponent | isDigit x = case identifyNum xs NumFractionalComponent of
Just (ys, zs) -> Just (ys, x:zs)
y -> y
| otherwise = Nothing
identifyNum (x:xs) NumExponentialComponent | isDigit x = case identifyNum xs NumExponentialComponent of
Just (ys, zs) -> Just (ys, x:zs)
y -> y
| otherwise = Nothing
lexStr :: String -> [Token]
lexStr "" = []
@ -167,6 +158,6 @@ lexStr ('\r':xs) = lexStr xs
lexStr (x:xs) | isDigit x ||
x == '-' =
case identifyNum (x:xs) NumStart of
(ys, Nothing) -> TokErr "Failed to parse number" : lexStr ys
(ys, Just b) -> TokNumber (read b) : lexStr ys
Nothing -> case identifySymbol (x:xs) of (rest, symb) -> TokSymbol symb : lexStr rest
Just (ys, b) -> TokNumber (read b) : lexStr ys
lexStr xs = case identifySymbol xs of (ys, zs) -> TokSymbol zs : lexStr ys

View file

@ -4,68 +4,73 @@ import Test.HUnit
import Lib
testPars :: Test
testPars = TestCase (assertEqual "() -> [TokLPar,TokRPar]"
testPars = TestCase (assertEqual "recognize parentheses"
[Lib.TokLPar, Lib.TokRPar]
(lexStr "()"))
testSymbol :: Test
testSymbol = TestCase (assertEqual "hello world -> [TokSymbol \"hello\", TokSymbol \"world\"]"
testSymbol = TestCase (assertEqual "recognize symbols"
[Lib.TokSymbol "hello", Lib.TokSymbol "world"]
(lexStr "hello world"))
testQuotedPars :: Test
testQuotedPars = TestCase (assertEqual "'() -> [TokQuote,TokLPar,TokRPar]"
testQuotedPars = TestCase (assertEqual "recognize quoted parentheses"
[Lib.TokQuote, Lib.TokLPar, Lib.TokRPar]
(lexStr "'()"))
testLoneQuoteIsSymbol :: Test
testLoneQuoteIsSymbol = TestCase (assertEqual "' ' -> [TokSymbol \"'\", TokSymbol \"'\"]"
testLoneQuoteIsSymbol = TestCase (assertEqual "a quote on its own is just a symbol"
[Lib.TokSymbol "'", Lib.TokSymbol "'"]
(lexStr "' '"))
testCharacter :: Test
testCharacter = TestCase (assertEqual "'a' -> [TokCharacter 'a']"
testCharacter = TestCase (assertEqual "recognize characters"
[Lib.TokCharacter 'a']
(lexStr "'a'"))
testQuotedSymbol :: Test
testQuotedSymbol = TestCase (assertEqual "'hello -> [TokQuote,TokSymbol \"hello\"]"
testQuotedSymbol = TestCase (assertEqual "recognize quoted symbols"
[Lib.TokQuote, Lib.TokSymbol "hello"]
(lexStr "'hello"))
testUnicodeCharacter :: Test
testUnicodeCharacter = TestCase (assertEqual "'📎' -> [TokCharacter '📎']"
testUnicodeCharacter = TestCase (assertEqual "recognize unicode characters"
[Lib.TokCharacter '📎']
(lexStr "'📎'"))
testSimpleIntegers :: Test
testSimpleIntegers = TestCase (assertEqual "42 -> [TokNumber 42]"
testSimpleIntegers = TestCase (assertEqual "recognize basic integers"
[Lib.TokNumber 42]
(lexStr "42"))
(lexStr "42\t"))
testFractionalNumber :: Test
testFractionalNumber = TestCase (assertEqual "42.123 -> [TokNumber 42.123]"
testFractionalNumber = TestCase (assertEqual "recognize numbers with fractional components"
[Lib.TokNumber 42.123]
(lexStr "42.123"))
(lexStr "42.123 "))
testExponentialInteger :: Test
testExponentialInteger = TestCase (assertEqual "42e123 -> [TokNumber 42e123]"
testExponentialInteger = TestCase (assertEqual "recognize integers with exponents"
[Lib.TokNumber 42e123]
(lexStr "42e123"))
(lexStr "42e123\n"))
testExponential :: Test
testExponential = TestCase (assertEqual "42.03E+12 -> [TokNumber 42.03e12]"
testExponential = TestCase (assertEqual "recognize fractional numbers with exponents"
[Lib.TokNumber 42.03e12]
(lexStr "42.03E+12"))
(lexStr "42.03E+12\r"))
testNegativeNumber :: Test
testNegativeNumber = TestCase (assertEqual "-42.03e-12 -> [TokNumber -42.03e-12]"
testNegativeNumber = TestCase (assertEqual "recognize negative numbers"
[Lib.TokNumber (-42.03e-12)]
(lexStr "-42.03e-12"))
testMalformedNumbersAreSymbols :: Test
testMalformedNumbersAreSymbols = TestCase (assertEqual "recognize symbols that arise from invalid numbers"
[Lib.TokSymbol "-", Lib.TokSymbol "123abc", Lib.TokSymbol "-123.01f23"]
(lexStr "- 123abc -123.01f23"))
testStringsAreRecognized :: Test
testStringsAreRecognized = TestCase (assertEqual
"can strings be recognized?"
"recognize strings"
[Lib.TokStr "\43981asdf\b\f\r\n\t\\/"]
(lexStr "\"\\uabcdasdf\\b\\f\\r\\n\\t\\\\\\/\""))
@ -89,6 +94,7 @@ tests = TestList [
TestLabel "test integers with exponential component are recognized" testExponentialInteger,
TestLabel "test exponential number is recognized" testExponential,
TestLabel "test negative numbers are recognized" testNegativeNumber,
TestLabel "test malformed numbers count as symbols" testMalformedNumbersAreSymbols,
TestLabel "test strings are recognized" testStringsAreRecognized,
TestLabel "test unclosed strings are symbols" testUnclosedStringsAreSymbols
]