implement a parser

This commit is contained in:
jjanzen 2025-03-23 10:40:50 -05:00
parent 2ef4a21175
commit 7c6b5c6c79
7 changed files with 410 additions and 168 deletions

View file

@ -64,6 +64,7 @@ library
-- Modules included in this library but not exported.
other-modules:
Lexer
Parser
-- LANGUAGE extensions used by modules in this package.
-- other-extensions:
@ -110,7 +111,9 @@ test-suite lisp-interpreter-test
default-language: Haskell2010
-- Modules included in this executable, other than Main.
-- other-modules:
other-modules:
TestLexer
TestParser
-- LANGUAGE extensions used by modules in this package.
-- other-extensions:

View file

@ -12,7 +12,6 @@ data Token = TokLPar
| TokStr String
| TokCharacter Char
| TokSymbol String
| TokErr String
deriving (Eq, Show)
isWhitespace :: Char -> Bool

View file

@ -1,10 +1,13 @@
module Lib(
Lexer.Token(..),
Lexer.Token (..),
Lexer.lexStr,
Parser.ASTNode (..),
Parser.recognizeAST,
someFunc
)where
import Lexer
import Parser
someFunc :: IO ()
someFunc = putStrLn "Hello, world"

46
src/Parser.hs Normal file
View file

@ -0,0 +1,46 @@
module Parser(
ASTNode (..),
recognizeAST
)where
import Lexer
data ASTNode = ASTLiteralNumber Double
| ASTLiteralString String
| ASTLiteralChar Char
| ASTSymbol String
| ASTQuote ASTNode
| AST [ASTNode]
| ASTErr String
deriving (Eq, Show)
recognizeASTNode :: [Token] -> ([Token], ASTNode)
recognizeASTNode [] = ([], ASTErr "Unexpected EOF")
recognizeASTNode ((TokNumber x):xs) = (xs, ASTLiteralNumber x)
recognizeASTNode ((TokStr x):xs) = (xs, ASTLiteralString x)
recognizeASTNode ((TokCharacter x):xs) = (xs, ASTLiteralChar x)
recognizeASTNode ((TokSymbol x):xs) = (xs, ASTSymbol x)
recognizeASTNode (TokQuote:xs) = case recognizeASTNode xs of (ys, n) -> (ys, ASTQuote n)
recognizeASTNode (TokLPar:xs) = case recognizeSubAST xs of (ys, n) -> (ys, AST n)
recognizeASTNode (TokRPar:xs) = (xs, ASTErr "unreachable error") -- should never be called on TokRPar
recognizeSubAST :: [Token] -> ([Token], [ASTNode])
recognizeSubAST [] = ([], [ASTErr "Unexpected EOF"])
recognizeSubAST (x:xs) | x == TokRPar = (xs, [])
| otherwise = case recognizeASTNode (x:xs) of
(ys, node) ->
case recognizeSubAST ys of
(zs, ast) -> (zs, node:ast)
recognizeAST' :: [Token] -> ([Token], [ASTNode])
recognizeAST' [] = ([], [])
recognizeAST' (x:xs) | x == TokRPar = (xs, [ASTErr "Unexpected ')'"])
| otherwise = case recognizeASTNode (x:xs) of
(ys, node) ->
case recognizeAST' ys of
(zs, ast) -> (zs, node:ast)
recognizeAST :: [Token] -> [ASTNode]
recognizeAST xs = case recognizeAST' xs of
([], ys) -> ys
(_, ys) -> ys ++ [ASTErr "Unexpected )"]

View file

@ -1,171 +1,16 @@
module Main (main) where
import Test.HUnit
import Lib
testPars :: Test
testPars = TestCase (assertEqual "recognize parentheses"
[Lib.TokLPar, Lib.TokRPar]
(lexStr "()"))
testSymbol :: Test
testSymbol = TestCase (assertEqual "recognize symbols"
[Lib.TokSymbol "hello", Lib.TokSymbol "world"]
(lexStr "hello world"))
testQuotedPars :: Test
testQuotedPars = TestCase (assertEqual "recognize quoted parentheses"
[Lib.TokQuote, Lib.TokLPar, Lib.TokRPar]
(lexStr "'()"))
testLoneQuoteIsSymbol :: Test
testLoneQuoteIsSymbol = TestCase (assertEqual "a quote on its own is just a symbol"
[Lib.TokSymbol "'", Lib.TokSymbol "'"]
(lexStr "' '"))
testCharacter :: Test
testCharacter = TestCase (assertEqual "recognize characters"
[Lib.TokCharacter 'a']
(lexStr "'a'"))
testQuotedSymbol :: Test
testQuotedSymbol = TestCase (assertEqual "recognize quoted symbols"
[Lib.TokQuote, Lib.TokSymbol "hello"]
(lexStr "'hello"))
testUnicodeCharacter :: Test
testUnicodeCharacter = TestCase (assertEqual "recognize unicode characters"
[Lib.TokCharacter '📎']
(lexStr "'📎'"))
testSimpleIntegers :: Test
testSimpleIntegers = TestCase (assertEqual "recognize basic integers"
[Lib.TokNumber 42]
(lexStr "42\t"))
testFractionalNumber :: Test
testFractionalNumber = TestCase (assertEqual "recognize numbers with fractional components"
[Lib.TokNumber 42.123]
(lexStr "42.123 "))
testExponentialInteger :: Test
testExponentialInteger = TestCase (assertEqual "recognize integers with exponents"
[Lib.TokNumber 42e123]
(lexStr "42e123\n"))
testExponential :: Test
testExponential = TestCase (assertEqual "recognize fractional numbers with exponents"
[Lib.TokNumber 42.03e12]
(lexStr "42.03E+12\r"))
testNegativeNumber :: Test
testNegativeNumber = TestCase (assertEqual "recognize negative numbers"
[Lib.TokNumber (-42.03e-12)]
(lexStr "-42.03e-12"))
testMalformedNumbersAreSymbols :: Test
testMalformedNumbersAreSymbols = TestCase (assertEqual "recognize symbols that arise from invalid numbers"
[Lib.TokSymbol "-", Lib.TokSymbol "123abc", Lib.TokSymbol "-123.01f23"]
(lexStr "- 123abc -123.01f23"))
testStringsAreRecognized :: Test
testStringsAreRecognized = TestCase (assertEqual
"recognize strings"
[Lib.TokStr "\43981asdf\b\f\r\n\t\\/"]
(lexStr "\"\\uabcdasdf\\b\\f\\r\\n\\t\\\\\\/\""))
testUnclosedStringsAreSymbols :: Test
testUnclosedStringsAreSymbols = TestCase (assertEqual
"this would make for a terrible symbol, but it can hypothetically start with a double quote"
[Lib.TokSymbol "\"hello", Lib.TokSymbol "world"]
(lexStr "\"hello world"))
testLexComplexProgram :: Test
testLexComplexProgram = TestCase (assertEqual
"lex an entire program"
[
Lib.TokLPar,
Lib.TokSymbol "defun",
Lib.TokSymbol "pomodoro-toggle-state",
Lib.TokLPar,
Lib.TokRPar,
Lib.TokStr "Restart the pomodoro timer and switch to the other state.",
Lib.TokLPar,
Lib.TokSymbol "interactive",
Lib.TokRPar,
Lib.TokLPar,
Lib.TokSymbol "cond",
Lib.TokLPar,
Lib.TokLPar,
Lib.TokSymbol "equal",
Lib.TokSymbol "pomodoro-state",
Lib.TokQuote,
Lib.TokSymbol "working",
Lib.TokRPar,
Lib.TokLPar,
Lib.TokSymbol "setq",
Lib.TokSymbol "pomodoro-state",
Lib.TokQuote,
Lib.TokSymbol "break",
Lib.TokRPar,
Lib.TokRPar,
Lib.TokLPar,
Lib.TokLPar,
Lib.TokSymbol "equal",
Lib.TokSymbol "pomodoro-state",
Lib.TokQuote,
Lib.TokSymbol "break",
Lib.TokRPar,
Lib.TokLPar,
Lib.TokSymbol "setq",
Lib.TokSymbol "pomodoro-state",
Lib.TokQuote,
Lib.TokSymbol "working",
Lib.TokRPar,
Lib.TokRPar,
Lib.TokLPar,
Lib.TokSymbol "t",
Lib.TokLPar,
Lib.TokSymbol "user-error",
Lib.TokStr "Invalid pomodoro state",
Lib.TokRPar,
Lib.TokRPar,
Lib.TokRPar,
Lib.TokLPar,
Lib.TokSymbol "setq",
Lib.TokSymbol "pomodoro-start-time",
Lib.TokLPar,
Lib.TokSymbol "floor",
Lib.TokLPar,
Lib.TokSymbol "float-time",
Lib.TokRPar,
Lib.TokRPar,
Lib.TokRPar,
Lib.TokRPar
]
(lexStr ";; This is a comment\n(defun pomodoro-toggle-state () ; This is another comment\n\"Restart the pomodoro timer and switch to the other state.\"\n(interactive; a third comment\n)\n(cond ((equal pomodoro-state 'working)\n(setq pomodoro-state 'break))\n((equal pomodoro-state 'break)\n(setq pomodoro-state 'working))\n(t\n(user-error \"Invalid pomodoro state\")))\n(setq pomodoro-start-time (floor (float-time))))"))
tests :: Test
tests = TestList [
TestLabel "test paretheses are detected" testPars,
TestLabel "test symbols are recognized" testSymbol,
TestLabel "test quoted parentheses are detected" testQuotedPars,
TestLabel "test single quotes without symbols or characters are symbols themselves" testLoneQuoteIsSymbol,
TestLabel "test characters are recognized" testCharacter,
TestLabel "test quoted symbols are recognized" testQuotedSymbol,
TestLabel "test unicode characters are recognized" testUnicodeCharacter,
TestLabel "test simple integers are recognized" testSimpleIntegers,
TestLabel "test fractional numbers are recognized" testFractionalNumber,
TestLabel "test integers with exponential component are recognized" testExponentialInteger,
TestLabel "test exponential number is recognized" testExponential,
TestLabel "test negative numbers are recognized" testNegativeNumber,
TestLabel "test malformed numbers count as symbols" testMalformedNumbersAreSymbols,
TestLabel "test strings are recognized" testStringsAreRecognized,
TestLabel "test unclosed strings are symbols" testUnclosedStringsAreSymbols,
TestLabel "test lexing a complete program" testLexComplexProgram
]
import TestLexer
import TestParser
main :: IO ()
main = do
c <- runTestTT tests
putStrLn (showCounts c)
putStrLn "Testing Lexer"
lexTC <- runTestTT TestLexer.tests
putStrLn (showCounts lexTC)
putStrLn ""
putStrLn "Testing Parser"
parseTC <- runTestTT TestParser.tests
putStrLn (showCounts parseTC)
putStrLn ""

166
test/TestLexer.hs Normal file
View file

@ -0,0 +1,166 @@
module TestLexer where
import Test.HUnit
import Lib
testPars :: Test
testPars = TestCase (assertEqual "recognize parentheses"
[Lib.TokLPar, Lib.TokRPar]
(lexStr "()"))
testSymbol :: Test
testSymbol = TestCase (assertEqual "recognize symbols"
[Lib.TokSymbol "hello", Lib.TokSymbol "world"]
(lexStr "hello world"))
testQuotedPars :: Test
testQuotedPars = TestCase (assertEqual "recognize quoted parentheses"
[Lib.TokQuote, Lib.TokLPar, Lib.TokRPar]
(lexStr "'()"))
testLoneQuoteIsSymbol :: Test
testLoneQuoteIsSymbol = TestCase (assertEqual "a quote on its own is just a symbol"
[Lib.TokSymbol "'", Lib.TokSymbol "'"]
(lexStr "' '"))
testCharacter :: Test
testCharacter = TestCase (assertEqual "recognize characters"
[Lib.TokCharacter 'a']
(lexStr "'a'"))
testQuotedSymbol :: Test
testQuotedSymbol = TestCase (assertEqual "recognize quoted symbols"
[Lib.TokQuote, Lib.TokSymbol "hello"]
(lexStr "'hello"))
testUnicodeCharacter :: Test
testUnicodeCharacter = TestCase (assertEqual "recognize unicode characters"
[Lib.TokCharacter '📎']
(lexStr "'📎'"))
testSimpleIntegers :: Test
testSimpleIntegers = TestCase (assertEqual "recognize basic integers"
[Lib.TokNumber 42]
(lexStr "42\t"))
testFractionalNumber :: Test
testFractionalNumber = TestCase (assertEqual "recognize numbers with fractional components"
[Lib.TokNumber 42.123]
(lexStr "42.123 "))
testExponentialInteger :: Test
testExponentialInteger = TestCase (assertEqual "recognize integers with exponents"
[Lib.TokNumber 42e123]
(lexStr "42e123\n"))
testExponential :: Test
testExponential = TestCase (assertEqual "recognize fractional numbers with exponents"
[Lib.TokNumber 42.03e12]
(lexStr "42.03E+12\r"))
testNegativeNumber :: Test
testNegativeNumber = TestCase (assertEqual "recognize negative numbers"
[Lib.TokNumber (-42.03e-12)]
(lexStr "-42.03e-12"))
testMalformedNumbersAreSymbols :: Test
testMalformedNumbersAreSymbols = TestCase (assertEqual "recognize symbols that arise from invalid numbers"
[Lib.TokSymbol "-", Lib.TokSymbol "123abc", Lib.TokSymbol "-123.01f23"]
(lexStr "- 123abc -123.01f23"))
testStringsAreRecognized :: Test
testStringsAreRecognized = TestCase (assertEqual
"recognize strings"
[Lib.TokStr "\43981asdf\b\f\r\n\t\\/"]
(lexStr "\"\\uabcdasdf\\b\\f\\r\\n\\t\\\\\\/\""))
testUnclosedStringsAreSymbols :: Test
testUnclosedStringsAreSymbols = TestCase (assertEqual
"this would make for a terrible symbol, but it can hypothetically start with a double quote"
[Lib.TokSymbol "\"hello", Lib.TokSymbol "world"]
(lexStr "\"hello world"))
testLexComplexProgram :: Test
testLexComplexProgram = TestCase (assertEqual
"lex an entire program"
[
Lib.TokLPar,
Lib.TokSymbol "defun",
Lib.TokSymbol "pomodoro-toggle-state",
Lib.TokLPar,
Lib.TokRPar,
Lib.TokStr "Restart the pomodoro timer and switch to the other state.",
Lib.TokLPar,
Lib.TokSymbol "interactive",
Lib.TokRPar,
Lib.TokLPar,
Lib.TokSymbol "cond",
Lib.TokLPar,
Lib.TokLPar,
Lib.TokSymbol "equal",
Lib.TokSymbol "pomodoro-state",
Lib.TokQuote,
Lib.TokSymbol "working",
Lib.TokRPar,
Lib.TokLPar,
Lib.TokSymbol "setq",
Lib.TokSymbol "pomodoro-state",
Lib.TokQuote,
Lib.TokSymbol "break",
Lib.TokRPar,
Lib.TokRPar,
Lib.TokLPar,
Lib.TokLPar,
Lib.TokSymbol "equal",
Lib.TokSymbol "pomodoro-state",
Lib.TokQuote,
Lib.TokSymbol "break",
Lib.TokRPar,
Lib.TokLPar,
Lib.TokSymbol "setq",
Lib.TokSymbol "pomodoro-state",
Lib.TokQuote,
Lib.TokSymbol "working",
Lib.TokRPar,
Lib.TokRPar,
Lib.TokLPar,
Lib.TokSymbol "t",
Lib.TokLPar,
Lib.TokSymbol "user-error",
Lib.TokStr "Invalid pomodoro state",
Lib.TokRPar,
Lib.TokRPar,
Lib.TokRPar,
Lib.TokLPar,
Lib.TokSymbol "setq",
Lib.TokSymbol "pomodoro-start-time",
Lib.TokLPar,
Lib.TokSymbol "floor",
Lib.TokLPar,
Lib.TokSymbol "float-time",
Lib.TokRPar,
Lib.TokRPar,
Lib.TokRPar,
Lib.TokRPar
]
(lexStr ";; This is a comment\n(defun pomodoro-toggle-state () ; This is another comment\n\"Restart the pomodoro timer and switch to the other state.\"\n(interactive; a third comment\n)\n(cond ((equal pomodoro-state 'working)\n(setq pomodoro-state 'break))\n((equal pomodoro-state 'break)\n(setq pomodoro-state 'working))\n(t\n(user-error \"Invalid pomodoro state\")))\n(setq pomodoro-start-time (floor (float-time))))"))
tests :: Test
tests = TestList [
TestLabel "test paretheses are detected" testPars,
TestLabel "test symbols are recognized" testSymbol,
TestLabel "test quoted parentheses are detected" testQuotedPars,
TestLabel "test single quotes without symbols or characters are symbols themselves" testLoneQuoteIsSymbol,
TestLabel "test characters are recognized" testCharacter,
TestLabel "test quoted symbols are recognized" testQuotedSymbol,
TestLabel "test unicode characters are recognized" testUnicodeCharacter,
TestLabel "test simple integers are recognized" testSimpleIntegers,
TestLabel "test fractional numbers are recognized" testFractionalNumber,
TestLabel "test integers with exponential component are recognized" testExponentialInteger,
TestLabel "test exponential number is recognized" testExponential,
TestLabel "test negative numbers are recognized" testNegativeNumber,
TestLabel "test malformed numbers count as symbols" testMalformedNumbersAreSymbols,
TestLabel "test strings are recognized" testStringsAreRecognized,
TestLabel "test unclosed strings are symbols" testUnclosedStringsAreSymbols,
TestLabel "test lexing a complete program" testLexComplexProgram
]

180
test/TestParser.hs Normal file
View file

@ -0,0 +1,180 @@
module TestParser(tests) where
import Test.HUnit
import Lib
testEmptyAST :: Test
testEmptyAST = TestCase (assertEqual
"empty ASTs are recognized"
[]
(recognizeAST []))
testErrorOnMissingRPar :: Test
testErrorOnMissingRPar = TestCase (assertEqual
"error on missing TokRPar"
[AST [ASTErr "Unexpected EOF"]]
(recognizeAST [TokLPar]))
testErrorOnExtraRPar :: Test
testErrorOnExtraRPar = TestCase (assertEqual
"error on extra TokRPar"
[ASTErr "Unexpected ')'"]
(recognizeAST [TokRPar]))
testErrorOnMissingQuotedItem :: Test
testErrorOnMissingQuotedItem = TestCase (assertEqual
"error when EOF reached immediately after quote"
[ASTQuote (ASTErr "Unexpected EOF")]
(recognizeAST [TokQuote]))
testBasicComponentsAreRecognized :: Test
testBasicComponentsAreRecognized = TestCase (assertEqual
"basic components are recognized"
[
ASTLiteralNumber 123,
ASTLiteralString "hello",
ASTLiteralChar 'a',
ASTSymbol "symbol",
ASTQuote (AST [ASTLiteralChar 'b']),
ASTLiteralNumber 456
]
(recognizeAST [
TokNumber 123,
TokStr "hello",
TokCharacter 'a',
TokSymbol "symbol",
TokQuote,
TokLPar,
TokCharacter 'b',
TokRPar,
TokNumber 456
]
))
testParseAProgram :: Test
testParseAProgram = TestCase (assertEqual
"parse a full program"
[
AST [
ASTSymbol "defun",
ASTSymbol "pomodoro-toggle-state",
AST [],
ASTLiteralString
"Restart the pomodoro timer and switch to the other state.",
AST [ASTSymbol "interactive"],
AST [
ASTSymbol "cond",
AST [
AST [
ASTSymbol "equal",
ASTSymbol "pomodoro-state",
ASTQuote (ASTSymbol "working")
],
AST [
ASTSymbol "setq",
ASTSymbol "pomodoro-state",
ASTQuote (ASTSymbol "break")
]
],
AST [
AST [
ASTSymbol "equal",
ASTSymbol "pomodoro-state",
ASTQuote (ASTSymbol "break")
],
AST [
ASTSymbol "setq",
ASTSymbol "pomodoro-state",
ASTQuote (ASTSymbol "working")
]
],
AST [
ASTSymbol "t",
AST [
ASTSymbol "user-error",
ASTLiteralString "Invalid pomodoro state"
]
]
],
AST [
ASTSymbol "setq",
ASTSymbol "pomodoro-start-time",
AST [
ASTSymbol "floor",
AST [
ASTSymbol "float-time"
]
]
]
]
]
(recognizeAST [
Lib.TokLPar,
Lib.TokSymbol "defun",
Lib.TokSymbol "pomodoro-toggle-state",
Lib.TokLPar,
Lib.TokRPar,
Lib.TokStr "Restart the pomodoro timer and switch to the other state.",
Lib.TokLPar,
Lib.TokSymbol "interactive",
Lib.TokRPar,
Lib.TokLPar,
Lib.TokSymbol "cond",
Lib.TokLPar,
Lib.TokLPar,
Lib.TokSymbol "equal",
Lib.TokSymbol "pomodoro-state",
Lib.TokQuote,
Lib.TokSymbol "working",
Lib.TokRPar,
Lib.TokLPar,
Lib.TokSymbol "setq",
Lib.TokSymbol "pomodoro-state",
Lib.TokQuote,
Lib.TokSymbol "break",
Lib.TokRPar,
Lib.TokRPar,
Lib.TokLPar,
Lib.TokLPar,
Lib.TokSymbol "equal",
Lib.TokSymbol "pomodoro-state",
Lib.TokQuote,
Lib.TokSymbol "break",
Lib.TokRPar,
Lib.TokLPar,
Lib.TokSymbol "setq",
Lib.TokSymbol "pomodoro-state",
Lib.TokQuote,
Lib.TokSymbol "working",
Lib.TokRPar,
Lib.TokRPar,
Lib.TokLPar,
Lib.TokSymbol "t",
Lib.TokLPar,
Lib.TokSymbol "user-error",
Lib.TokStr "Invalid pomodoro state",
Lib.TokRPar,
Lib.TokRPar,
Lib.TokRPar,
Lib.TokLPar,
Lib.TokSymbol "setq",
Lib.TokSymbol "pomodoro-start-time",
Lib.TokLPar,
Lib.TokSymbol "floor",
Lib.TokLPar,
Lib.TokSymbol "float-time",
Lib.TokRPar,
Lib.TokRPar,
Lib.TokRPar,
Lib.TokRPar
]))
tests :: Test
tests = TestList [
TestLabel "empty ASTs are recognized" testEmptyAST,
TestLabel "missing TokRPar causes an error" testErrorOnMissingRPar,
TestLabel "extra TokRPar causes an error" testErrorOnExtraRPar,
TestLabel "missing item from quote causes an error" testErrorOnMissingQuotedItem,
TestLabel "all basic components are recognized" testBasicComponentsAreRecognized,
TestLabel "parse a full program" testParseAProgram
]