summaryrefslogtreecommitdiff
path: root/Haskell-book/24/ParserExercises/src
diff options
context:
space:
mode:
authorEugen Wissner <belka@caraus.de>2025-12-11 10:28:11 +0100
committerEugen Wissner <belka@caraus.de>2025-12-11 10:28:11 +0100
commit98329e0a3dd4f78b5d815ac3896272ec70904901 (patch)
tree80f9c56cfe2ac20232358f236d32e84bd683be1b /Haskell-book/24/ParserExercises/src
parent3624c712d72d246f21d4e710cec7c11e052e0326 (diff)
downloadbook-exercises-98329e0a3dd4f78b5d815ac3896272ec70904901.tar.gz
Add remaining haskell book exercises
Diffstat (limited to 'Haskell-book/24/ParserExercises/src')
-rw-r--r--Haskell-book/24/ParserExercises/src/Base10Integer.hs52
-rw-r--r--Haskell-book/24/ParserExercises/src/IPAddress.hs260
-rw-r--r--Haskell-book/24/ParserExercises/src/LogParser.hs83
-rw-r--r--Haskell-book/24/ParserExercises/src/PhoneNumber.hs28
-rw-r--r--Haskell-book/24/ParserExercises/src/SemVer.hs51
5 files changed, 474 insertions, 0 deletions
diff --git a/Haskell-book/24/ParserExercises/src/Base10Integer.hs b/Haskell-book/24/ParserExercises/src/Base10Integer.hs
new file mode 100644
index 0000000..4e8ee3a
--- /dev/null
+++ b/Haskell-book/24/ParserExercises/src/Base10Integer.hs
@@ -0,0 +1,52 @@
+module Base10Integer where
+
+import Control.Applicative
+import Text.Trifecta
+
+-- 2. Write a parser for positive integer values. Don't reuse the preexisting
+-- digit or integer functions, but you can use the rest of the libraries we've
+-- shown you so far. You are not expected to write a parsing library from
+-- scratch.
+--
+-- Hint: Assume you're parsing base-10 numbers. Use arithmetic as a cheap
+-- "accumulator" for your final number as you parse each digit left-to-right.
+
+parseDigit :: Parser Char
+parseDigit = (char '0')
+ <|> (char '1')
+ <|> (char '2')
+ <|> (char '3')
+ <|> (char '4')
+ <|> (char '5')
+ <|> (char '6')
+ <|> (char '7')
+ <|> (char '8')
+ <|> (char '9')
+
+charToDigit :: Char -> Integer
+charToDigit c = case c of
+ '0' -> 0
+ '1' -> 1
+ '2' -> 2
+ '3' -> 3
+ '4' -> 4
+ '5' -> 5
+ '6' -> 6
+ '7' -> 7
+ '8' -> 8
+ '9' -> 9
+
+base10Integer :: Parser Integer
+base10Integer = do
+ number <- some parseDigit
+ let n = foldl (\acc x -> (acc * 10) + (charToDigit x)) 0 number
+ return n
+
+-- 3. Extend the parser your wrote to handle negative and positive integers.
+-- Try writing a new parser in terms of the one you already have to do this.
+base10Integer' :: Parser Integer
+base10Integer' = do
+ negative <- (char '-' >> (return negate)) <|> (return id)
+ number <- some parseDigit
+ let n = foldl (\acc x -> (acc * 10) + (charToDigit x)) 0 number
+ return $ negative n
diff --git a/Haskell-book/24/ParserExercises/src/IPAddress.hs b/Haskell-book/24/ParserExercises/src/IPAddress.hs
new file mode 100644
index 0000000..e6a7102
--- /dev/null
+++ b/Haskell-book/24/ParserExercises/src/IPAddress.hs
@@ -0,0 +1,260 @@
+module IPAddress where
+
+import Numeric
+import Control.Monad (join)
+import Control.Applicative
+import Data.Char
+import Data.List
+import Data.Maybe
+import Data.Map (lookup, Map(..), fromList)
+import Data.Word
+import Data.Bits
+import Text.Trifecta
+
+-- 6. Write a parser for IPv4 addresses.
+
+data IPAddress = IPAddress Word32 deriving (Eq, Ord)
+
+parseIP4 :: Parser IPAddress
+parseIP4 = do
+ p1 <- natural
+ _ <- char '.'
+ p2 <- natural
+ _ <- char '.'
+ p3 <- natural
+ _ <- char '.'
+ p4 <- natural
+ return $ IPAddress $ fromIntegral $ xor (xor (xor (shift p1 24) (shift p2 16)) (shift p3 8)) p4
+
+-- A 32-bit word is a 32-bit unsigned int. Lowest value is 0 rahter than being
+-- capable of representing negative numbers, but the highest possible value in
+-- the same number of bits is twice as high.
+--
+-- Word32 is an appropriate and compact way to represent IPv4 addresses. You
+-- are expected to figure out not only how to parse the typical IP address
+-- format, but how IP addresses work numerically insofar as is required to
+-- write a working parser. This will require using a search engine unless you
+-- have an appropriate book on internet networking handy.
+
+-- 7. Same as before, but IPv6.
+
+data IPAddress6 = IPAddress6 Word64 Word64 deriving (Eq, Ord)
+
+-- One of the trickier parts about IPv6 will be full vs. collapsed
+-- addresses and the abbrevations. See this Q&A thread 13 about
+-- IPv6 abbreviations for more.
+
+
+newtype IPV6Normed = IPV6Normed String
+ deriving (Eq, Ord, Show)
+
+newtype IPV6Str = IPV6Str String
+ deriving (Eq, Ord, Show)
+
+spanList :: ([a] -> Bool) -> [a] -> ([a], [a])
+spanList _ [] = ([],[])
+spanList func list@(x:xs) =
+ if func list
+ then (x:ys,zs)
+ else ([],list)
+ where (ys,zs) = spanList func xs
+
+breakList :: ([a] -> Bool) -> [a] -> ([a], [a])
+breakList func = spanList (not . func)
+
+split' :: Eq a => [a] -> [a] -> [[a]]
+split' _ [] = []
+split' delim str =
+ let (firstline, remainder) = breakList (isPrefixOf delim) str
+ in
+ firstline : case remainder of
+ [] -> []
+ x -> if x == delim
+ then [] : []
+ else split' delim
+ (drop (length delim) x)
+
+join :: [a] -> [[a]] -> [a]
+join delim l = concat (intersperse delim l)
+
+replace :: Eq a => [a] -> [a] -> [a] -> [a]
+replace old new l = IPAddress.join new . split' old $ l
+
+split :: Eq a => a -> [a] -> [[a]]
+split d [] = []
+split d s = x : split d (drop 1 y)
+ where
+ (x, y) = Data.List.span (/= d) s
+
+twoRaised16Exp :: [Integer]
+twoRaised16Exp = fmap ((2 ^ 16) ^) [0,1 ..]
+
+validHexChars :: String
+validHexChars = "0123456789abcdefABCDEF"
+
+validHexCharsLowerOnly :: String
+validHexCharsLowerOnly = "0123456789abcdef"
+
+buildExpanded0s :: Int -> String
+buildExpanded0s i = intersperse ':' (take i (repeat '0'))
+
+ipv6NormedToIPAddress6 :: IPV6Normed -> IPAddress6
+ipv6NormedToIPAddress6 (IPV6Normed str) = IPAddress6 quotient remainder
+ where
+ asSegs = split ':' str
+ zippedWithExp = zip (reverse asSegs) twoRaised16Exp
+ asInteger = foldr (\(s, exp) acc -> hexToDec s * exp + acc) 0 zippedWithExp
+ (q, r) = quotRem asInteger word64Max
+ quotient = fromIntegral q
+ remainder = fromIntegral r
+
+hexToDec :: String -> Integer
+hexToDec s = toInteger asInt
+ where
+ asInt = baseNToDec 16 (\c -> fromMaybe 0 (Data.Map.lookup (toLower c) hexCharToValue)) s
+
+baseNToDec :: Num i => i -> (a -> i) -> [a] -> i
+baseNToDec base toInt = foldl' (\acc n -> base * acc + toInt n ) 0
+
+hexCharToValue :: Map Char Int
+hexCharToValue = Data.Map.fromList $ zip validHexCharsLowerOnly [0 ..]
+
+word64Max :: Integer
+word64Max = toInteger (maxBound :: Word64)
+
+mkIPV6Normed :: String -> Either String IPV6Normed
+mkIPV6Normed origS = result
+ where
+ expand s
+ | s == "::" = IPV6Normed $ buildExpanded0s 8
+ | isPrefixOf "::" s =
+ let expandCnt = 8 - (length $ split ':' s) + 2
+ filler = buildExpanded0s expandCnt ++ ":"
+ replaced = replace "::" filler s
+ in IPV6Normed replaced
+ | isSuffixOf "::" s =
+ let expandCnt = 8 - (length $ split ':' s) + 1
+ filler = ':' : buildExpanded0s expandCnt
+ replaced = replace "::" filler s
+ in IPV6Normed replaced
+ | isInfixOf "::" s =
+ let expandCnt = 8 - (length $ split ':' s) + 1
+ filler = ':' : buildExpanded0s expandCnt ++ ":"
+ replaced = replace "::" filler s
+ in IPV6Normed replaced
+ | otherwise = IPV6Normed s
+ expanded = expand origS
+ IPV6Normed expandedStr = expanded
+ result = if length (split ':' expandedStr) == 8
+ then Right expanded
+ else Left "invalid sections"
+
+parseIPV6Section :: Parser String
+parseIPV6Section = do
+ mL <- optional (try $ string "::" <|> string ":")
+ seq <- some (oneOf validHexChars)
+ mR <- optional (try $ string "::" <|> string ":")
+ let lowered = map toLower seq
+ l = fromMaybe "" mL
+ r = fromMaybe "" mR
+ return $ l ++ lowered ++ r
+
+parseIPV6Str :: Parser IPV6Str
+parseIPV6Str = do
+ s <- (try $ (fmap (: []) (string "::" <* eof))) <|> manyTill parseIPV6Section
+ eof
+ if length s < 1
+ then fail "Did not find valid sections"
+ else return $ IPV6Str $ Control.Monad.join s
+
+parseIPV6Normed :: Parser IPV6Normed
+parseIPV6Normed = do
+ str <- parseIPV6Str
+ let IPV6Str (s) = str
+ full = mkIPV6Normed s
+ case full of
+ Left err -> fail err
+ Right fullstr -> return fullstr
+
+parseIP6 :: Parser IPAddress6
+parseIP6 = do
+ normed <- parseIPV6Normed
+ return $ ipv6NormedToIPAddress6 normed
+
+-- 8. Remove the derived Show instances from the IPAddress/IPAddress6
+-- types, and write your own Show instance for each type that renders in the
+-- typical textual format appropriate to each.
+
+ipAddressToIPV4DotFields :: IPAddress -> [Integer]
+ipAddressToIPV4DotFields (IPAddress word) = repr
+ where
+ asInteger = toInteger word
+ repr = decToBaseN asInteger 0 [0 .. 255]
+
+instance Show IPAddress where
+ show ip = Control.Monad.join $ intersperse "." asStrings
+ where
+ repr = ipAddressToIPV4DotFields ip
+ asStrings = fmap show repr
+
+ipAddress6toInteger :: IPAddress6 -> Integer
+ipAddress6toInteger (IPAddress6 q r) = toInteger q * word64Max + toInteger r
+
+iPAddress6ToIPV6Normed :: IPAddress6 -> IPV6Normed
+iPAddress6ToIPV6Normed ip = IPV6Normed s
+ where
+ asInteger = ipAddress6toInteger ip
+ chopped = integerToChoppedUp asInteger
+ ss = fmap integerToHexString chopped
+ fillCnt = 8 - length ss
+ filled = (take fillCnt (repeat "0")) ++ ss
+ s = Control.Monad.join $ intersperse ":" filled
+
+instance Show IPAddress6 where
+ show ip = normed
+ where IPV6Normed normed = iPAddress6ToIPV6Normed ip
+
+-- 9. Write a function that converts between IPAddress and IPAddress6.
+
+decToBaseN :: Integral a => a -> b -> [b] -> [b]
+decToBaseN i zero digits = if base == 0
+ then []
+ else go i []
+ where
+ base = fromIntegral $ length digits
+ go 0 [] = [zero]
+ go 0 acc = acc
+ go curr acc =
+ let (q, r) = quotRem curr base
+ in go q ((digits !! fromIntegral r) : acc)
+
+integerToHexString :: Integer -> String
+integerToHexString i = decToBaseN i '0' validHexCharsLowerOnly
+
+integerToChoppedUp :: Integer -> [Integer]
+integerToChoppedUp i = go i []
+ where
+ go 0 [] = [0]
+ go 0 acc = acc
+ go curr acc =
+ let (q, r) = quotRem curr (2 ^ 16)
+ in go q (r : acc)
+
+ipV4ToIpV6Normed :: IPAddress -> IPV6Normed
+ipV4ToIpV6Normed (IPAddress word) = normed
+ where
+ asInteger = toInteger word
+ chopped = integerToChoppedUp asInteger
+ ss = fmap integerToHexString chopped
+ fillCnt = 8 - length ss - 1
+ -- - ffff signifies an ip4 to ip6 conversion
+ -- (http://www.tcpipguide.com/free/t_IPv6IPv4AddressEmbedding-2.htm)
+ filled = (take fillCnt $ repeat "0") ++ ["ffff"] ++ ss
+ s = Control.Monad.join $ intersperse ":" filled
+ normed = IPV6Normed s
+
+ipV4ToIpV6 :: IPAddress -> IPAddress6
+ipV4ToIpV6 ip = ipv6
+ where
+ normed = ipV4ToIpV6Normed ip
+ ipv6 = ipv6NormedToIPAddress6 normed
diff --git a/Haskell-book/24/ParserExercises/src/LogParser.hs b/Haskell-book/24/ParserExercises/src/LogParser.hs
new file mode 100644
index 0000000..480498d
--- /dev/null
+++ b/Haskell-book/24/ParserExercises/src/LogParser.hs
@@ -0,0 +1,83 @@
+module LogParser where
+
+import Control.Applicative
+import Data.Time
+import Test.QuickCheck
+import Text.Trifecta
+
+-- 5. Write a parser for a log file format and sum the time spent in
+-- each activity. Additionally, provide an alternative aggregation
+-- of the data that provides average time spent per activity per day.
+-- The format supports the use of comments which your parser
+-- will have to ignore. The # characters followed by a date mark
+-- the beginning of a particular day.
+--
+-- You are to derive a reasonable datatype for representing this
+-- data yourself. For bonus points, make this bi-directional by
+-- making a Show representation for the datatype which matches
+-- the format you are parsing. Then write a generator for this data
+-- using QuickCheck’s Gen and see if you can break your parser
+-- with QuickCheck.
+
+data Statement = Statement TimeOfDay String deriving Eq
+data LogEntry = LogEntry Day [Statement] deriving Eq
+newtype Log = Log { getLog :: [LogEntry] }
+
+instance Show Statement where
+ show (Statement x y) = (formatTime defaultTimeLocale "%R" x) ++ " " ++ y
+
+instance Show LogEntry where
+ show (LogEntry x y) = "# " ++ (show x) ++ "\n"
+ ++ (foldl (\acc v -> acc ++ (show v) ++ "\n") "" y)
+
+instance Show Log where
+ show (Log y) = (foldl (\acc v -> acc ++ (show v) ++ "\n") "" y)
+
+instance Arbitrary Statement where
+ arbitrary = do
+ h <- choose (0, 23)
+ m <- choose (0, 59)
+ text <- listOf1 $ elements ['.'..'~']
+ return $ Statement (TimeOfDay h m 0) text
+
+instance Arbitrary LogEntry where
+ arbitrary = do
+ day <- arbitrary
+ statements <- arbitrary
+ return $ LogEntry (ModifiedJulianDay day) statements
+
+skipEOL :: Parser ()
+skipEOL = skipMany (oneOf "\n")
+
+skipComments :: Parser ()
+skipComments =
+ skipMany (do _ <- char '-'
+ _ <- char '-'
+ skipMany (noneOf "\n")
+ skipEOL)
+
+parseStatement :: Parser Statement
+parseStatement = do
+ h <- integer
+ _ <- char ':'
+ m <- integer
+ text <- manyTill anyChar (try (string "--") <|> (string "\n") <|> (eof >> return ""))
+ skipComments
+ skipEOL
+ return $ Statement (TimeOfDay (fromIntegral h) (fromIntegral m) 0) text
+
+parseLogEntry :: Parser LogEntry
+parseLogEntry = do
+ _ <- string "# "
+ y <- integer
+ _ <- char '-'
+ m <- integer
+ _ <- char '-'
+ d <- integer
+ skipComments
+ skipEOL
+ statements <- many parseStatement
+ return $ LogEntry (fromGregorian (fromIntegral y) (fromIntegral m) (fromIntegral d)) statements
+
+parseLog :: Parser Log
+parseLog = Log <$> many parseLogEntry
diff --git a/Haskell-book/24/ParserExercises/src/PhoneNumber.hs b/Haskell-book/24/ParserExercises/src/PhoneNumber.hs
new file mode 100644
index 0000000..dad75a7
--- /dev/null
+++ b/Haskell-book/24/ParserExercises/src/PhoneNumber.hs
@@ -0,0 +1,28 @@
+module PhoneNumber where
+
+import Control.Applicative
+import Text.Trifecta
+
+-- 4. Write a parser for US/Canada phone numbers with varying formats.
+-- Cf. Wikipeida's article on "National conventions for writing telephone
+-- numbers". You are encouraged to adapt the exercise to your locality's
+-- conventions if they are not part of the NNAP scheme.
+
+-- aka area code
+type NumberingPlanArea = Int
+type Exchange = Int
+type LineNumber = Int
+
+data PhoneNumber = PhoneNumber NumberingPlanArea Exchange LineNumber
+ deriving (Eq, Show)
+
+parsePhone :: Parser PhoneNumber
+parsePhone = do
+ _ <- optional ((try $ char '(') <|> (try (char '1' >> char '-')))
+ area <- count 3 digit
+ _ <- optional $ (try $ char ')')
+ _ <- optional $ (try $ char ' ') <|> (try $ char '-')
+ exchange <- count 3 digit
+ _ <- optional $ (char ' ') <|> (char '-')
+ lineNumber <- decimal
+ return $ PhoneNumber (read area) (read exchange) (fromIntegral lineNumber)
diff --git a/Haskell-book/24/ParserExercises/src/SemVer.hs b/Haskell-book/24/ParserExercises/src/SemVer.hs
new file mode 100644
index 0000000..47b49ab
--- /dev/null
+++ b/Haskell-book/24/ParserExercises/src/SemVer.hs
@@ -0,0 +1,51 @@
+module SemVer where
+
+import Control.Applicative
+import Text.Trifecta
+
+-- 1. Write a parser for semantic versions as defined by http://semver.org/.
+-- After making a working parser, write an Ord instance for the SemVer type
+-- that obeys the specification outlined on the SemVer website.
+
+-- Relevant to precedence/ordering,
+-- cannot sort numbers like strings.
+data NumberOrString = NOSS String
+ | NOSI Integer
+ deriving (Eq, Show)
+
+type Major = Integer
+type Minor = Integer
+type Patch = Integer
+type Release = [NumberOrString]
+type Metadata = [NumberOrString]
+
+data SemVer = SemVer Major Minor Patch Release Metadata
+ deriving (Eq, Show)
+
+parseNos :: Parser NumberOrString
+parseNos = do
+ nos <- (NOSI <$> integer)
+ <|> (NOSS <$> some letter)
+ return nos
+
+parseRelease :: Parser [NumberOrString]
+parseRelease = do
+ _ <- char '-'
+ sepBy parseNos (char '.')
+
+parseMetadata :: Parser [NumberOrString]
+parseMetadata = do
+ _ <- char '+'
+ sepBy parseNos (char '.')
+
+parseSemVer :: Parser SemVer
+parseSemVer = do
+ major <- decimal
+ _ <- char '.'
+ minor <- decimal
+ _ <- char '.'
+ patch <- decimal
+ release <- option [] parseRelease
+ metadata <- option [] parseMetadata
+
+ return $ SemVer major minor patch release metadata