Decompress defalte compressed fonts

This commit is contained in:
Eugen Wissner 2023-12-27 16:19:21 +01:00
parent a841f138fc
commit 16d9fc384f
Signed by: belka
GPG Key ID: A27FDC1E8EE902C0
6 changed files with 70 additions and 351 deletions

View File

@ -2,44 +2,25 @@ module Main
( main
) where
import Control.Monad (foldM_)
import Data.Int (Int64)
import qualified Data.ByteString as ByteString
import qualified Data.ByteString.Char8 as Char8
import Data.ByteString (ByteString)
import Data.Bifunctor (Bifunctor(..))
import qualified Text.Megaparsec as Megaparsec
import Data.Foldable (find)
import qualified Data.Text.Lazy.Builder as Text.Builder
import qualified Data.Text.Lazy.Builder.Int as Text.Builder
import qualified Data.Text.Lazy as Text.Lazy
import qualified Data.Text.Lazy.IO as Text.Lazy
import qualified Data.Text.Encoding as Text
import GHC.Records (HasField(..))
import Graphics.Fountainhead.Dumper (DumpError(..), dumpTrueType)
import Graphics.Fountainhead (parseFontDirectoryFromFile)
import Graphics.Fountainhead.Dumper (DumpError(..), dumpTables)
-- TODO: kern table since format 1.
-- For details on subtable format see examples in TrueType reference.
import Graphics.Fountainhead.Parser
( fontDirectoryP
, os2TableP
, parseTable
, shortLocaTableP
)
import Graphics.Fountainhead.TrueType
( FontDirectory(..)
, OffsetSubtable(..)
, TableDirectory(..)
)
import System.Environment (getArgs)
import System.Exit (exitWith)
import GHC.IO.Exception (ExitCode(..))
fontMain :: FilePath -> IO ()
fontMain fontFile = do
putStrLn $ "Dumping File:" <> fontFile <> "\n\n"
putStrLn ("Dumping File:" <> fontFile <> "\n\n")
ttfContents <- ByteString.readFile fontFile
case dumpTrueType ttfContents fontFile of
(processedState, initialResult) <- parseFontDirectoryFromFile fontFile
case first DumpParseError initialResult >>= dumpTables processedState of
Right fontDump -> Text.Lazy.putStrLn $ Text.Builder.toLazyText fontDump
Left e
| DumpParseError bundle <- e -> putStr

View File

@ -21,21 +21,22 @@ extra-source-files:
common dependencies
build-depends:
text ^>= 2.0
bytestring ^>= 0.11.0,
text ^>= 2.0,
zlib ^>= 0.6.3
default-language: Haskell2010
library
import: dependencies
exposed-modules:
Graphics.Fountainhead
Graphics.Fountainhead.Dumper
Graphics.Fountainhead.Parser
Graphics.Fountainhead.PDF
Graphics.Fountainhead.Type
Graphics.Fountainhead.TrueType
hs-source-dirs:
src
hs-source-dirs: src
build-depends:
base >= 4.16 && < 5,
bytestring ^>= 0.11.0,
containers ^>= 0.6.5,
megaparsec ^>= 9.3,
time ^>= 1.12,
@ -53,7 +54,6 @@ executable fountainhead
ExplicitForAll
build-depends:
base,
bytestring,
containers,
parser-combinators,
vector,
@ -62,4 +62,4 @@ executable fountainhead
megaparsec,
fountainhead
hs-source-dirs: app
default-language: Haskell2010
ghc-options: -Wall

View File

@ -0,0 +1,50 @@
{- This Source Code Form is subject to the terms of the Mozilla Public License,
v. 2.0. If a copy of the MPL was not distributed with this file, You can
obtain one at https://mozilla.org/MPL/2.0/. -}
module Graphics.Fountainhead
( parseFontDirectoryFromFile
) where
import qualified Codec.Compression.Zlib as Zlib
import Data.ByteString (ByteString)
import qualified Data.ByteString as ByteString
import qualified Data.ByteString.Lazy as ByteString.Lazy
import Data.Void (Void)
import Graphics.Fountainhead.Parser (ParseErrorBundle, fontDirectoryP)
import Graphics.Fountainhead.TrueType (FontDirectory(..))
import qualified Text.Megaparsec as Megaparsec
import Text.Megaparsec (PosState(..), State(..))
import System.IO (IOMode(..), SeekMode(..), hFileSize, hSeek, withBinaryFile)
parseFontDirectoryFromFile :: String
-> IO (State ByteString Void, Either ParseErrorBundle FontDirectory)
parseFontDirectoryFromFile fontFile =
withBinaryFile fontFile ReadMode withFontHandle
where
withFontHandle fontHandle = doParsing
<$> readFontContents fontHandle
doParsing ttfContents =
let initialState = Megaparsec.State
{ stateInput = ttfContents
, stateOffset = 0
, statePosState = Megaparsec.PosState
{ pstateInput = ttfContents
, pstateOffset = 0
, pstateSourcePos = Megaparsec.initialPos fontFile
, pstateTabWidth = Megaparsec.defaultTabWidth
, pstateLinePrefix = ""
}
, stateParseErrors = []
}
in Megaparsec.runParser' fontDirectoryP initialState
readFontContents fontHandle = do
firstBytes <- ByteString.unpack <$> ByteString.hGet fontHandle 2
hSeek fontHandle AbsoluteSeek 0
fileSize <- fromIntegral <$> hFileSize fontHandle
case firstBytes of
0x78 : [secondByte]
| secondByte `elem` [0x01, 0x9c, 0x5e, 0xda] ->
ByteString.Lazy.toStrict . Zlib.decompress
<$> ByteString.Lazy.hGet fontHandle fileSize
_ -> ByteString.hGetContents fontHandle

View File

@ -22,6 +22,7 @@ module Graphics.Fountainhead.Dumper
, dumpMaxp
, dumpOs2
, dumpPost
, dumpTables
, dumpTrueType
, dumpOffsetTable
) where
@ -91,7 +92,8 @@ import Graphics.Fountainhead.Parser
, maxpTableP
, nameTableP
, os2TableP
, postTableP, cvTableP
, postTableP
, cvTableP
)
import Graphics.Fountainhead.Type (Fixed32(..), ttfEpoch)
import Data.Foldable (Foldable(..), find)

View File

@ -1,316 +0,0 @@
{- This Source Code Form is subject to the terms of the Mozilla Public License,
v. 2.0. If a copy of the MPL was not distributed with this file, You can
obtain one at https://mozilla.org/MPL/2.0/. -}
{-# LANGUAGE EmptyDataDecls #-}
module Graphics.Fountainhead.PDF
( Dictionary(..)
, E5
, Header(..)
, Link(..)
, Name(..)
, Object(..)
, Sink
, TextString(..)
, Trailer(..)
, Type(..)
, UncoatedString(..)
, XRefSection(..)
, XRefEntry(..)
, arrayType
, headerToPdf
, dictionaryToPdf
, dictionaryType
, linkToPdf
, linkType
, nameToPdf
, nameType
, nextName
, objectToPdf
, sinkWithLength
, stringType
, textStringToPdf
, textType
, trailerToPdf
, typeToPdf
, uncoatedStringToPdf
, xrefEntryToPdf
, xrefSectionToPdf
, writeObject
) where
import Control.Monad.IO.Class (MonadIO(..))
import Control.Monad.Trans.State (StateT, get, gets, put)
import Data.ByteString (ByteString)
import qualified Data.ByteString as ByteString
import qualified Data.ByteString.Builder as ByteString.Builder
import qualified Data.ByteString.Builder as ByteString (Builder)
import qualified Data.ByteString.Lazy as ByteString.Lazy
import Data.Char (ord)
import Data.Fixed (Fixed(..), HasResolution(..), showFixed)
import Data.Vector (Vector)
import qualified Data.Vector as Vector
-- | The header in the first line of a PDF file contains a PDF version number
-- consisting of a major and a minor version.
data Header = Header Int Int
deriving (Eq, Show)
-- | See t'Header'.
headerToPdf :: Header -> ByteString.Builder
headerToPdf (Header major minor)
= ByteString.Builder.string7 "%PDF-"
<> ByteString.Builder.intDec major
<> ByteString.Builder.char7 '.'
<> ByteString.Builder.intDec minor
<> ByteString.Builder.char7 '\n'
-- | A name object is an atomic symbol uniquely defined by a sequence of
-- characters.
newtype Name = Name String
deriving (Eq, Show)
-- | See t'Name'.
nameToPdf :: Name -> ByteString.Builder
nameToPdf (Name name) = ByteString.Builder.char7 '/'
<> ByteString.Builder.string7 name
-- | A dictionary object is an associative table containing pairs of objects.
newtype Dictionary = Dictionary (Vector (Name, Type))
-- | See t'Dictionary'.
dictionaryToPdf :: Dictionary -> IO ByteString.Builder
dictionaryToPdf (Dictionary valuePairs) = do
pairs <- traverse pairToPdf valuePairs
pure
$ ByteString.Builder.string7 "<<"
<> unwordBuilder pairs
<> ByteString.Builder.string7 ">>"
where
pairToPdf (name, value) = do
value' <- typeToPdf value
pure $ nameToPdf name <> ByteString.Builder.char7 ' ' <> value'
-- | Hexadecimal data.
newtype UncoatedString = UncoatedString String
deriving (Eq, Show)
-- | See t'UncoatedString'.
uncoatedStringToPdf :: UncoatedString -> ByteString.Builder
uncoatedStringToPdf (UncoatedString uncoatedString)
= ByteString.Builder.char7 '<'
<> ByteString.Builder.string8 uncoatedString
<> ByteString.Builder.char7 '>'
-- | A sequence of literal characters.
newtype TextString = TextString String
deriving (Eq, Show)
-- | See t'TextString'.
textStringToPdf :: TextString -> ByteString.Builder
textStringToPdf (TextString textString)
= ByteString.Builder.char7 '('
<> ByteString.Builder.stringUtf8 textString
<> ByteString.Builder.char7 ')'
-- Resolution of 10^-5 = .001.
data E5
instance HasResolution E5
where
resolution _ = 100000
-- | Reference to an inderect object, consisting of the object name and
-- revision.
data Link = Link Int Int
deriving (Eq, Show)
-- | See t'Link'.
linkToPdf :: Link -> ByteString.Builder
linkToPdf (Link name revision)
= ByteString.Builder.intDec name
<> ByteString.Builder.char7 ' '
<> ByteString.Builder.intDec revision
<> ByteString.Builder.string8 " R"
-- | Basic types of object.
data Type
= DictionaryType Dictionary
| ArrayType (Vector Type)
| LinkType Link
| NameType Name
| IntegerType Int
| RealType (Fixed E5)
| StreamType Dictionary (IO ByteString)
| StringType UncoatedString
| TextType TextString
| NullType
-- | See t'Type'.
typeToPdf :: Type -> IO ByteString.Builder
typeToPdf (DictionaryType dictionary) = dictionaryToPdf dictionary
typeToPdf (ArrayType values) = do
converted <- traverse typeToPdf values
pure
$ ByteString.Builder.char7 '['
<> unwordBuilder converted
<> ByteString.Builder.char7 ']'
typeToPdf (LinkType link) = pure $ linkToPdf link
typeToPdf (NameType name) = pure $ nameToPdf name
typeToPdf (IntegerType pdfInteger) = pure $ ByteString.Builder.intDec pdfInteger
typeToPdf (StreamType dictionary producer) = do
streamContents <- producer
producedDictionary <- dictionaryToPdf dictionary
pure
$ producedDictionary
<> ByteString.Builder.string8 "\nstream\n"
<> ByteString.Builder.byteString streamContents
<> ByteString.Builder.string8 "\nendstream"
typeToPdf (StringType string) = pure $ uncoatedStringToPdf string
typeToPdf (TextType text) = pure $ textStringToPdf text
typeToPdf (RealType realType) =
pure $ ByteString.Builder.string7 $ showFixed True realType
typeToPdf NullType = pure $ ByteString.Builder.string7 "null"
-- | Object number, generation number and object contents.
data Object = Object Int Int Type
-- | See t'Object'.
objectToPdf :: Object -> IO ByteString.Builder
objectToPdf (Object name revision type') = do
producedType <- typeToPdf type'
pure $ ByteString.Builder.intDec name
<> ByteString.Builder.char7 ' '
<> ByteString.Builder.intDec revision
<> ByteString.Builder.string7 " obj\n"
<> producedType
<> ByteString.Builder.string7 "\nendobj\n"
-- | Shortcut to create a t'Dictionary' type.
dictionaryType :: [(Name, Type)] -> Type
dictionaryType = DictionaryType . Dictionary . Vector.fromList
-- | Shortcut to create an t'Array' type.
arrayType :: [Type] -> Type
arrayType = ArrayType . Vector.fromList
-- | Shortcut to create a t'Name' type.
nameType :: String -> Type
nameType = NameType . Name
-- | Shortcut to create a t'UncoatedString' type.
stringType :: String -> Type
stringType = StringType . UncoatedString
-- | Shortcut to create a t'TextString' type.
textType :: String -> Type
textType = TextType . TextString
-- | Shortcut to create a t'Link' type.
linkType :: Int -> Int -> Type
linkType name revision = LinkType $ Link name revision
-- | Byte offset of an object in the file, generation number and whether this is
-- an in-use entry.
data XRefEntry = XRefEntry Int Int Bool
deriving (Eq, Show)
-- | See t'XRefEntry'.
xrefEntryToPdf :: XRefEntry -> ByteString.Builder
xrefEntryToPdf (XRefEntry offset generation True)
= pad 10 offset
<> ByteString.Builder.char7 ' '
<> pad 5 generation
<> ByteString.Builder.string7 " n"
xrefEntryToPdf (XRefEntry offset generation False)
= pad 10 offset
<> ByteString.Builder.char7 ' '
<> pad 5 generation
<> ByteString.Builder.string7 " f"
-- | Cross-reference table containing information about the indirect objects in
-- the file.
newtype XRefSection = XRefSection
{ unXRefSection :: Vector XRefEntry
} deriving (Eq, Show)
-- | See t'XRefSection'.
xrefSectionToPdf :: XRefSection -> ByteString.Builder
xrefSectionToPdf (XRefSection entries)
= ByteString.Builder.string7 "xref\n0 "
<> ByteString.Builder.intDec (length entries)
<> newline
<> Vector.foldMap (newline <>) (xrefEntryToPdf <$> entries)
<> newline
instance Semigroup XRefSection
where
(XRefSection lhs) <> (XRefSection rhs) = XRefSection $ lhs <> rhs
instance Monoid XRefSection
where
mempty = XRefSection mempty
-- | A trailer giving the location of the cross-reference table and of certain
-- special objects within the body of the file.
data Trailer = Trailer Dictionary Int
-- | See t'Trailer'.
trailerToPdf :: Trailer -> IO ByteString.Builder
trailerToPdf (Trailer dictionary startxref) = do
producedDictionary <- dictionaryToPdf dictionary
pure $ ByteString.Builder.string7 "trailer "
<> producedDictionary
<> ByteString.Builder.string7 "\nstartxref\n"
<> ByteString.Builder.intDec startxref
<> ByteString.Builder.string7 "\n%%EOF\n"
pad :: Int -> Int -> ByteString.Builder
pad length' number =
let asString = ByteString.Builder.intDec number
numberLength = builderLength asString
padding = ByteString.Builder.byteString
$ ByteString.replicate (length' - numberLength) zero
in padding <> asString
where
zero = fromIntegral $ ord '0'
builderLength = fromIntegral
. ByteString.Lazy.length
. ByteString.Builder.toLazyByteString
unwordBuilder :: Vector ByteString.Builder -> ByteString.Builder
unwordBuilder = Vector.foldMap (ByteString.Builder.char7 ' ' <>)
newline :: ByteString.Builder
newline = ByteString.Builder.char7 '\n'
type Sink = ByteString.Lazy.ByteString -> IO ()
-- | Creates a new object using the provided value, writes the object to the
-- sink, and returns a reference to that object.
--
-- For example if the passed value is a dictionary, the created object could be
-- @
-- 2 0 obj <<…>> endobj
-- @
-- where "2 0" identifies the object. The name (2) is generated using the state,
-- the revision is always 0.
writeObject :: Sink -> Type -> StateT (Int, XRefSection) IO Link
writeObject sink object = do
(previousLength, XRefSection refs) <- get
let objectName = length refs
objectContents <- liftIO $ objectToPdf $ Object objectName 0 object
writtenLength <- liftIO $ sinkWithLength sink objectContents
put (previousLength + writtenLength, XRefSection $ Vector.snoc refs (XRefEntry previousLength 0 True))
pure $ Link objectName 0
-- | Gets the name of the object which will be generated next.
nextName :: StateT (Int, XRefSection) IO Int
nextName = gets (length . unXRefSection . snd)
-- | Writes the data into the sink and returns the number of the written bytes.
sinkWithLength :: Sink -> ByteString.Builder -> IO Int
sinkWithLength sink data' =
let lazyData = ByteString.Builder.toLazyByteString data'
in sink lazyData >> pure (fromIntegral $ ByteString.Lazy.length lazyData)

View File

@ -12,6 +12,7 @@
-- | Font parser.
module Graphics.Fountainhead.Parser
( Parser
, ParseErrorBundle
, cmapTableP
, cvTableP
, f2Dot14P
@ -147,6 +148,7 @@ import qualified Text.Megaparsec as Megaparsec
import qualified Text.Megaparsec.Byte.Binary as Megaparsec.Binary
type Parser = Megaparsec.Parsec Void ByteString
type ParseErrorBundle = Megaparsec.ParseErrorBundle ByteString Void
-- * Font directory
@ -942,7 +944,7 @@ parseTable
:: TableDirectory
-> Parser a
-> Megaparsec.State ByteString Void
-> Either (Megaparsec.ParseErrorBundle ByteString Void) a
-> Either ParseErrorBundle a
parseTable TableDirectory{ offset, length = length' } parser state = snd
$ Megaparsec.runParser' parser
$ state