1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
|
{- This Source Code Form is subject to the terms of the Mozilla Public License,
v. 2.0. If a copy of the MPL was not distributed with this file, You can
obtain one at https://mozilla.org/MPL/2.0/. -}
{-# LANGUAGE EmptyDataDecls #-}
module Graphics.Fountainhead.PDF
( Dictionary(..)
, E5
, Header(..)
, Link(..)
, Name(..)
, Object(..)
, Sink
, TextString(..)
, Trailer(..)
, Type(..)
, UncoatedString(..)
, XRefSection(..)
, XRefEntry(..)
, arrayType
, headerToPdf
, dictionaryToPdf
, dictionaryType
, linkToPdf
, linkType
, nameToPdf
, nameType
, nextName
, objectToPdf
, sinkWithLength
, stringType
, textStringToPdf
, textType
, trailerToPdf
, typeToPdf
, uncoatedStringToPdf
, xrefEntryToPdf
, xrefSectionToPdf
, writeObject
) where
import Control.Monad.IO.Class (MonadIO(..))
import Control.Monad.Trans.State (StateT, get, gets, put)
import Data.ByteString (ByteString)
import qualified Data.ByteString as ByteString
import qualified Data.ByteString.Builder as ByteString.Builder
import qualified Data.ByteString.Builder as ByteString (Builder)
import qualified Data.ByteString.Lazy as ByteString.Lazy
import Data.Char (ord)
import Data.Fixed (Fixed(..), HasResolution(..), showFixed)
import Data.Vector (Vector)
import qualified Data.Vector as Vector
-- | The header in the first line of a PDF file contains a PDF version number
-- consisting of a major and a minor version.
data Header = Header Int Int
deriving (Eq, Show)
-- | See t'Header'.
headerToPdf :: Header -> ByteString.Builder
headerToPdf (Header major minor)
= ByteString.Builder.string7 "%PDF-"
<> ByteString.Builder.intDec major
<> ByteString.Builder.char7 '.'
<> ByteString.Builder.intDec minor
<> ByteString.Builder.char7 '\n'
-- | A name object is an atomic symbol uniquely defined by a sequence of
-- characters.
newtype Name = Name String
deriving (Eq, Show)
-- | See t'Name'.
nameToPdf :: Name -> ByteString.Builder
nameToPdf (Name name) = ByteString.Builder.char7 '/'
<> ByteString.Builder.string7 name
-- | A dictionary object is an associative table containing pairs of objects.
newtype Dictionary = Dictionary (Vector (Name, Type))
-- | See t'Dictionary'.
dictionaryToPdf :: Dictionary -> IO ByteString.Builder
dictionaryToPdf (Dictionary valuePairs) = do
pairs <- traverse pairToPdf valuePairs
pure
$ ByteString.Builder.string7 "<<"
<> unwordBuilder pairs
<> ByteString.Builder.string7 ">>"
where
pairToPdf (name, value) = do
value' <- typeToPdf value
pure $ nameToPdf name <> ByteString.Builder.char7 ' ' <> value'
-- | Hexadecimal data.
newtype UncoatedString = UncoatedString String
deriving (Eq, Show)
-- | See t'UncoatedString'.
uncoatedStringToPdf :: UncoatedString -> ByteString.Builder
uncoatedStringToPdf (UncoatedString uncoatedString)
= ByteString.Builder.char7 '<'
<> ByteString.Builder.string8 uncoatedString
<> ByteString.Builder.char7 '>'
-- | A sequence of literal characters.
newtype TextString = TextString String
deriving (Eq, Show)
-- | See t'TextString'.
textStringToPdf :: TextString -> ByteString.Builder
textStringToPdf (TextString textString)
= ByteString.Builder.char7 '('
<> ByteString.Builder.stringUtf8 textString
<> ByteString.Builder.char7 ')'
-- Resolution of 10^-5 = .001.
data E5
instance HasResolution E5
where
resolution _ = 100000
-- | Reference to an inderect object, consisting of the object name and
-- revision.
data Link = Link Int Int
deriving (Eq, Show)
-- | See t'Link'.
linkToPdf :: Link -> ByteString.Builder
linkToPdf (Link name revision)
= ByteString.Builder.intDec name
<> ByteString.Builder.char7 ' '
<> ByteString.Builder.intDec revision
<> ByteString.Builder.string8 " R"
-- | Basic types of object.
data Type
= DictionaryType Dictionary
| ArrayType (Vector Type)
| LinkType Link
| NameType Name
| IntegerType Int
| RealType (Fixed E5)
| StreamType Dictionary (IO ByteString)
| StringType UncoatedString
| TextType TextString
| NullType
-- | See t'Type'.
typeToPdf :: Type -> IO ByteString.Builder
typeToPdf (DictionaryType dictionary) = dictionaryToPdf dictionary
typeToPdf (ArrayType values) = do
converted <- traverse typeToPdf values
pure
$ ByteString.Builder.char7 '['
<> unwordBuilder converted
<> ByteString.Builder.char7 ']'
typeToPdf (LinkType link) = pure $ linkToPdf link
typeToPdf (NameType name) = pure $ nameToPdf name
typeToPdf (IntegerType pdfInteger) = pure $ ByteString.Builder.intDec pdfInteger
typeToPdf (StreamType dictionary producer) = do
streamContents <- producer
producedDictionary <- dictionaryToPdf dictionary
pure
$ producedDictionary
<> ByteString.Builder.string8 "\nstream\n"
<> ByteString.Builder.byteString streamContents
<> ByteString.Builder.string8 "\nendstream"
typeToPdf (StringType string) = pure $ uncoatedStringToPdf string
typeToPdf (TextType text) = pure $ textStringToPdf text
typeToPdf (RealType realType) =
pure $ ByteString.Builder.string7 $ showFixed True realType
typeToPdf NullType = pure $ ByteString.Builder.string7 "null"
-- | Object number, generation number and object contents.
data Object = Object Int Int Type
-- | See t'Object'.
objectToPdf :: Object -> IO ByteString.Builder
objectToPdf (Object name revision type') = do
producedType <- typeToPdf type'
pure $ ByteString.Builder.intDec name
<> ByteString.Builder.char7 ' '
<> ByteString.Builder.intDec revision
<> ByteString.Builder.string7 " obj\n"
<> producedType
<> ByteString.Builder.string7 "\nendobj\n"
-- | Shortcut to create a t'Dictionary' type.
dictionaryType :: [(Name, Type)] -> Type
dictionaryType = DictionaryType . Dictionary . Vector.fromList
-- | Shortcut to create an t'Array' type.
arrayType :: [Type] -> Type
arrayType = ArrayType . Vector.fromList
-- | Shortcut to create a t'Name' type.
nameType :: String -> Type
nameType = NameType . Name
-- | Shortcut to create a t'UncoatedString' type.
stringType :: String -> Type
stringType = StringType . UncoatedString
-- | Shortcut to create a t'TextString' type.
textType :: String -> Type
textType = TextType . TextString
-- | Shortcut to create a t'Link' type.
linkType :: Int -> Int -> Type
linkType name revision = LinkType $ Link name revision
-- | Byte offset of an object in the file, generation number and whether this is
-- an in-use entry.
data XRefEntry = XRefEntry Int Int Bool
deriving (Eq, Show)
-- | See t'XRefEntry'.
xrefEntryToPdf :: XRefEntry -> ByteString.Builder
xrefEntryToPdf (XRefEntry offset generation True)
= pad 10 offset
<> ByteString.Builder.char7 ' '
<> pad 5 generation
<> ByteString.Builder.string7 " n"
xrefEntryToPdf (XRefEntry offset generation False)
= pad 10 offset
<> ByteString.Builder.char7 ' '
<> pad 5 generation
<> ByteString.Builder.string7 " f"
-- | Cross-reference table containing information about the indirect objects in
-- the file.
newtype XRefSection = XRefSection
{ unXRefSection :: Vector XRefEntry
} deriving (Eq, Show)
-- | See t'XRefSection'.
xrefSectionToPdf :: XRefSection -> ByteString.Builder
xrefSectionToPdf (XRefSection entries)
= ByteString.Builder.string7 "xref\n0 "
<> ByteString.Builder.intDec (length entries)
<> newline
<> Vector.foldMap (newline <>) (xrefEntryToPdf <$> entries)
<> newline
instance Semigroup XRefSection
where
(XRefSection lhs) <> (XRefSection rhs) = XRefSection $ lhs <> rhs
instance Monoid XRefSection
where
mempty = XRefSection mempty
-- | A trailer giving the location of the cross-reference table and of certain
-- special objects within the body of the file.
data Trailer = Trailer Dictionary Int
-- | See t'Trailer'.
trailerToPdf :: Trailer -> IO ByteString.Builder
trailerToPdf (Trailer dictionary startxref) = do
producedDictionary <- dictionaryToPdf dictionary
pure $ ByteString.Builder.string7 "trailer "
<> producedDictionary
<> ByteString.Builder.string7 "\nstartxref\n"
<> ByteString.Builder.intDec startxref
<> ByteString.Builder.string7 "\n%%EOF\n"
pad :: Int -> Int -> ByteString.Builder
pad length' number =
let asString = ByteString.Builder.intDec number
numberLength = builderLength asString
padding = ByteString.Builder.byteString
$ ByteString.replicate (length' - numberLength) zero
in padding <> asString
where
zero = fromIntegral $ ord '0'
builderLength = fromIntegral
. ByteString.Lazy.length
. ByteString.Builder.toLazyByteString
unwordBuilder :: Vector ByteString.Builder -> ByteString.Builder
unwordBuilder = Vector.foldMap (ByteString.Builder.char7 ' ' <>)
newline :: ByteString.Builder
newline = ByteString.Builder.char7 '\n'
type Sink = ByteString.Lazy.ByteString -> IO ()
-- | Creates a new object using the provided value, writes the object to the
-- sink, and returns a reference to that object.
--
-- For example if the passed value is a dictionary, the created object could be
-- @
-- 2 0 obj <<…>> endobj
-- @
-- where "2 0" identifies the object. The name (2) is generated using the state,
-- the revision is always 0.
writeObject :: Sink -> Type -> StateT (Int, XRefSection) IO Link
writeObject sink object = do
(previousLength, XRefSection refs) <- get
let objectName = length refs
objectContents <- liftIO $ objectToPdf $ Object objectName 0 object
writtenLength <- liftIO $ sinkWithLength sink objectContents
put (previousLength + writtenLength, XRefSection $ Vector.snoc refs (XRefEntry previousLength 0 True))
pure $ Link objectName 0
-- | Gets the name of the object which will be generated next.
nextName :: StateT (Int, XRefSection) IO Int
nextName = gets (length . unXRefSection . snd)
-- | Writes the data into the sink and returns the number of the written bytes.
sinkWithLength :: Sink -> ByteString.Builder -> IO Int
sinkWithLength sink data' =
let lazyData = ByteString.Builder.toLazyByteString data'
in sink lazyData >> pure (fromIntegral $ ByteString.Lazy.length lazyData)
|