summary refs log tree commit diff stats
path: root/pydyf/__init__.py
diff options
context:
space:
mode:
Diffstat (limited to 'pydyf/__init__.py')
-rwxr-xr-x pydyf/__init__.py 298
1 files changed, 223 insertions, 75 deletions
diff --git a/pydyf/__init__.py b/pydyf/__init__.py
index 05dccf6..86d321d 100755
--- a/pydyf/__init__.py
+++ b/pydyf/__init__.py
@@ -3,26 +3,28 @@ A low-level PDF generator.
"""
+import base64
import re
import zlib
from codecs import BOM_UTF16_BE
+from hashlib import md5
+from math import ceil, log
+from warnings import warn
-VERSION = __version__ = '0.1.2'
+VERSION = __version__ = '0.10.0'
def _to_bytes(item):
"""Convert item to bytes."""
if isinstance(item, bytes):
return item
- elif isinstance(item, Object):
- return item.data
elif isinstance(item, float):
if item.is_integer():
- return f'{int(item):d}'.encode('ascii')
+ return str(int(item)).encode('ascii')
else:
- return f'{item:f}'.encode('ascii')
- elif isinstance(item, int):
- return f'{item:d}'.encode('ascii')
+ return f'{item:f}'.rstrip('0').encode('ascii')
+ elif isinstance(item, Object):
+ return item.data
return str(item).encode('ascii')
@@ -42,51 +44,41 @@ class Object:
@property
def indirect(self):
"""Indirect representation of an object."""
- return b'\n'.join((
- str(self.number).encode() + b' ' +
- str(self.generation).encode() + b' obj',
- self.data,
- b'endobj',
- ))
+ header = f'{self.number} {self.generation} obj\n'.encode()
+ return header + self.data + b'\nendobj'
@property
def reference(self):
"""Object identifier."""
- return (
- str(self.number).encode() + b' ' +
- str(self.generation).encode() + b' R')
+ return f'{self.number} {self.generation} R'.encode()
@property
def data(self):
"""Data contained in the object. Shall be defined in each subclass."""
raise NotImplementedError()
+ @property
+ def compressible(self):
+ """Whether the object can be included in an object stream."""
+ return not self.generation and not isinstance(self, Stream)
-class Dictionary(Object, dict):
- """PDF Dictionary object.
-
- Inherits from :class:`Object` and Python :obj:`dict`.
- """
+class Dictionary(Object, dict):
+ """PDF Dictionary object."""
def __init__(self, values=None):
Object.__init__(self)
dict.__init__(self, values or {})
@property
def data(self):
- result = [b'<<']
- for key, value in self.items():
- result.append(b'/' + _to_bytes(key) + b' ' + _to_bytes(value))
- result.append(b'>>')
- return b'\n'.join(result)
+ result = [
+ b'/' + _to_bytes(key) + b' ' + _to_bytes(value)
+ for key, value in self.items()]
+ return b'<<' + b''.join(result) + b'>>'
class Stream(Object):
- """PDF Stream object.
-
- Inherits from :class:`Object`.
-
- """
+ """PDF Stream object."""
def __init__(self, stream=None, extra=None, compress=False):
super().__init__()
#: Python array of data composing stream.
@@ -96,6 +88,15 @@ class Stream(Object):
#: Compress the stream data if set to ``True``. Default is ``False``.
self.compress = compress
+ def begin_marked_content(self, tag, property_list=None):
+ """Begin marked-content sequence."""
+ self.stream.append(f'/{tag}')
+ if property_list is None:
+ self.stream.append(b'BMC')
+ else:
+ self.stream.append(property_list)
+ self.stream.append(b'BDC')
+
def begin_text(self):
"""Begin a text object."""
self.stream.append(b'BT')
@@ -171,6 +172,10 @@ class Stream(Object):
"""End path without filling or stroking."""
self.stream.append(b'n')
+ def end_marked_content(self):
+ """End marked-content sequence."""
+ self.stream.append(b'EMC')
+
def end_text(self):
"""End text object."""
self.stream.append(b'ET')
@@ -199,6 +204,37 @@ class Stream(Object):
"""
self.stream.append(b'b*' if even_odd else b'b')
+ def inline_image(self, width, height, color_space, bpc, raw_data):
+ """Add an inline image.
+
+ :param width: The width of the image.
+ :type width: :obj:`int`
+ :param height: The height of the image.
+ :type height: :obj:`int`
+ :param color_space: The device color space of the image, without the ``Device`` prefix, e.g. RGB, Gray.
+ :type color_space: :obj:`str`
+ :param bpc: The bits per component. 1 for BW, 8 for grayscale.
+ :type bpc: :obj:`int`
+ :param raw_data: The raw pixel data.
+
+ """
+ data = zlib.compress(raw_data) if self.compress else raw_data
+ a85_data = base64.a85encode(data) + b'~>'
+ self.stream.append(b' '.join((
+ b'BI',
+ b'/W', _to_bytes(width),
+ b'/H', _to_bytes(height),
+ b'/BPC', _to_bytes(bpc),
+ b'/CS',
+ b'/Device' + _to_bytes(color_space),
+ b'/F',
+ b'[/A85 /Fl]' if self.compress else b'/A85',
+ b'/L', _to_bytes(len(a85_data)),
+ b'ID',
+ a85_data,
+ b'EI',
+ )))
+
def line_to(self, x, y):
"""Add line from current point to point ``(x, y)``."""
self.stream.append(b' '.join((_to_bytes(x), _to_bytes(y), b'l')))
@@ -207,6 +243,10 @@ class Stream(Object):
"""Begin new subpath by moving current point to ``(x, y)``."""
self.stream.append(b' '.join((_to_bytes(x), _to_bytes(y), b'm')))
+ def move_text_to(self, x, y):
+ """Move text to next line at ``(x, y)`` distance from previous line."""
+ self.stream.append(b' '.join((_to_bytes(x), _to_bytes(y), b'Td')))
+
def shading(self, name):
"""Paint shape and color shading using shading dictionary ``name``."""
self.stream.append(b'/' + _to_bytes(name) + b' sh')
@@ -271,6 +311,10 @@ class Stream(Object):
"""Set text rendering mode."""
self.stream.append(_to_bytes(mode) + b' Tr')
+ def set_text_rise(self, height):
+ """Set text rise."""
+ self.stream.append(_to_bytes(height) + b' Ts')
+
def set_line_cap(self, line_cap):
"""Set line cap style."""
self.stream.append(_to_bytes(line_cap) + b' J')
@@ -296,9 +340,13 @@ class Stream(Object):
self.stream.append(b'/' + _to_bytes(state_name) + b' gs')
def show_text(self, text):
- """Show text."""
+ """Show text strings with individual glyph positioning."""
self.stream.append(b'[' + _to_bytes(text) + b'] TJ')
+ def show_text_string(self, text):
+ """Show single text string."""
+ self.stream.append(String(text).data + b' Tj')
+
def stroke(self):
"""Stroke path."""
self.stream.append(b'S')
@@ -355,7 +403,7 @@ class Stream(Object):
extra = Dictionary(self.extra.copy())
if self.compress:
extra['Filter'] = '/FlateDecode'
- compressobj = zlib.compressobj()
+ compressobj = zlib.compressobj(level=9)
stream = compressobj.compress(stream)
stream += compressobj.flush()
extra['Length'] = len(stream)
@@ -363,11 +411,7 @@ class Stream(Object):
class String(Object):
- """PDF String object.
-
- Inherits from :class:`Object`.
-
- """
+ """PDF String object."""
def __init__(self, string=''):
super().__init__()
#: Unicode string.
@@ -388,27 +432,29 @@ class String(Object):
class Array(Object, list):
- """PDF Array object.
-
- Inherits from :class:`Object` and Python :obj:`list`.
-
- """
+ """PDF Array object."""
def __init__(self, array=None):
Object.__init__(self)
list.__init__(self, array or [])
@property
def data(self):
- result = [b'[']
- for child in self:
- result.append(_to_bytes(child))
- result.append(b']')
- return b' '.join(result)
+ return b'[' + b' '.join(_to_bytes(child) for child in self) + b']'
class PDF:
"""PDF document."""
- def __init__(self):
+ def __init__(self, version=None, identifier=None):
+ """Create a PDF document."""
+ if version or identifier: # to be removed in next version
+ warn(
+ "PDF objects don’t take version or identifier during initialization "
+ "anymore. These properties are now stored but ignored, and will be "
+ "removed and rejected in next version of pydyf. Please pass these "
+ "properties to the PDF.write() method instead.", DeprecationWarning)
+ self.version = _to_bytes(version) if version else b'1.7' # to be removed
+ self.identifier = identifier # to be removed
+
#: Python :obj:`list` containing the PDF’s objects.
self.objects = []
@@ -425,7 +471,7 @@ class PDF:
})
self.add_object(self.pages)
- #: PDF :class:`Dictionary` containing the PDF’s metadata.
+ #: PDF :class:`Dictionary` containing the PDF’s metadata.
self.info = Dictionary({})
self.add_object(self.info)
@@ -457,6 +503,12 @@ class PDF:
object_.number = len(self.objects)
self.objects.append(object_)
+ @property
+ def page_references(self):
+ return tuple(
+ f'{object_number} 0 R'.encode('ascii')
+ for object_number in self.pages['Kids'][::3])
+
def write_line(self, content, output):
"""Write line to output.
@@ -469,40 +521,136 @@ class PDF:
self.current_position += len(content) + 1
output.write(content + b'\n')
- def write(self, output):
+ def write(self, output, version=b'1.7', identifier=False, compress=False):
"""Write PDF to output.
:param output: Output stream.
:type output: binary :term:`file object`
+ :param bytes version: PDF version.
+ :param identifier: PDF file identifier. Default is :obj:`False`
+ to include no identifier, can be set to :obj:`True` to generate an
+ automatic identifier.
+ :type identifier: :obj:`bytes` or :obj:`bool`
+ :param bool compress: whether the PDF uses a compressed object stream.
"""
+ # Convert version and identifier to bytes
+ version = _to_bytes(version or b'1.7') # Force 1.7 when None
+ if identifier not in (False, True, None):
+ identifier = _to_bytes(identifier)
+
# Write header
- self.write_line(b'%PDF-1.7', output)
+ self.write_line(b'%PDF-' + version, output)
self.write_line(b'%\xf0\x9f\x96\xa4', output)
- # Write all non-free PDF objects
- for object_ in self.objects:
- if object_.free == 'f':
- continue
- object_.offset = self.current_position
- self.write_line(object_.indirect, output)
-
- # Write cross reference table
- self.xref_position = self.current_position
- self.write_line(b'xref', output)
- self.write_line(f'0 {len(self.objects)}'.encode(), output)
- for object_ in self.objects:
- self.write_line(
- (f'{object_.offset:010} {object_.generation:05} '
- f'{object_.free} ').encode(), output)
-
- # Write trailer
- self.write_line(b'trailer', output)
- self.write_line(b'<<', output)
- self.write_line(f'/Size {len(self.objects)}'.encode(), output)
- self.write_line(b'/Root ' + self.catalog.reference, output)
- self.write_line(b'/Info ' + self.info.reference, output)
- self.write_line(b'>>', output)
+ if version >= b'1.5' and compress:
+ # Store compressed objects for later and write other ones in PDF
+ compressed_objects = []
+ for object_ in self.objects:
+ if object_.free == 'f':
+ continue
+ if object_.compressible:
+ compressed_objects.append(object_)
+ else:
+ object_.offset = self.current_position
+ self.write_line(object_.indirect, output)
+
+ # Write compressed objects in object stream
+ stream = [[]]
+ position = 0
+ for i, object_ in enumerate(compressed_objects):
+ data = object_.data
+ stream.append(data)
+ stream[0].append(object_.number)
+ stream[0].append(position)
+ position += len(data) + 1
+ stream[0] = ' '.join(str(i) for i in stream[0])
+ extra = {
+ 'Type': '/ObjStm',
+ 'N': len(compressed_objects),
+ 'First': len(stream[0]) + 1,
+ }
+ object_stream = Stream(stream, extra, compress)
+ object_stream.offset = self.current_position
+ self.add_object(object_stream)
+ self.write_line(object_stream.indirect, output)
+
+ # Write cross-reference stream
+ xref = []
+ dict_index = 0
+ for object_ in self.objects:
+ if object_.compressible:
+ xref.append((2, object_stream.number, dict_index))
+ dict_index += 1
+ else:
+ xref.append((
+ bool(object_.number), object_.offset,
+ object_.generation))
+ xref.append((1, self.current_position, 0))
+
+ field2_size = ceil(log(self.current_position + 1, 256))
+ max_generation = max(
+ object_.generation for object_ in self.objects)
+ field3_size = ceil(log(
+ max(max_generation, len(compressed_objects)) + 1, 256))
+ xref_lengths = (1, field2_size, field3_size)
+ xref_stream = b''.join(
+ value.to_bytes(length, 'big')
+ for line in xref for length, value in zip(xref_lengths, line))
+ extra = {
+ 'Type': '/XRef',
+ 'Index': Array((0, len(self.objects) + 1)),
+ 'W': Array(xref_lengths),
+ 'Size': len(self.objects) + 1,
+ 'Root': self.catalog.reference,
+ 'Info': self.info.reference,
+ }
+ if identifier:
+ data = b''.join(
+ obj.data for obj in self.objects if obj.free != 'f')
+ data_hash = md5(data).hexdigest().encode()
+ if identifier is True:
+ identifier = data_hash
+ extra['ID'] = Array((
+ String(identifier).data, String(data_hash).data))
+ dict_stream = Stream([xref_stream], extra, compress)
+ self.xref_position = dict_stream.offset = self.current_position
+ self.add_object(dict_stream)
+ self.write_line(dict_stream.indirect, output)
+ else:
+ # Write all non-free PDF objects
+ for object_ in self.objects:
+ if object_.free == 'f':
+ continue
+ object_.offset = self.current_position
+ self.write_line(object_.indirect, output)
+
+ # Write cross-reference table
+ self.xref_position = self.current_position
+ self.write_line(b'xref', output)
+ self.write_line(f'0 {len(self.objects)}'.encode(), output)
+ for object_ in self.objects:
+ self.write_line(
+ (f'{object_.offset:010} {object_.generation:05} '
+ f'{object_.free} ').encode(), output)
+
+ # Write trailer
+ self.write_line(b'trailer', output)
+ self.write_line(b'<<', output)
+ self.write_line(f'/Size {len(self.objects)}'.encode(), output)
+ self.write_line(b'/Root ' + self.catalog.reference, output)
+ self.write_line(b'/Info ' + self.info.reference, output)
+ if identifier:
+ data = b''.join(
+ obj.data for obj in self.objects if obj.free != 'f')
+ data_hash = md5(data).hexdigest().encode()
+ if identifier is True:
+ identifier = data_hash
+ self.write_line(
+ b'/ID [' + String(identifier).data + b' ' +
+ String(data_hash).data + b']', output)
+ self.write_line(b'>>', output)
+
self.write_line(b'startxref', output)
self.write_line(f'{self.xref_position}'.encode(), output)
self.write_line(b'%%EOF', output)