Adding upstream version 0.6.0. [upstream/0.6.0]

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2023-04-03 07:29:21 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2023-04-03 07:29:21 +0000
commit: 3136ffb7c57ef56e0609ceb3c7726ea4f51f3574 (patch)
tree: 57df199141ce79f945ce394b6007424cbf31aa04
parent: Adding upstream version 0.5.0. (diff)
download: pydyf-3136ffb7c57ef56e0609ceb3c7726ea4f51f3574.tar.xz pydyf-3136ffb7c57ef56e0609ceb3c7726ea4f51f3574.zip
4 files changed, 178 insertions, 79 deletions
diff --git a/docs/api_reference.rst b/docs/api_reference.rst
index 80bf16c..0ceb6c5 100644
--- a/docs/api_reference.rst
+++ b/docs/api_reference.rst
@@ -7,13 +7,17 @@ API Reference
    :members:
 
 .. autoclass:: Dictionary
+   :show-inheritance:
 
 .. autoclass:: Stream
    :members:
+   :show-inheritance:
 
 .. autoclass:: String
+   :show-inheritance:
 
 .. autoclass:: Array
+   :show-inheritance:
 
 .. autoclass:: PDF
    :members:
diff --git a/docs/changelog.rst b/docs/changelog.rst
index 3eb90e6..f652602 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -2,6 +2,40 @@ Changelog
 =========
 
 
+Version 0.6.0
+-------------
+
+Released on 2023-03-29.
+
+New features:
+
+* Add an option to use compressed object streams for PDF 1.5+, with financial support from Code & Co.
+* Add new text operators
+* Clean and fix documentation
+
+Backers and sponsors:
+
+* Kobalt
+* Grip Angebotssoftware
+* Spacinov
+* Crisp BV
+* Castedo Ellerman
+* Manuel Barkhau
+* SimonSoft
+* Menutech
+* KontextWork
+* NCC Group
+* René Fritz
+* Moritz Mahringer
+* Yanal-Yvez Fargialla
+* Piotr Horzycki
+* Healthchecks.io
+* Hammerbacher
+* TrainingSparkle
+* Synapsium
+
+
+
 Version 0.5.0
 -------------
 
@@ -10,7 +44,7 @@ Released on 2022-10-11.
 New features:
 
 * Add the PDF.page_references property
-* Revert the PDF.pages['Kids'] behavior to be retro-compatible with version 0.3.0
+* Revert the PDF.pages['Kids'] behavior to be backwards compatible with version 0.3.0
 
 Backers and sponsors:
 
diff --git a/docs/common_use_cases.rst b/docs/common_use_cases.rst
index 45e2d3d..b045fa6 100644
--- a/docs/common_use_cases.rst
+++ b/docs/common_use_cases.rst
@@ -97,7 +97,7 @@ Display image
 
    document = pydyf.PDF()
 
-   extra = Dictionary({
+   extra = pydyf.Dictionary({
        'Type': '/XObject',
        'Subtype': '/Image',
        'Width': 197,
@@ -158,9 +158,9 @@ Display text
   # And display it
   text = pydyf.Stream()
   text.begin_text()
-  text.set_font_size('F1', 24)
+  text.set_font_size('F1', 20)
   text.text_matrix(1, 0, 0, 1, 10, 90)
-  text.show_text(pydyf.String('Hello World'))
+  text.show_text(pydyf.String('Bœuf grillé & café'.encode('macroman')))
   text.end_text()
 
   document.add_object(text)
@@ -179,4 +179,3 @@ Display text
 
   with open('document.pdf', 'wb') as f:
       document.write(f)
-
diff --git a/pydyf/__init__.py b/pydyf/__init__.py
index 85811f4..2e589fe 100755
--- a/pydyf/__init__.py
+++ b/pydyf/__init__.py
@@ -7,8 +7,9 @@ import re
 import zlib
 from codecs import BOM_UTF16_BE
 from hashlib import md5
+from math import ceil, log
 
-VERSION = __version__ = '0.5.0'
+VERSION = __version__ = '0.6.0'
 
 
 def _to_bytes(item):
@@ -21,7 +22,7 @@ def _to_bytes(item):
         if item.is_integer():
             return f'{int(item):d}'.encode('ascii')
         else:
-            return f'{item:f}'.encode('ascii')
+            return f'{item:f}'.rstrip('0').encode('ascii')
     elif isinstance(item, int):
         return f'{item:d}'.encode('ascii')
     return str(item).encode('ascii')
@@ -43,51 +44,41 @@ class Object:
     @property
     def indirect(self):
         """Indirect representation of an object."""
-        return b'\n'.join((
-            str(self.number).encode() + b' ' +
-            str(self.generation).encode() + b' obj',
-            self.data,
-            b'endobj',
-        ))
+        header = f'{self.number} {self.generation} obj\n'.encode()
+        return header + self.data + b'\nendobj'
 
     @property
     def reference(self):
         """Object identifier."""
-        return (
-            str(self.number).encode() + b' ' +
-            str(self.generation).encode() + b' R')
+        return f'{self.number} {self.generation} R'.encode()
 
     @property
     def data(self):
         """Data contained in the object. Shall be defined in each subclass."""
         raise NotImplementedError()
 
+    @property
+    def compressible(self):
+        """Whether the object can be included in an object stream."""
+        return not self.generation and not isinstance(self, Stream)
 
-class Dictionary(Object, dict):
-    """PDF Dictionary object.
-
-    Inherits from :class:`Object` and Python :obj:`dict`.
 
-    """
+class Dictionary(Object, dict):
+    """PDF Dictionary object."""
     def __init__(self, values=None):
         Object.__init__(self)
         dict.__init__(self, values or {})
 
     @property
     def data(self):
-        result = [b'<<']
-        for key, value in self.items():
-            result.append(b'/' + _to_bytes(key) + b' ' + _to_bytes(value))
-        result.append(b'>>')
-        return b'\n'.join(result)
+        result = [
+            b'/' + _to_bytes(key) + b' ' + _to_bytes(value)
+            for key, value in self.items()]
+        return b'<<' + b''.join(result) + b'>>'
 
 
 class Stream(Object):
-    """PDF Stream object.
-
-    Inherits from :class:`Object`.
-
-    """
+    """PDF Stream object."""
     def __init__(self, stream=None, extra=None, compress=False):
         super().__init__()
         #: Python array of data composing stream.
@@ -221,6 +212,10 @@ class Stream(Object):
         """Begin new subpath by moving current point to ``(x, y)``."""
         self.stream.append(b' '.join((_to_bytes(x), _to_bytes(y), b'm')))
 
+    def move_text_to(self, x, y):
+        """Move text to next line at ``(x, y)`` distance from previous line."""
+        self.stream.append(b' '.join((_to_bytes(x), _to_bytes(y), b'Td')))
+
     def shading(self, name):
         """Paint shape and color shading using shading dictionary ``name``."""
         self.stream.append(b'/' + _to_bytes(name) + b' sh')
@@ -310,9 +305,13 @@ class Stream(Object):
         self.stream.append(b'/' + _to_bytes(state_name) + b' gs')
 
     def show_text(self, text):
-        """Show text."""
+        """Show text strings with individual glyph positioning."""
         self.stream.append(b'[' + _to_bytes(text) + b'] TJ')
 
+    def show_text_string(self, text):
+        """Show single text string."""
+        self.stream.append(String(text).data + b' Tj')
+
     def stroke(self):
         """Stroke path."""
         self.stream.append(b'S')
@@ -369,7 +368,7 @@ class Stream(Object):
         extra = Dictionary(self.extra.copy())
         if self.compress:
             extra['Filter'] = '/FlateDecode'
-            compressobj = zlib.compressobj()
+            compressobj = zlib.compressobj(level=9)
             stream = compressobj.compress(stream)
             stream += compressobj.flush()
         extra['Length'] = len(stream)
@@ -377,11 +376,7 @@ class Stream(Object):
 
 
 class String(Object):
-    """PDF String object.
-
-    Inherits from :class:`Object`.
-
-    """
+    """PDF String object."""
     def __init__(self, string=''):
         super().__init__()
         #: Unicode string.
@@ -402,22 +397,14 @@ class String(Object):
 
 
 class Array(Object, list):
-    """PDF Array object.
-
-    Inherits from :class:`Object` and Python :obj:`list`.
-
-    """
+    """PDF Array object."""
     def __init__(self, array=None):
         Object.__init__(self)
         list.__init__(self, array or [])
 
     @property
     def data(self):
-        result = [b'[']
-        for child in self:
-            result.append(_to_bytes(child))
-        result.append(b']')
-        return b' '.join(result)
+        return b'[' + b' '.join(_to_bytes(child) for child in self) + b']'
 
 
 class PDF:
@@ -500,13 +487,14 @@ class PDF:
         self.current_position += len(content) + 1
         output.write(content + b'\n')
 
-    def write(self, output, version=None, identifier=None):
+    def write(self, output, version=None, identifier=None, compress=False):
         """Write PDF to output.
 
         :param output: Output stream.
         :type output: binary :term:`file object`
         :param bytes version: PDF version.
         :param bytes identifier: PDF file identifier.
+        :param bool compress: whether the PDF uses a compressed object stream.
 
         """
         version = self.version if version is None else _to_bytes(version)
@@ -516,36 +504,110 @@ class PDF:
         self.write_line(b'%PDF-' + version, output)
         self.write_line(b'%\xf0\x9f\x96\xa4', output)
 
-        # Write all non-free PDF objects
-        for object_ in self.objects:
-            if object_.free == 'f':
-                continue
-            object_.offset = self.current_position
-            self.write_line(object_.indirect, output)
-
-        # Write cross reference table
-        self.xref_position = self.current_position
-        self.write_line(b'xref', output)
-        self.write_line(f'0 {len(self.objects)}'.encode(), output)
-        for object_ in self.objects:
-            self.write_line(
-                (f'{object_.offset:010} {object_.generation:05} '
-                 f'{object_.free} ').encode(), output)
-
-        # Write trailer
-        self.write_line(b'trailer', output)
-        self.write_line(b'<<', output)
-        self.write_line(f'/Size {len(self.objects)}'.encode(), output)
-        self.write_line(b'/Root ' + self.catalog.reference, output)
-        self.write_line(b'/Info ' + self.info.reference, output)
-        if identifier is not None:
-            data = b''.join(
-                obj.data for obj in self.objects if obj.free != 'f')
-            data_hash = md5(data).hexdigest().encode()
-            self.write_line(
-                b'/ID [' + String(identifier).data + b' ' +
-                String(data_hash).data + b']', output)
-        self.write_line(b'>>', output)
+        if version >= b'1.5' and compress:
+            # Store compressed objects for later and write other ones in PDF
+            compressed_objects = []
+            for object_ in self.objects:
+                if object_.free == 'f':
+                    continue
+                if object_.compressible:
+                    compressed_objects.append(object_)
+                else:
+                    object_.offset = self.current_position
+                    self.write_line(object_.indirect, output)
+
+            # Write compressed objects in object stream
+            stream = [[]]
+            position = 0
+            for i, object_ in enumerate(compressed_objects):
+                data = object_.data
+                stream.append(data)
+                stream[0].append(object_.number)
+                stream[0].append(position)
+                position += len(data) + 1
+            stream[0] = ' '.join(str(i) for i in stream[0])
+            extra = {
+                'Type': '/ObjStm',
+                'N': len(compressed_objects),
+                'First': len(stream[0]) + 1,
+            }
+            object_stream = Stream(stream, extra, compress)
+            object_stream.offset = self.current_position
+            self.add_object(object_stream)
+            self.write_line(object_stream.indirect, output)
+
+            # Write cross-reference stream
+            xref = []
+            dict_index = 0
+            for object_ in self.objects:
+                if object_.compressible:
+                    xref.append((2, object_stream.number, dict_index))
+                    dict_index += 1
+                else:
+                    xref.append((
+                        bool(object_.number), object_.offset,
+                        object_.generation))
+            xref.append((1, self.current_position, 0))
+
+            field2_size = ceil(log(self.current_position, 8))
+            max_generation = max(
+                object_.generation for object_ in self.objects)
+            field3_size = ceil(log(
+                max(max_generation, len(compressed_objects)), 8))
+            xref_lengths = (1, field2_size, field3_size)
+            xref_stream = b''.join(
+                value.to_bytes(length, 'big')
+                for line in xref for length, value in zip(xref_lengths, line))
+            extra = {
+                'Type': '/XRef',
+                'Index': Array((0, len(self.objects) + 1)),
+                'W': Array(xref_lengths),
+                'Size': len(self.objects) + 1,
+                'Root': self.catalog.reference,
+                'Info': self.info.reference,
+            }
+            if identifier is not None:
+                data = b''.join(
+                    obj.data for obj in self.objects if obj.free != 'f')
+                data_hash = md5(data).hexdigest().encode()
+                extra['ID'] = Array((
+                    String(identifier).data, String(data_hash).data))
+            dict_stream = Stream([xref_stream], extra, compress)
+            self.xref_position = dict_stream.offset = self.current_position
+            self.add_object(dict_stream)
+            self.write_line(dict_stream.indirect, output)
+        else:
+            # Write all non-free PDF objects
+            for object_ in self.objects:
+                if object_.free == 'f':
+                    continue
+                object_.offset = self.current_position
+                self.write_line(object_.indirect, output)
+
+            # Write cross-reference table
+            self.xref_position = self.current_position
+            self.write_line(b'xref', output)
+            self.write_line(f'0 {len(self.objects)}'.encode(), output)
+            for object_ in self.objects:
+                self.write_line(
+                    (f'{object_.offset:010} {object_.generation:05} '
+                     f'{object_.free} ').encode(), output)
+
+            # Write trailer
+            self.write_line(b'trailer', output)
+            self.write_line(b'<<', output)
+            self.write_line(f'/Size {len(self.objects)}'.encode(), output)
+            self.write_line(b'/Root ' + self.catalog.reference, output)
+            self.write_line(b'/Info ' + self.info.reference, output)
+            if identifier is not None:
+                data = b''.join(
+                    obj.data for obj in self.objects if obj.free != 'f')
+                data_hash = md5(data).hexdigest().encode()
+                self.write_line(
+                    b'/ID [' + String(identifier).data + b' ' +
+                    String(data_hash).data + b']', output)
+            self.write_line(b'>>', output)
+
         self.write_line(b'startxref', output)
         self.write_line(f'{self.xref_position}'.encode(), output)
         self.write_line(b'%%EOF', output)
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2023-04-03 07:29:21 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2023-04-03 07:29:21 +0000
commit	3136ffb7c57ef56e0609ceb3c7726ea4f51f3574 (patch)
tree	57df199141ce79f945ce394b6007424cbf31aa04
parent	Adding upstream version 0.5.0. (diff)
download	pydyf-3136ffb7c57ef56e0609ceb3c7726ea4f51f3574.tar.xz pydyf-3136ffb7c57ef56e0609ceb3c7726ea4f51f3574.zip