diff options
-rw-r--r-- | .github/FUNDING.yml | 1 | ||||
-rw-r--r-- | .github/workflows/doconfly.yml | 29 | ||||
-rw-r--r-- | .github/workflows/tests.yml | 43 | ||||
-rw-r--r-- | .gitignore | 5 | ||||
-rw-r--r-- | PKG-INFO | 57 | ||||
-rw-r--r-- | README.rst | 2 | ||||
-rw-r--r-- | debian/changelog | 67 | ||||
-rw-r--r-- | debian/control | 3 | ||||
-rw-r--r-- | debian/copyright | 4 | ||||
-rw-r--r-- | docs/api_reference.rst | 4 | ||||
-rw-r--r-- | docs/changelog.rst | 289 | ||||
-rw-r--r-- | docs/common_use_cases.rst | 89 | ||||
-rw-r--r-- | docs/conf.py | 9 | ||||
-rw-r--r-- | docs/contribute.rst | 10 | ||||
-rw-r--r-- | docs/going_further.rst | 21 | ||||
-rwxr-xr-x | pydyf/__init__.py | 296 | ||||
-rw-r--r-- | pyproject.toml | 12 | ||||
-rw-r--r-- | setup.py | 31 | ||||
-rw-r--r-- | tests/test_pydyf.py | 20 |
19 files changed, 795 insertions, 197 deletions
diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 0000000..709eb98 --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1 @@ +open_collective: courtbouillon diff --git a/.github/workflows/doconfly.yml b/.github/workflows/doconfly.yml new file mode 100644 index 0000000..c256435 --- /dev/null +++ b/.github/workflows/doconfly.yml @@ -0,0 +1,29 @@ +name: doconfly +on: + push: + branches: + - main + tags: + - "*" + +jobs: + doconfly: + name: doconfly job + runs-on: ubuntu-latest + env: + PORT: ${{ secrets.PORT }} + SSH_PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }} + TAKOYAKI: ${{ secrets.TAKOYAKI }} + USER: ${{ secrets.USER }} + DOCUMENTATION_PATH: ${{ secrets.DOCUMENTATION_PATH }} + DOCUMENTATION_URL: ${{ secrets.DOCUMENTATION_URL }} + steps: + - run: | + which ssh-agent || ( apt-get update -y && apt-get install openssh-client -y ) + eval $(ssh-agent -s) + echo "$SSH_PRIVATE_KEY" | tr -d '\r' | ssh-add - + mkdir -p ~/.ssh + chmod 700 ~/.ssh + ssh-keyscan -p $PORT $TAKOYAKI >> ~/.ssh/known_hosts + chmod 644 ~/.ssh/known_hosts + ssh $USER@$TAKOYAKI -p $PORT "doconfly/doconfly.sh $GITHUB_REPOSITORY $GITHUB_REF $DOCUMENTATION_PATH $DOCUMENTATION_URL" diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 0000000..8de9881 --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,43 @@ +name: pydyf's tests +on: [push, pull_request] + +jobs: + tests: + name: ${{ matrix.os }} - ${{ matrix.python-version }} + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest, macos-latest, windows-latest] + python-version: ['3.12'] + include: + - os: ubuntu-latest + python-version: '3.8' + - os: ubuntu-latest + python-version: 'pypy-3.8' + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Install Ghostscript (Ubuntu) + if: matrix.os == 'ubuntu-latest' + run: sudo apt-get update -y && sudo apt-get install ghostscript -y + - name: Install Ghostscript (macOS) + if: matrix.os == 'macos-latest' + run: brew install ghostscript + - name: Install Ghostscript (Windows) + if: matrix.os == 'windows-latest' + run: | + C:\msys64\usr\bin\bash -lc 'pacman -S mingw-w64-x86_64-ghostscript --noconfirm' + echo "C:\msys64\mingw64\bin" | Out-File -FilePath $env:GITHUB_PATH + rm C:\msys64\mingw64\bin\python.exe + - name: Upgrade pip and setuptools + run: python -m pip install --upgrade pip setuptools + - name: Install tests’ requirements + run: python -m pip install .[test] + - name: Launch tests + run: python -m pytest + - name: Check coding style + run: python -m flake8 + - name: Check imports order + run: python -m isort . --check --diff diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ecd9e60 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +/docs/_build +/tests/results +/dist/ +__pycache__ +.coverage diff --git a/PKG-INFO b/PKG-INFO deleted file mode 100644 index d6c6777..0000000 --- a/PKG-INFO +++ /dev/null @@ -1,57 +0,0 @@ -Metadata-Version: 2.1 -Name: pydyf -Version: 0.1.2 -Summary: A low-level PDF generator. -Keywords: pdf,generator -Author-email: CourtBouillon <contact@courtbouillon.org> -Maintainer-email: CourtBouillon <contact@courtbouillon.org> -Requires-Python: >=3.6 -Description-Content-Type: text/x-rst -Classifier: Development Status :: 4 - Beta -Classifier: Intended Audience :: Developers -Classifier: License :: OSI Approved :: BSD License -Classifier: Operating System :: OS Independent -Classifier: Programming Language :: Python -Classifier: Programming Language :: Python :: 3 -Classifier: Programming Language :: Python :: 3 :: Only -Classifier: Programming Language :: Python :: 3.6 -Classifier: Programming Language :: Python :: 3.7 -Classifier: Programming Language :: Python :: 3.8 -Classifier: Programming Language :: Python :: 3.9 -Classifier: Programming Language :: Python :: Implementation :: CPython -Classifier: Programming Language :: Python :: Implementation :: PyPy -Requires-Dist: sphinx ; extra == "doc" -Requires-Dist: sphinx_rtd_theme ; extra == "doc" -Requires-Dist: pytest ; extra == "test" -Requires-Dist: pytest-cov ; extra == "test" -Requires-Dist: pytest-flake8 ; extra == "test" -Requires-Dist: pytest-isort ; extra == "test" -Requires-Dist: coverage[toml] ; extra == "test" -Requires-Dist: pillow ; extra == "test" -Project-URL: Changelog, https://github.com/CourtBouillon/pydyf/releases -Project-URL: Code, https://github.com/CourtBouillon/pydyf -Project-URL: Documentation, https://doc.courtbouillon.org/pydyf/ -Project-URL: Donation, https://opencollective.com/courtbouillon -Project-URL: Homepage, https://www.courtbouillon.org/pydyf -Project-URL: Issues, https://github.com/CourtBouillon/pydyf/issues -Provides-Extra: doc -Provides-Extra: test - -pydyf is a low-level PDF generator written in Python and based on PDF -specification 1.7. - -* Free software: BSD license -* For Python 3.6+, tested on CPython and PyPy -* Documentation: https://doc.courtbouillon.org/pydyf -* Changelog: https://github.com/CourtBouillon/pydyf/releases -* Code, issues, tests: https://github.com/CourtBouillon/pydyf -* Code of conduct: https://www.courtbouillon.org/code-of-conduct -* Professional support: https://www.courtbouillon.org -* Donation: https://opencollective.com/courtbouillon - -Copyrights are retained by their contributors, no copyright assignment is -required to contribute to pydyf. Unless explicitly stated otherwise, any -contribution intentionally submitted for inclusion is licensed under the BSD -3-clause license, without any additional terms or conditions. For full -authorship information, see the version control history. - @@ -2,7 +2,7 @@ pydyf is a low-level PDF generator written in Python and based on PDF specification 1.7. * Free software: BSD license -* For Python 3.6+, tested on CPython and PyPy +* For Python 3.7+, tested on CPython and PyPy * Documentation: https://doc.courtbouillon.org/pydyf * Changelog: https://github.com/CourtBouillon/pydyf/releases * Code, issues, tests: https://github.com/CourtBouillon/pydyf diff --git a/debian/changelog b/debian/changelog index 724f75f..7596f4c 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,70 @@ +pydyf (0.9.0-1) sid; urgency=medium + + * Uploading to sid. + * Merging upstream version 0.9.0. + * Updating copyright for 2024. + + -- Daniel Baumann <daniel.baumann@progress-linux.org> Sun, 03 Mar 2024 15:08:25 +0100 + +pydyf (0.7.0-1) sid; urgency=medium + + * Uploading to sid. + * Merging upstream version 0.7.0. + + -- Daniel Baumann <daniel.baumann@progress-linux.org> Wed, 05 Jul 2023 08:16:22 +0200 + +pydyf (0.6.0-2) sid; urgency=medium + + * Uploading to sid. + * Uploading without changes after bookworm release. + + -- Daniel Baumann <daniel.baumann@progress-linux.org> Sun, 11 Jun 2023 14:44:14 +0200 + +pydyf (0.6.0-1) experimental; urgency=medium + + * Uploading to experimental. + * Merging upstream version 0.6.0. + + -- Daniel Baumann <daniel.baumann@progress-linux.org> Mon, 03 Apr 2023 09:29:29 +0200 + +pydyf (0.5.0-3) sid; urgency=medium + + * Uploading to sid. + * Updating to standards version 4.6.2. + + -- Daniel Baumann <daniel.baumann@progress-linux.org> Mon, 30 Jan 2023 17:59:09 +0100 + +pydyf (0.5.0-2) sid; urgency=medium + + * Uploading to sid. + * Using pybuild-plugin-pyproject instead of flit within pybuild (Closes: + #1025397). + + -- Daniel Baumann <daniel.baumann@progress-linux.org> Sun, 04 Dec 2022 07:31:31 +0100 + +pydyf (0.5.0-1) sid; urgency=medium + + * Uploading to sid. + * Merging upstream version 0.5.0. + + -- Daniel Baumann <daniel.baumann@progress-linux.org> Sat, 15 Oct 2022 15:55:59 +0200 + +pydyf (0.3.0-1) sid; urgency=medium + + * Uploading to sid. + * Merging upstream version 0.3.0. + + -- Daniel Baumann <mail@daniel-baumann.ch> Fri, 23 Sep 2022 19:03:46 +0200 + +pydyf (0.2.0-1) sid; urgency=medium + + * Uploading to sid. + * Merging upstream version 0.2.0. + * Updating copyright for 2022. + * Updating to standards version 4.6.1. + + -- Daniel Baumann <mail@daniel-baumann.ch> Sun, 19 Jun 2022 09:06:54 +0200 + pydyf (0.1.2-4) sid; urgency=medium * Uploading to sid. diff --git a/debian/control b/debian/control index 5c59c3a..f71af40 100644 --- a/debian/control +++ b/debian/control @@ -6,10 +6,11 @@ Build-Depends: debhelper-compat (= 13), dh-sequence-python3, flit (>= 3.2), + pybuild-plugin-pyproject, python3-all, python3-pil <!nocheck>, Rules-Requires-Root: no -Standards-Version: 4.6.0 +Standards-Version: 4.7.0 Homepage: https://github.com/CourtBouillon/pydyf Vcs-Browser: https://git.progress-linux.org/users/daniel.baumann/debian/packages/pydyf Vcs-Git: https://git.progress-linux.org/users/daniel.baumann/debian/packages/pydyf diff --git a/debian/copyright b/debian/copyright index 195aed5..64949aa 100644 --- a/debian/copyright +++ b/debian/copyright @@ -4,11 +4,11 @@ Upstream-Contact: CourtBouillon <contact@courtbouillon.org> Source: https://github.com/CourtBouillon/pydyf/releases Files: * -Copyright: 2020-2021 CourtBouillon <contact@courtbouillon.org> +Copyright: 2020-2024 CourtBouillon <contact@courtbouillon.org> License: BSD-3-clause Files: debian/* -Copyright: 2021 Daniel Baumann <daniel.baumann@progress-linux.org> +Copyright: 2021-2024 Daniel Baumann <daniel.baumann@progress-linux.org> License: BSD-3-clause License: BSD-3-clause diff --git a/docs/api_reference.rst b/docs/api_reference.rst index 80bf16c..0ceb6c5 100644 --- a/docs/api_reference.rst +++ b/docs/api_reference.rst @@ -7,13 +7,17 @@ API Reference :members: .. autoclass:: Dictionary + :show-inheritance: .. autoclass:: Stream :members: + :show-inheritance: .. autoclass:: String + :show-inheritance: .. autoclass:: Array + :show-inheritance: .. autoclass:: PDF :members: diff --git a/docs/changelog.rst b/docs/changelog.rst index acbf3c3..661a696 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -2,6 +2,295 @@ Changelog ========= +Version 0.9.0 +------------- + +Released on 2024-02-26. + +Dependencies: + +* Python 3.12 is supported and tested +* Python 3.8+ is now needed, Python 3.7 is not supported anymore + +New features: + +* Add inline images support + +Performance: + +* Simplify `_to_bytes()` + +Documentation: + +* Add sample to create a PDF with metadata + +Contributors: + +* Panagiotis H.M. Issaris +* Guillaume Ayoub +* Lucie Anglade + +Backers and sponsors: + +* Spacinov +* Kobalt +* Grip Angebotssoftware +* Manuel Barkhau +* SimonSoft +* Menutech +* KontextWork +* René Fritz +* Simon Sapin +* Arcanite +* TrainingSparkle +* Healthchecks.io +* Hammerbacher +* Docraptor +* Yanal-Yvez Fargialla +* Morntag +* NBCO + + +Version 0.8.0 +------------- + +Released on 2023-09-25. + +New features: + +* Add text rise operator + +Backers and sponsors: + +* Spacinov +* Kobalt +* Grip Angebotssoftware +* Manuel Barkhau +* SimonSoft +* Menutech +* KontextWork +* NCC Group +* René Fritz +* Nicola Auchmuty +* Syslifters +* Hammerbacher +* TrainingSparkle +* Daniel Kucharski +* Healthchecks.io +* Yanal-Yvez Fargialla +* WakaTime +* Paheko +* Synapsium +* DocRaptor + + +Version 0.7.0 +------------- + +Released on 2023-07-03. + +Dependencies: + +* Python 3.11 is supported and tested + +Bug fixes: + +* Fix size of fields for xref + +Backers and sponsors: + +* Castedo Ellerman +* Spacinov +* Kobalt +* Grip Angebotssoftware +* Crisp BV +* Manuel Barkhau +* SimonSoft +* Menutech +* KontextWork +* NCC Group +* René Fritz +* TrainingSparkle +* Healthchecks.io +* Moritz Mahringer +* Yanal-Yvez Fargialla +* Synapsium +* Piotr Horzycki +* Hammerbacher + + +Version 0.6.0 +------------- + +Released on 2023-03-29. + +New features: + +* Add an option to use compressed object streams for PDF 1.5+, with financial support from Code & Co. +* Add new text operators +* Clean and fix documentation + +Backers and sponsors: + +* Kobalt +* Grip Angebotssoftware +* Spacinov +* Crisp BV +* Castedo Ellerman +* Manuel Barkhau +* SimonSoft +* Menutech +* KontextWork +* NCC Group +* René Fritz +* Moritz Mahringer +* Yanal-Yvez Fargialla +* Piotr Horzycki +* Healthchecks.io +* Hammerbacher +* TrainingSparkle +* Synapsium + + + +Version 0.5.0 +------------- + +Released on 2022-10-11. + +New features: + +* Add the PDF.page_references property +* Revert the PDF.pages['Kids'] behavior to be backwards compatible with version 0.3.0 + +Backers and sponsors: + +* Grip Angebotssoftware +* Manuel Barkhau +* Crisp BV +* SimonSoft +* Menutech +* Spacinov +* KontextWork +* René Fritz +* NCC Group +* Kobalt +* Tom Pohl +* John R Ellis +* Moritz Mahringer +* Yanal-Yvez Fargialla +* Gábor +* Piotr Horzycki +* Andrew Ittner + + +Version 0.4.0 +------------- + +Released on 2022-10-11. + +New features: + +* Allow nth page’s reference to be retrieved using PDF.pages['Kids'][n] + +Backers and sponsors: + +* Grip Angebotssoftware +* Manuel Barkhau +* Crisp BV +* SimonSoft +* Menutech +* Spacinov +* KontextWork +* René Fritz +* NCC Group +* Kobalt +* Tom Pohl +* John R Ellis +* Moritz Mahringer +* Yanal-Yvez Fargialla +* Gábor +* Piotr Horzycki +* Andrew Ittner + + +Version 0.3.0 +------------- + +Released on 2022-09-19. + +New features: + +* Support marked content +* Allow version and ID to be specified when initializing PDF objects + +Contributors: + +* Guillaume Ayoub + +Backers and sponsors: + +* Grip Angebotssoftware +* Manuel Barkhau +* Crisp BV +* SimonSoft +* Menutech +* Spacinov +* KontextWork +* René Fritz +* NCC Group +* Kobalt +* Tom Pohl +* John R Ellis +* Moritz Mahringer +* Gábor +* Piotr Horzycki +* Andrew Ittner + + +Version 0.2.0 +------------- + +Released on 2022-05-23. + +Dependencies: + +* Python 3.7+ is now needed, Python 3.6 is not supported anymore + +New features: + +* `d0be36b <https://github.com/CourtBouillon/pydyf/commit/d0be36b>`_: + Allow to set PDF version +* `879261c <https://github.com/CourtBouillon/pydyf/commit/879261c>`_: + Allow to set PDF identifier + +Contributors: + +* Guillaume Ayoub + +Backers and sponsors: + +* Grip Angebotssoftware +* Manuel Barkhau +* Crisp BV +* SimonSoft +* Menutech +* Spacinov +* KontextWork +* René Fritz +* Kobalt +* NCC Group +* Des images et des mots +* Nathalie Gutton +* Andreas Zettl +* Tom Pohl +* Moritz Mahringer +* Florian Demmer +* Yanal-Yvez Fargialla +* Gábor +* Piotr Horzycki + + Version 0.1.2 ------------- diff --git a/docs/common_use_cases.rst b/docs/common_use_cases.rst index 45e2d3d..d8343b3 100644 --- a/docs/common_use_cases.rst +++ b/docs/common_use_cases.rst @@ -97,7 +97,7 @@ Display image document = pydyf.PDF() - extra = Dictionary({ + extra = pydyf.Dictionary({ 'Type': '/XObject', 'Subtype': '/Image', 'Width': 197, @@ -158,9 +158,9 @@ Display text # And display it text = pydyf.Stream() text.begin_text() - text.set_font_size('F1', 24) + text.set_font_size('F1', 20) text.text_matrix(1, 0, 0, 1, 10, 90) - text.show_text(pydyf.String('Hello World')) + text.show_text(pydyf.String('Bœuf grillé & café'.encode('macroman'))) text.end_text() document.add_object(text) @@ -180,3 +180,86 @@ Display text with open('document.pdf', 'wb') as f: document.write(f) + +Add metadata +------------ + +.. code-block:: python + + import datetime + + import pydyf + + document = pydyf.PDF() + document.info['Author'] = pydyf.String('Jane Doe') + document.info['Creator'] = pydyf.String('pydyf') + document.info['Keywords'] = pydyf.String('some keywords') + document.info['Producer'] = pydyf.String('The producer') + document.info['Subject'] = pydyf.String('An example PDF') + document.info['Title'] = pydyf.String('A PDF containing metadata') + now = datetime.datetime.now() + document.info['CreationDate'] = pydyf.String(now.strftime('D:%Y%m%d%H%M%S')) + + document.add_page( + pydyf.Dictionary( + { + 'Type': '/Page', + 'Parent': document.pages.reference, + 'MediaBox': pydyf.Array([0, 0, 200, 200]), + } + ) + ) + + # 550 bytes PDF + with open('metadata.pdf', 'wb') as f: + document.write(f) + + +Display inline QR-code image +---------------------------- + +.. code-block:: python + + import pydyf + import qrcode + + # Create a QR code image + image = qrcode.make('Some data here') + raw_data = image.tobytes() + width = image.size[0] + height = image.size[1] + + document = pydyf.PDF() + stream = pydyf.Stream(compress=True) + stream.push_state() + x = 0 + y = 0 + stream.transform(width, 0, 0, height, x, y) + # Add the 1-bit grayscale image inline in the PDF + stream.inline_image(width, height, 'Gray', 1, raw_data) + stream.pop_state() + document.add_object(stream) + + # Put the image in the resources of the PDF + document.add_page( + pydyf.Dictionary( + { + 'Type': '/Page', + 'Parent': document.pages.reference, + 'MediaBox': pydyf.Array([0, 0, 400, 400]), + 'Resources': pydyf.Dictionary( + { + 'ProcSet': pydyf.Array( + ['/PDF', '/ImageB', '/ImageC', '/ImageI'] + ), + } + ), + 'Contents': stream.reference, + } + ) + ) + + # 909 bytes PDF + with open('qrcode.pdf', 'wb') as f: + document.write(f, compress=True) + diff --git a/docs/conf.py b/docs/conf.py index 585afd0..c868316 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,13 +1,7 @@ # pydyf documentation build configuration file. -import sys -from pathlib import Path - import pydyf -# Add current path for css_diagram_role -sys.path.append(str(Path(__file__).parent)) - # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones. extensions = [ @@ -52,6 +46,9 @@ html_theme_options = { 'collapse_navigation': False, } +# Favicon URL +html_favicon = 'https://www.courtbouillon.org/static/images/favicon.png' + # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". diff --git a/docs/contribute.rst b/docs/contribute.rst index adf9024..f94a3d6 100644 --- a/docs/contribute.rst +++ b/docs/contribute.rst @@ -46,12 +46,20 @@ the pytest_ library. Launching tests require to have Ghostscript_ installed and available in ``PATH``. -You can launch tests (with code coverage and lint) using the following command:: +You can launch tests using the following command:: venv/bin/pytest +WeasyPrint also uses isort_ to check imports and flake8_ to check the coding +style:: + + venv/bin/python -m isort . --check --diff + venv/bin/python -m flake8 + .. _pytest: https://docs.pytest.org/ .. _Ghostscript: https://www.ghostscript.com/ +.. _isort: https://pycqa.github.io/isort/ +.. _flake8: https://flake8.pycqa.org/ Documentation diff --git a/docs/going_further.rst b/docs/going_further.rst index ce9a8f3..c66f6c2 100644 --- a/docs/going_further.rst +++ b/docs/going_further.rst @@ -5,12 +5,11 @@ Going Further Why pydyf? ------------- -pydyf has been created to replace Cairo PDF generation in WeasyPrint_. +pydyf has been created to replace Cairo_ PDF generation in WeasyPrint_. -Indeed, there are some bugs in WeasyPrint caused by Cairo_ and Cairo has some -difficulties to make releases. -Also there are features which will be easier to implement while having more -control on the PDF generation. +Indeed, there were some bugs in WeasyPrint caused by Cairo, and new versions of +Cairo can take a long time to be released. There are also many features that +are easier to implement with more control on the PDF generation. So we created pydyf. @@ -20,13 +19,13 @@ So we created pydyf. Why Python? ----------- -Python is a really good language to design a small, OS-agnostic parser. As it -is object-oriented, it gives the possibility to follow the specification with -high-level classes and a small amount of very simple code. +Python is a really good language to design a small, OS-agnostic library. As it +is object-oriented, it gives the possibility to follow the PDF specification +with high-level classes and a small amount of very simple code. And of course, WeasyPrint is written in Python too, giving an obvious reason for this choice. -Speed is not pydyf’s main goal. Code simplicity, maintainability and -flexibility are more important goals for this library, as they give the -ability to stay really close to the specification and to fix bugs easily. +Speed is not pydyf’s main goal. Code simplicity, maintainability and +flexibility are more important goals for this library, as they give the ability +to stay really close to the specification and to fix bugs easily. diff --git a/pydyf/__init__.py b/pydyf/__init__.py index 05dccf6..d8e1d7b 100755 --- a/pydyf/__init__.py +++ b/pydyf/__init__.py @@ -3,26 +3,27 @@ A low-level PDF generator. """ +import base64 import re import zlib from codecs import BOM_UTF16_BE +from hashlib import md5 +from math import ceil, log -VERSION = __version__ = '0.1.2' +VERSION = __version__ = '0.9.0' def _to_bytes(item): """Convert item to bytes.""" if isinstance(item, bytes): return item - elif isinstance(item, Object): - return item.data elif isinstance(item, float): if item.is_integer(): - return f'{int(item):d}'.encode('ascii') + return str(int(item)).encode('ascii') else: - return f'{item:f}'.encode('ascii') - elif isinstance(item, int): - return f'{item:d}'.encode('ascii') + return f'{item:f}'.rstrip('0').encode('ascii') + elif isinstance(item, Object): + return item.data return str(item).encode('ascii') @@ -42,51 +43,41 @@ class Object: @property def indirect(self): """Indirect representation of an object.""" - return b'\n'.join(( - str(self.number).encode() + b' ' + - str(self.generation).encode() + b' obj', - self.data, - b'endobj', - )) + header = f'{self.number} {self.generation} obj\n'.encode() + return header + self.data + b'\nendobj' @property def reference(self): """Object identifier.""" - return ( - str(self.number).encode() + b' ' + - str(self.generation).encode() + b' R') + return f'{self.number} {self.generation} R'.encode() @property def data(self): """Data contained in the object. Shall be defined in each subclass.""" raise NotImplementedError() + @property + def compressible(self): + """Whether the object can be included in an object stream.""" + return not self.generation and not isinstance(self, Stream) -class Dictionary(Object, dict): - """PDF Dictionary object. - - Inherits from :class:`Object` and Python :obj:`dict`. - """ +class Dictionary(Object, dict): + """PDF Dictionary object.""" def __init__(self, values=None): Object.__init__(self) dict.__init__(self, values or {}) @property def data(self): - result = [b'<<'] - for key, value in self.items(): - result.append(b'/' + _to_bytes(key) + b' ' + _to_bytes(value)) - result.append(b'>>') - return b'\n'.join(result) + result = [ + b'/' + _to_bytes(key) + b' ' + _to_bytes(value) + for key, value in self.items()] + return b'<<' + b''.join(result) + b'>>' class Stream(Object): - """PDF Stream object. - - Inherits from :class:`Object`. - - """ + """PDF Stream object.""" def __init__(self, stream=None, extra=None, compress=False): super().__init__() #: Python array of data composing stream. @@ -96,6 +87,15 @@ class Stream(Object): #: Compress the stream data if set to ``True``. Default is ``False``. self.compress = compress + def begin_marked_content(self, tag, property_list=None): + """Begin marked-content sequence.""" + self.stream.append(f'/{tag}') + if property_list is None: + self.stream.append(b'BMC') + else: + self.stream.append(property_list) + self.stream.append(b'BDC') + def begin_text(self): """Begin a text object.""" self.stream.append(b'BT') @@ -171,6 +171,10 @@ class Stream(Object): """End path without filling or stroking.""" self.stream.append(b'n') + def end_marked_content(self): + """End marked-content sequence.""" + self.stream.append(b'EMC') + def end_text(self): """End text object.""" self.stream.append(b'ET') @@ -207,6 +211,10 @@ class Stream(Object): """Begin new subpath by moving current point to ``(x, y)``.""" self.stream.append(b' '.join((_to_bytes(x), _to_bytes(y), b'm'))) + def move_text_to(self, x, y): + """Move text to next line at ``(x, y)`` distance from previous line.""" + self.stream.append(b' '.join((_to_bytes(x), _to_bytes(y), b'Td'))) + def shading(self, name): """Paint shape and color shading using shading dictionary ``name``.""" self.stream.append(b'/' + _to_bytes(name) + b' sh') @@ -271,6 +279,10 @@ class Stream(Object): """Set text rendering mode.""" self.stream.append(_to_bytes(mode) + b' Tr') + def set_text_rise(self, height): + """Set text rise.""" + self.stream.append(_to_bytes(height) + b' Ts') + def set_line_cap(self, line_cap): """Set line cap style.""" self.stream.append(_to_bytes(line_cap) + b' J') @@ -296,9 +308,13 @@ class Stream(Object): self.stream.append(b'/' + _to_bytes(state_name) + b' gs') def show_text(self, text): - """Show text.""" + """Show text strings with individual glyph positioning.""" self.stream.append(b'[' + _to_bytes(text) + b'] TJ') + def show_text_string(self, text): + """Show single text string.""" + self.stream.append(String(text).data + b' Tj') + def stroke(self): """Stroke path.""" self.stream.append(b'S') @@ -349,13 +365,51 @@ class Stream(Object): _to_bytes(a), _to_bytes(b), _to_bytes(c), _to_bytes(d), _to_bytes(e), _to_bytes(f), b'cm'))) + def inline_image(self, width, height, color_space, bpc, raw_data): + """Add an inline image. + + :param width: The width of the image. + :type width: :obj:`int` + :param height: The height of the image. + :type height: :obj:`int` + :param colorspace: The color space of the image, f.e. RGB, Gray. + :type colorspace: :obj:`str` + :param bpc: The bits per component. 1 for BW, 8 for grayscale. + :type bpc: :obj:`int` + :param raw_data: The raw pixel data. + + """ + if self.compress: + data = zlib.compress(raw_data) + else: + data = raw_data + enc_data = base64.a85encode(data) + self.stream.append( + b' '.join( + ( + b'BI', + b'/W', _to_bytes(width), + b'/H', _to_bytes(height), + b'/BPC', _to_bytes(bpc), + b'/CS', + b'/Device' + color_space.encode(), + b'/F', + b'[/A85 /Fl]' if self.compress else b'/A85', + b'/L', _to_bytes(len(enc_data) + 2), + b'ID', + enc_data + b'~>', + b'EI', + ) + ) + ) + @property def data(self): stream = b'\n'.join(_to_bytes(item) for item in self.stream) extra = Dictionary(self.extra.copy()) if self.compress: extra['Filter'] = '/FlateDecode' - compressobj = zlib.compressobj() + compressobj = zlib.compressobj(level=9) stream = compressobj.compress(stream) stream += compressobj.flush() extra['Length'] = len(stream) @@ -363,11 +417,7 @@ class Stream(Object): class String(Object): - """PDF String object. - - Inherits from :class:`Object`. - - """ + """PDF String object.""" def __init__(self, string=''): super().__init__() #: Unicode string. @@ -388,27 +438,30 @@ class String(Object): class Array(Object, list): - """PDF Array object. - - Inherits from :class:`Object` and Python :obj:`list`. - - """ + """PDF Array object.""" def __init__(self, array=None): Object.__init__(self) list.__init__(self, array or []) @property def data(self): - result = [b'['] - for child in self: - result.append(_to_bytes(child)) - result.append(b']') - return b' '.join(result) + return b'[' + b' '.join(_to_bytes(child) for child in self) + b']' class PDF: """PDF document.""" - def __init__(self): + def __init__(self, version=b'1.7', identifier=None): + """Create a PDF document. + + :param bytes version: PDF version. + :param bytes identifier: PDF file identifier. + + """ + #: PDF version, as :obj:`bytes`. + self.version = _to_bytes(version) + #: PDF file identifier. + self.identifier = identifier + #: Python :obj:`list` containing the PDF’s objects. self.objects = [] @@ -425,7 +478,7 @@ class PDF: }) self.add_object(self.pages) - #: PDF :class:`Dictionary` containing the PDF’s metadata. + #: PDF :class:`Dictionary` containing the PDF’s metadata. self.info = Dictionary({}) self.add_object(self.info) @@ -457,6 +510,12 @@ class PDF: object_.number = len(self.objects) self.objects.append(object_) + @property + def page_references(self): + return tuple( + f'{object_number} 0 R'.encode('ascii') + for object_number in self.pages['Kids'][::3]) + def write_line(self, content, output): """Write line to output. @@ -469,40 +528,127 @@ class PDF: self.current_position += len(content) + 1 output.write(content + b'\n') - def write(self, output): + def write(self, output, version=None, identifier=None, compress=False): """Write PDF to output. :param output: Output stream. :type output: binary :term:`file object` + :param bytes version: PDF version. + :param bytes identifier: PDF file identifier. + :param bool compress: whether the PDF uses a compressed object stream. """ + version = self.version if version is None else _to_bytes(version) + identifier = self.identifier if identifier is None else identifier + # Write header - self.write_line(b'%PDF-1.7', output) + self.write_line(b'%PDF-' + version, output) self.write_line(b'%\xf0\x9f\x96\xa4', output) - # Write all non-free PDF objects - for object_ in self.objects: - if object_.free == 'f': - continue - object_.offset = self.current_position - self.write_line(object_.indirect, output) - - # Write cross reference table - self.xref_position = self.current_position - self.write_line(b'xref', output) - self.write_line(f'0 {len(self.objects)}'.encode(), output) - for object_ in self.objects: - self.write_line( - (f'{object_.offset:010} {object_.generation:05} ' - f'{object_.free} ').encode(), output) - - # Write trailer - self.write_line(b'trailer', output) - self.write_line(b'<<', output) - self.write_line(f'/Size {len(self.objects)}'.encode(), output) - self.write_line(b'/Root ' + self.catalog.reference, output) - self.write_line(b'/Info ' + self.info.reference, output) - self.write_line(b'>>', output) + if version >= b'1.5' and compress: + # Store compressed objects for later and write other ones in PDF + compressed_objects = [] + for object_ in self.objects: + if object_.free == 'f': + continue + if object_.compressible: + compressed_objects.append(object_) + else: + object_.offset = self.current_position + self.write_line(object_.indirect, output) + + # Write compressed objects in object stream + stream = [[]] + position = 0 + for i, object_ in enumerate(compressed_objects): + data = object_.data + stream.append(data) + stream[0].append(object_.number) + stream[0].append(position) + position += len(data) + 1 + stream[0] = ' '.join(str(i) for i in stream[0]) + extra = { + 'Type': '/ObjStm', + 'N': len(compressed_objects), + 'First': len(stream[0]) + 1, + } + object_stream = Stream(stream, extra, compress) + object_stream.offset = self.current_position + self.add_object(object_stream) + self.write_line(object_stream.indirect, output) + + # Write cross-reference stream + xref = [] + dict_index = 0 + for object_ in self.objects: + if object_.compressible: + xref.append((2, object_stream.number, dict_index)) + dict_index += 1 + else: + xref.append(( + bool(object_.number), object_.offset, + object_.generation)) + xref.append((1, self.current_position, 0)) + + field2_size = ceil(log(self.current_position + 1, 256)) + max_generation = max( + object_.generation for object_ in self.objects) + field3_size = ceil(log( + max(max_generation, len(compressed_objects)) + 1, 256)) + xref_lengths = (1, field2_size, field3_size) + xref_stream = b''.join( + value.to_bytes(length, 'big') + for line in xref for length, value in zip(xref_lengths, line)) + extra = { + 'Type': '/XRef', + 'Index': Array((0, len(self.objects) + 1)), + 'W': Array(xref_lengths), + 'Size': len(self.objects) + 1, + 'Root': self.catalog.reference, + 'Info': self.info.reference, + } + if identifier is not None: + data = b''.join( + obj.data for obj in self.objects if obj.free != 'f') + data_hash = md5(data).hexdigest().encode() + extra['ID'] = Array(( + String(identifier).data, String(data_hash).data)) + dict_stream = Stream([xref_stream], extra, compress) + self.xref_position = dict_stream.offset = self.current_position + self.add_object(dict_stream) + self.write_line(dict_stream.indirect, output) + else: + # Write all non-free PDF objects + for object_ in self.objects: + if object_.free == 'f': + continue + object_.offset = self.current_position + self.write_line(object_.indirect, output) + + # Write cross-reference table + self.xref_position = self.current_position + self.write_line(b'xref', output) + self.write_line(f'0 {len(self.objects)}'.encode(), output) + for object_ in self.objects: + self.write_line( + (f'{object_.offset:010} {object_.generation:05} ' + f'{object_.free} ').encode(), output) + + # Write trailer + self.write_line(b'trailer', output) + self.write_line(b'<<', output) + self.write_line(f'/Size {len(self.objects)}'.encode(), output) + self.write_line(b'/Root ' + self.catalog.reference, output) + self.write_line(b'/Info ' + self.info.reference, output) + if identifier is not None: + data = b''.join( + obj.data for obj in self.objects if obj.free != 'f') + data_hash = md5(data).hexdigest().encode() + self.write_line( + b'/ID [' + String(identifier).data + b' ' + + String(data_hash).data + b']', output) + self.write_line(b'>>', output) + self.write_line(b'startxref', output) self.write_line(f'{self.xref_position}'.encode(), output) self.write_line(b'%%EOF', output) diff --git a/pyproject.toml b/pyproject.toml index 5a88369..2c17097 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,7 @@ description = 'A low-level PDF generator.' keywords = ['pdf', 'generator'] authors = [{name = 'CourtBouillon', email = 'contact@courtbouillon.org'}] maintainers = [{name = 'CourtBouillon', email = 'contact@courtbouillon.org'}] -requires-python = '>=3.6' +requires-python = '>=3.7' readme = {file = 'README.rst', content-type = 'text/x-rst'} license = {file = 'LICENSE'} classifiers = [ @@ -19,10 +19,11 @@ classifiers = [ 'Programming Language :: Python', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3 :: Only', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11', + 'Programming Language :: Python :: 3.12', 'Programming Language :: Python :: Implementation :: CPython', 'Programming Language :: Python :: Implementation :: PyPy', ] @@ -38,14 +39,11 @@ Donation = 'https://opencollective.com/courtbouillon' [project.optional-dependencies] doc = ['sphinx', 'sphinx_rtd_theme'] -test = ['pytest', 'pytest-cov', 'pytest-flake8', 'pytest-isort', 'coverage[toml]', 'pillow'] +test = ['pytest', 'isort', 'flake8', 'pillow'] [tool.flit.sdist] exclude = ['.*'] -[tool.pytest.ini_options] -addopts = '--isort --flake8 --cov --no-cov-on-fail' - [tool.coverage.run] branch = true include = ['tests/*', 'pydyf/*'] diff --git a/setup.py b/setup.py deleted file mode 100644 index 40b63c0..0000000 --- a/setup.py +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env python -# setup.py generated by flit for tools that don't yet use PEP 517 - -from distutils.core import setup - -packages = \ -['pydyf'] - -package_data = \ -{'': ['*']} - -extras_require = \ -{'doc': ['sphinx', 'sphinx_rtd_theme'], - 'test': ['pytest', - 'pytest-cov', - 'pytest-flake8', - 'pytest-isort', - 'coverage[toml]', - 'pillow']} - -setup(name='pydyf', - version='0.1.2', - description='A low-level PDF generator.', - author=None, - author_email='CourtBouillon <contact@courtbouillon.org>', - url=None, - packages=packages, - package_data=package_data, - extras_require=extras_require, - python_requires='>=3.6', - ) diff --git a/tests/test_pydyf.py b/tests/test_pydyf.py index 1e758a5..83c260c 100644 --- a/tests/test_pydyf.py +++ b/tests/test_pydyf.py @@ -1,3 +1,5 @@ +import io + import pydyf from . import assert_pixels @@ -283,10 +285,10 @@ def test_transform(): document = pydyf.PDF() draw = pydyf.Stream() + draw.transform(1, 0, 0, 1, 1, 1) draw.move_to(2, 2) draw.set_line_width(2) draw.line_to(2, 5) - draw.transform(1, 0, 0, 1, 1, 1) draw.stroke() document.add_object(draw) @@ -694,7 +696,7 @@ def test_text(): KKKKKKKKKK KKKKKKKKKK KKKKKKKKKK - __________ + zzzzzzzzzz __________ __________ __________ @@ -702,6 +704,20 @@ def test_text(): ''') +def test_identifier(): + document = pydyf.PDF() + pdf = io.BytesIO() + document.write(pdf, identifier=b'abc') + assert b'abc' in pdf.getvalue() + + +def test_version(): + document = pydyf.PDF() + pdf = io.BytesIO() + document.write(pdf, version=b'2.0') + assert b'2.0' in pdf.getvalue() + + def test_string_encoding(): assert pydyf.String('abc').data == b'(abc)' assert pydyf.String('déf').data == b'<feff006400e90066>' |