diff options
Diffstat (limited to 'vendor/setasign/fpdi/src/PdfParser/CrossReference')
6 files changed, 900 insertions, 0 deletions
diff --git a/vendor/setasign/fpdi/src/PdfParser/CrossReference/AbstractReader.php b/vendor/setasign/fpdi/src/PdfParser/CrossReference/AbstractReader.php new file mode 100644 index 0000000..b54237b --- /dev/null +++ b/vendor/setasign/fpdi/src/PdfParser/CrossReference/AbstractReader.php @@ -0,0 +1,95 @@ +<?php + +/** + * This file is part of FPDI + * + * @package setasign\Fpdi + * @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com) + * @license http://opensource.org/licenses/mit-license The MIT License + */ + +namespace setasign\Fpdi\PdfParser\CrossReference; + +use setasign\Fpdi\PdfParser\PdfParser; +use setasign\Fpdi\PdfParser\Type\PdfDictionary; +use setasign\Fpdi\PdfParser\Type\PdfToken; +use setasign\Fpdi\PdfParser\Type\PdfTypeException; + +/** + * Abstract class for cross-reference reader classes. + */ +abstract class AbstractReader +{ + /** + * @var PdfParser + */ + protected $parser; + + /** + * @var PdfDictionary + */ + protected $trailer; + + /** + * AbstractReader constructor. + * + * @param PdfParser $parser + * @throws CrossReferenceException + * @throws PdfTypeException + */ + public function __construct(PdfParser $parser) + { + $this->parser = $parser; + $this->readTrailer(); + } + + /** + * Get the trailer dictionary. + * + * @return PdfDictionary + */ + public function getTrailer() + { + return $this->trailer; + } + + /** + * Read the trailer dictionary. + * + * @throws CrossReferenceException + * @throws PdfTypeException + */ + protected function readTrailer() + { + try { + $trailerKeyword = $this->parser->readValue(null, PdfToken::class); + if ($trailerKeyword->value !== 'trailer') { + throw new CrossReferenceException( + \sprintf( + 'Unexpected end of cross reference. "trailer"-keyword expected, got: %s.', + $trailerKeyword->value + ), + CrossReferenceException::UNEXPECTED_END + ); + } + } catch (PdfTypeException $e) { + throw new CrossReferenceException( + 'Unexpected end of cross reference. "trailer"-keyword expected, got an invalid object type.', + CrossReferenceException::UNEXPECTED_END, + $e + ); + } + + try { + $trailer = $this->parser->readValue(null, PdfDictionary::class); + } catch (PdfTypeException $e) { + throw new CrossReferenceException( + 'Unexpected end of cross reference. Trailer not found.', + CrossReferenceException::UNEXPECTED_END, + $e + ); + } + + $this->trailer = $trailer; + } +} diff --git a/vendor/setasign/fpdi/src/PdfParser/CrossReference/CrossReference.php b/vendor/setasign/fpdi/src/PdfParser/CrossReference/CrossReference.php new file mode 100644 index 0000000..c47e8c4 --- /dev/null +++ b/vendor/setasign/fpdi/src/PdfParser/CrossReference/CrossReference.php @@ -0,0 +1,326 @@ +<?php + +/** + * This file is part of FPDI + * + * @package setasign\Fpdi + * @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com) + * @license http://opensource.org/licenses/mit-license The MIT License + */ + +namespace setasign\Fpdi\PdfParser\CrossReference; + +use setasign\Fpdi\PdfParser\PdfParser; +use setasign\Fpdi\PdfParser\Type\PdfDictionary; +use setasign\Fpdi\PdfParser\Type\PdfIndirectObject; +use setasign\Fpdi\PdfParser\Type\PdfNumeric; +use setasign\Fpdi\PdfParser\Type\PdfStream; +use setasign\Fpdi\PdfParser\Type\PdfToken; +use setasign\Fpdi\PdfParser\Type\PdfTypeException; + +/** + * Class CrossReference + * + * This class processes the standard cross reference of a PDF document. + */ +class CrossReference +{ + /** + * The byte length in which the "startxref" keyword should be searched. + * + * @var int + */ + public static $trailerSearchLength = 5500; + + /** + * @var int + */ + protected $fileHeaderOffset = 0; + + /** + * @var PdfParser + */ + protected $parser; + + /** + * @var ReaderInterface[] + */ + protected $readers = []; + + /** + * CrossReference constructor. + * + * @param PdfParser $parser + * @throws CrossReferenceException + * @throws PdfTypeException + */ + public function __construct(PdfParser $parser, $fileHeaderOffset = 0) + { + $this->parser = $parser; + $this->fileHeaderOffset = $fileHeaderOffset; + + $offset = $this->findStartXref(); + $reader = null; + /** @noinspection TypeUnsafeComparisonInspection */ + while ($offset != false) { // By doing an unsafe comparsion we ignore faulty references to byte offset 0 + try { + $reader = $this->readXref($offset + $this->fileHeaderOffset); + } catch (CrossReferenceException $e) { + // sometimes the file header offset is part of the byte offsets, so let's retry by resetting it to zero. + if ($e->getCode() === CrossReferenceException::INVALID_DATA && $this->fileHeaderOffset !== 0) { + $this->fileHeaderOffset = 0; + $reader = $this->readXref($offset + $this->fileHeaderOffset); + } else { + throw $e; + } + } + + $trailer = $reader->getTrailer(); + $this->checkForEncryption($trailer); + $this->readers[] = $reader; + + if (isset($trailer->value['Prev'])) { + $offset = $trailer->value['Prev']->value; + } else { + $offset = false; + } + } + + // fix faulty sub-section header + if ($reader instanceof FixedReader) { + /** + * @var FixedReader $reader + */ + $reader->fixFaultySubSectionShift(); + } + + if ($reader === null) { + throw new CrossReferenceException('No cross-reference found.', CrossReferenceException::NO_XREF_FOUND); + } + } + + /** + * Get the size of the cross reference. + * + * @return integer + */ + public function getSize() + { + return $this->getTrailer()->value['Size']->value; + } + + /** + * Get the trailer dictionary. + * + * @return PdfDictionary + */ + public function getTrailer() + { + return $this->readers[0]->getTrailer(); + } + + /** + * Get the cross reference readser instances. + * + * @return ReaderInterface[] + */ + public function getReaders() + { + return $this->readers; + } + + /** + * Get the offset by an object number. + * + * @param int $objectNumber + * @return integer|bool + */ + public function getOffsetFor($objectNumber) + { + foreach ($this->getReaders() as $reader) { + $offset = $reader->getOffsetFor($objectNumber); + if ($offset !== false) { + return $offset; + } + } + + return false; + } + + /** + * Get an indirect object by its object number. + * + * @param int $objectNumber + * @return PdfIndirectObject + * @throws CrossReferenceException + */ + public function getIndirectObject($objectNumber) + { + $offset = $this->getOffsetFor($objectNumber); + if ($offset === false) { + throw new CrossReferenceException( + \sprintf('Object (id:%s) not found.', $objectNumber), + CrossReferenceException::OBJECT_NOT_FOUND + ); + } + + $parser = $this->parser; + + $parser->getTokenizer()->clearStack(); + $parser->getStreamReader()->reset($offset + $this->fileHeaderOffset); + + try { + /** @var PdfIndirectObject $object */ + $object = $parser->readValue(null, PdfIndirectObject::class); + } catch (PdfTypeException $e) { + throw new CrossReferenceException( + \sprintf('Object (id:%s) not found at location (%s).', $objectNumber, $offset), + CrossReferenceException::OBJECT_NOT_FOUND, + $e + ); + } + + if ($object->objectNumber !== $objectNumber) { + throw new CrossReferenceException( + \sprintf('Wrong object found, got %s while %s was expected.', $object->objectNumber, $objectNumber), + CrossReferenceException::OBJECT_NOT_FOUND + ); + } + + return $object; + } + + /** + * Read the cross-reference table at a given offset. + * + * Internally the method will try to evaluate the best reader for this cross-reference. + * + * @param int $offset + * @return ReaderInterface + * @throws CrossReferenceException + * @throws PdfTypeException + */ + protected function readXref($offset) + { + $this->parser->getStreamReader()->reset($offset); + $this->parser->getTokenizer()->clearStack(); + $initValue = $this->parser->readValue(); + + return $this->initReaderInstance($initValue); + } + + /** + * Get a cross-reference reader instance. + * + * @param PdfToken|PdfIndirectObject $initValue + * @return ReaderInterface|bool + * @throws CrossReferenceException + * @throws PdfTypeException + */ + protected function initReaderInstance($initValue) + { + $position = $this->parser->getStreamReader()->getPosition() + + $this->parser->getStreamReader()->getOffset() + $this->fileHeaderOffset; + + if ($initValue instanceof PdfToken && $initValue->value === 'xref') { + try { + return new FixedReader($this->parser); + } catch (CrossReferenceException $e) { + $this->parser->getStreamReader()->reset($position); + $this->parser->getTokenizer()->clearStack(); + + return new LineReader($this->parser); + } + } + + if ($initValue instanceof PdfIndirectObject) { + try { + $stream = PdfStream::ensure($initValue->value); + } catch (PdfTypeException $e) { + throw new CrossReferenceException( + 'Invalid object type at xref reference offset.', + CrossReferenceException::INVALID_DATA, + $e + ); + } + + $type = PdfDictionary::get($stream->value, 'Type'); + if ($type->value !== 'XRef') { + throw new CrossReferenceException( + 'The xref position points to an incorrect object type.', + CrossReferenceException::INVALID_DATA + ); + } + + $this->checkForEncryption($stream->value); + + throw new CrossReferenceException( + 'This PDF document probably uses a compression technique which is not supported by the ' . + 'free parser shipped with FPDI. (See https://www.setasign.com/fpdi-pdf-parser for more details)', + CrossReferenceException::COMPRESSED_XREF + ); + } + + throw new CrossReferenceException( + 'The xref position points to an incorrect object type.', + CrossReferenceException::INVALID_DATA + ); + } + + /** + * Check for encryption. + * + * @param PdfDictionary $dictionary + * @throws CrossReferenceException + */ + protected function checkForEncryption(PdfDictionary $dictionary) + { + if (isset($dictionary->value['Encrypt'])) { + throw new CrossReferenceException( + 'This PDF document is encrypted and cannot be processed with FPDI.', + CrossReferenceException::ENCRYPTED + ); + } + } + + /** + * Find the start position for the first cross-reference. + * + * @return int The byte-offset position of the first cross-reference. + * @throws CrossReferenceException + */ + protected function findStartXref() + { + $reader = $this->parser->getStreamReader(); + $reader->reset(-self::$trailerSearchLength, self::$trailerSearchLength); + + $buffer = $reader->getBuffer(false); + $pos = \strrpos($buffer, 'startxref'); + $addOffset = 9; + if ($pos === false) { + // Some corrupted documents uses startref, instead of startxref + $pos = \strrpos($buffer, 'startref'); + if ($pos === false) { + throw new CrossReferenceException( + 'Unable to find pointer to xref table', + CrossReferenceException::NO_STARTXREF_FOUND + ); + } + $addOffset = 8; + } + + $reader->setOffset($pos + $addOffset); + + try { + $value = $this->parser->readValue(null, PdfNumeric::class); + } catch (PdfTypeException $e) { + throw new CrossReferenceException( + 'Invalid data after startxref keyword.', + CrossReferenceException::INVALID_DATA, + $e + ); + } + + return $value->value; + } +} diff --git a/vendor/setasign/fpdi/src/PdfParser/CrossReference/CrossReferenceException.php b/vendor/setasign/fpdi/src/PdfParser/CrossReference/CrossReferenceException.php new file mode 100644 index 0000000..7d88d5d --- /dev/null +++ b/vendor/setasign/fpdi/src/PdfParser/CrossReference/CrossReferenceException.php @@ -0,0 +1,79 @@ +<?php + +/** + * This file is part of FPDI + * + * @package setasign\Fpdi + * @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com) + * @license http://opensource.org/licenses/mit-license The MIT License + */ + +namespace setasign\Fpdi\PdfParser\CrossReference; + +use setasign\Fpdi\PdfParser\PdfParserException; + +/** + * Exception used by the CrossReference and Reader classes. + */ +class CrossReferenceException extends PdfParserException +{ + /** + * @var int + */ + const INVALID_DATA = 0x0101; + + /** + * @var int + */ + const XREF_MISSING = 0x0102; + + /** + * @var int + */ + const ENTRIES_TOO_LARGE = 0x0103; + + /** + * @var int + */ + const ENTRIES_TOO_SHORT = 0x0104; + + /** + * @var int + */ + const NO_ENTRIES = 0x0105; + + /** + * @var int + */ + const NO_TRAILER_FOUND = 0x0106; + + /** + * @var int + */ + const NO_STARTXREF_FOUND = 0x0107; + + /** + * @var int + */ + const NO_XREF_FOUND = 0x0108; + + /** + * @var int + */ + const UNEXPECTED_END = 0x0109; + + /** + * @var int + */ + const OBJECT_NOT_FOUND = 0x010A; + + /** + * @var int + */ + const COMPRESSED_XREF = 0x010B; + + /** + * @var int + */ + const ENCRYPTED = 0x010C; +} diff --git a/vendor/setasign/fpdi/src/PdfParser/CrossReference/FixedReader.php b/vendor/setasign/fpdi/src/PdfParser/CrossReference/FixedReader.php new file mode 100644 index 0000000..883feec --- /dev/null +++ b/vendor/setasign/fpdi/src/PdfParser/CrossReference/FixedReader.php @@ -0,0 +1,199 @@ +<?php + +/** + * This file is part of FPDI + * + * @package setasign\Fpdi + * @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com) + * @license http://opensource.org/licenses/mit-license The MIT License + */ + +namespace setasign\Fpdi\PdfParser\CrossReference; + +use setasign\Fpdi\PdfParser\PdfParser; +use setasign\Fpdi\PdfParser\StreamReader; + +/** + * Class FixedReader + * + * This reader allows a very less overhead parsing of single entries of the cross-reference, because the main entries + * are only read when needed and not in a single run. + */ +class FixedReader extends AbstractReader implements ReaderInterface +{ + /** + * @var StreamReader + */ + protected $reader; + + /** + * Data of subsections. + * + * @var array + */ + protected $subSections; + + /** + * FixedReader constructor. + * + * @param PdfParser $parser + * @throws CrossReferenceException + */ + public function __construct(PdfParser $parser) + { + $this->reader = $parser->getStreamReader(); + $this->read(); + parent::__construct($parser); + } + + /** + * Get all subsection data. + * + * @return array + */ + public function getSubSections() + { + return $this->subSections; + } + + /** + * @inheritdoc + */ + public function getOffsetFor($objectNumber) + { + foreach ($this->subSections as $offset => list($startObject, $objectCount)) { + /** + * @var int $startObject + * @var int $objectCount + */ + if ($objectNumber >= $startObject && $objectNumber < ($startObject + $objectCount)) { + $position = $offset + 20 * ($objectNumber - $startObject); + $this->reader->ensure($position, 20); + $line = $this->reader->readBytes(20); + if ($line[17] === 'f') { + return false; + } + + return (int) \substr($line, 0, 10); + } + } + + return false; + } + + /** + * Read the cross-reference. + * + * This reader will only read the subsections in this method. The offsets were resolved individually by this + * information. + * + * @throws CrossReferenceException + */ + protected function read() + { + $subSections = []; + + $startObject = $entryCount = $lastLineStart = null; + $validityChecked = false; + while (($line = $this->reader->readLine(20)) !== false) { + if (\strpos($line, 'trailer') !== false) { + $this->reader->reset($lastLineStart); + break; + } + + // jump over if line content doesn't match the expected string + if (\sscanf($line, '%d %d', $startObject, $entryCount) !== 2) { + continue; + } + + $oldPosition = $this->reader->getPosition(); + $position = $oldPosition + $this->reader->getOffset(); + + if (!$validityChecked && $entryCount > 0) { + $nextLine = $this->reader->readBytes(21); + /* Check the next line for maximum of 20 bytes and not longer + * By catching 21 bytes and trimming the length should be still 21. + */ + if (\strlen(\trim($nextLine)) !== 21) { + throw new CrossReferenceException( + 'Cross-reference entries are larger than 20 bytes.', + CrossReferenceException::ENTRIES_TOO_LARGE + ); + } + + /* Check for less than 20 bytes: cut the line to 20 bytes and trim; have to result in exactly 18 bytes. + * If it would have less bytes the substring would get the first bytes of the next line which would + * evaluate to a 20 bytes long string after trimming. + */ + if (\strlen(\trim(\substr($nextLine, 0, 20))) !== 18) { + throw new CrossReferenceException( + 'Cross-reference entries are less than 20 bytes.', + CrossReferenceException::ENTRIES_TOO_SHORT + ); + } + + $validityChecked = true; + } + + $subSections[$position] = [$startObject, $entryCount]; + + $lastLineStart = $position + $entryCount * 20; + $this->reader->reset($lastLineStart); + } + + // reset after the last correct parsed line + $this->reader->reset($lastLineStart); + + if (\count($subSections) === 0) { + throw new CrossReferenceException( + 'No entries found in cross-reference.', + CrossReferenceException::NO_ENTRIES + ); + } + + $this->subSections = $subSections; + } + + /** + * Fixes an invalid object number shift. + * + * This method can be used to repair documents with an invalid subsection header: + * + * <code> + * xref + * 1 7 + * 0000000000 65535 f + * 0000000009 00000 n + * 0000412075 00000 n + * 0000412172 00000 n + * 0000412359 00000 n + * 0000412417 00000 n + * 0000412468 00000 n + * </code> + * + * It shall only be called on the first table. + * + * @return bool + */ + public function fixFaultySubSectionShift() + { + $subSections = $this->getSubSections(); + if (\count($subSections) > 1) { + return false; + } + + $subSection = \current($subSections); + if ($subSection[0] != 1) { + return false; + } + + if ($this->getOffsetFor(1) === false) { + foreach ($subSections as $offset => list($startObject, $objectCount)) { + $this->subSections[$offset] = [$startObject - 1, $objectCount]; + } + return true; + } + + return false; + } +} diff --git a/vendor/setasign/fpdi/src/PdfParser/CrossReference/LineReader.php b/vendor/setasign/fpdi/src/PdfParser/CrossReference/LineReader.php new file mode 100644 index 0000000..b6f0e42 --- /dev/null +++ b/vendor/setasign/fpdi/src/PdfParser/CrossReference/LineReader.php @@ -0,0 +1,167 @@ +<?php + +/** + * This file is part of FPDI + * + * @package setasign\Fpdi + * @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com) + * @license http://opensource.org/licenses/mit-license The MIT License + */ + +namespace setasign\Fpdi\PdfParser\CrossReference; + +use setasign\Fpdi\PdfParser\PdfParser; +use setasign\Fpdi\PdfParser\StreamReader; + +/** + * Class LineReader + * + * This reader class read all cross-reference entries in a single run. + * It supports reading cross-references with e.g. invalid data (e.g. entries with a length < or > 20 bytes). + */ +class LineReader extends AbstractReader implements ReaderInterface +{ + /** + * The object offsets. + * + * @var array + */ + protected $offsets; + + /** + * LineReader constructor. + * + * @param PdfParser $parser + * @throws CrossReferenceException + */ + public function __construct(PdfParser $parser) + { + $this->read($this->extract($parser->getStreamReader())); + parent::__construct($parser); + } + + /** + * @inheritdoc + */ + public function getOffsetFor($objectNumber) + { + if (isset($this->offsets[$objectNumber])) { + return $this->offsets[$objectNumber][0]; + } + + return false; + } + + /** + * Get all found offsets. + * + * @return array + */ + public function getOffsets() + { + return $this->offsets; + } + + /** + * Extracts the cross reference data from the stream reader. + * + * @param StreamReader $reader + * @return string + * @throws CrossReferenceException + */ + protected function extract(StreamReader $reader) + { + $bytesPerCycle = 100; + $reader->reset(null, $bytesPerCycle); + + $cycles = 0; + do { + // 6 = length of "trailer" - 1 + $pos = \max(($bytesPerCycle * $cycles) - 6, 0); + $trailerPos = \strpos($reader->getBuffer(false), 'trailer', $pos); + $cycles++; + } while ($trailerPos === false && $reader->increaseLength($bytesPerCycle) !== false); + + if ($trailerPos === false) { + throw new CrossReferenceException( + 'Unexpected end of cross reference. "trailer"-keyword not found.', + CrossReferenceException::NO_TRAILER_FOUND + ); + } + + $xrefContent = \substr($reader->getBuffer(false), 0, $trailerPos); + $reader->reset($reader->getPosition() + $trailerPos); + + return $xrefContent; + } + + /** + * Read the cross-reference entries. + * + * @param string $xrefContent + * @throws CrossReferenceException + */ + protected function read($xrefContent) + { + // get eol markers in the first 100 bytes + \preg_match_all("/(\r\n|\n|\r)/", \substr($xrefContent, 0, 100), $m); + + if (\count($m[0]) === 0) { + throw new CrossReferenceException( + 'No data found in cross-reference.', + CrossReferenceException::INVALID_DATA + ); + } + + // count(array_count_values()) is faster then count(array_unique()) + // @see https://github.com/symfony/symfony/pull/23731 + // can be reverted for php7.2 + $differentLineEndings = \count(\array_count_values($m[0])); + if ($differentLineEndings > 1) { + $lines = \preg_split("/(\r\n|\n|\r)/", $xrefContent, -1, PREG_SPLIT_NO_EMPTY); + } else { + $lines = \explode($m[0][0], $xrefContent); + } + + unset($differentLineEndings, $m); + if (!\is_array($lines)) { + $this->offsets = []; + return; + } + + $start = 0; + $offsets = []; + + // trim all lines and remove empty lines + $lines = \array_filter(\array_map('\trim', $lines)); + foreach ($lines as $line) { + $pieces = \explode(' ', $line); + + switch (\count($pieces)) { + case 2: + $start = (int) $pieces[0]; + break; + + case 3: + switch ($pieces[2]) { + case 'n': + $offsets[$start] = [(int) $pieces[0], (int) $pieces[1]]; + $start++; + break 2; + case 'f': + $start++; + break 2; + } + // fall through if pieces doesn't match + + default: + throw new CrossReferenceException( + \sprintf('Unexpected data in xref table (%s)', \implode(' ', $pieces)), + CrossReferenceException::INVALID_DATA + ); + } + } + + $this->offsets = $offsets; + } +} diff --git a/vendor/setasign/fpdi/src/PdfParser/CrossReference/ReaderInterface.php b/vendor/setasign/fpdi/src/PdfParser/CrossReference/ReaderInterface.php new file mode 100644 index 0000000..d2dfdd0 --- /dev/null +++ b/vendor/setasign/fpdi/src/PdfParser/CrossReference/ReaderInterface.php @@ -0,0 +1,34 @@ +<?php + +/** + * This file is part of FPDI + * + * @package setasign\Fpdi + * @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com) + * @license http://opensource.org/licenses/mit-license The MIT License + */ + +namespace setasign\Fpdi\PdfParser\CrossReference; + +use setasign\Fpdi\PdfParser\Type\PdfDictionary; + +/** + * ReaderInterface for cross-reference readers. + */ +interface ReaderInterface +{ + /** + * Get an offset by an object number. + * + * @param int $objectNumber + * @return int|bool False if the offset was not found. + */ + public function getOffsetFor($objectNumber); + + /** + * Get the trailer related to this cross reference. + * + * @return PdfDictionary + */ + public function getTrailer(); +} |