diff options
Diffstat (limited to 'gfx/skia/skia/src/pdf/SkPDFMakeToUnicodeCmap.cpp')
-rw-r--r-- | gfx/skia/skia/src/pdf/SkPDFMakeToUnicodeCmap.cpp | 220 |
1 files changed, 220 insertions, 0 deletions
diff --git a/gfx/skia/skia/src/pdf/SkPDFMakeToUnicodeCmap.cpp b/gfx/skia/skia/src/pdf/SkPDFMakeToUnicodeCmap.cpp new file mode 100644 index 0000000000..e6d6c6f06c --- /dev/null +++ b/gfx/skia/skia/src/pdf/SkPDFMakeToUnicodeCmap.cpp @@ -0,0 +1,220 @@ +/* + * Copyright 2011 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#include "src/pdf/SkPDFMakeToUnicodeCmap.h" + +#include "include/private/base/SkTo.h" +#include "src/base/SkUTF.h" +#include "src/pdf/SkPDFUtils.h" + +static void append_tounicode_header(SkDynamicMemoryWStream* cmap, + bool multibyte) { + // 12 dict begin: 12 is an Adobe-suggested value. Shall not change. + // It's there to prevent old version Adobe Readers from malfunctioning. + const char* kHeader = + "/CIDInit /ProcSet findresource begin\n" + "12 dict begin\n" + "begincmap\n"; + cmap->writeText(kHeader); + + // The /CIDSystemInfo must be consistent to the one in + // SkPDFFont::populateCIDFont(). + // We can not pass over the system info object here because the format is + // different. This is not a reference object. + const char* kSysInfo = + "/CIDSystemInfo\n" + "<< /Registry (Adobe)\n" + "/Ordering (UCS)\n" + "/Supplement 0\n" + ">> def\n"; + cmap->writeText(kSysInfo); + + // The CMapName must be consistent to /CIDSystemInfo above. + // /CMapType 2 means ToUnicode. + // Codespace range just tells the PDF processor the valid range. + const char* kTypeInfoHeader = + "/CMapName /Adobe-Identity-UCS def\n" + "/CMapType 2 def\n" + "1 begincodespacerange\n"; + cmap->writeText(kTypeInfoHeader); + if (multibyte) { + cmap->writeText("<0000> <FFFF>\n"); + } else { + cmap->writeText("<00> <FF>\n"); + } + cmap->writeText("endcodespacerange\n"); +} + +static void append_cmap_footer(SkDynamicMemoryWStream* cmap) { + const char kFooter[] = + "endcmap\n" + "CMapName currentdict /CMap defineresource pop\n" + "end\n" + "end"; + cmap->writeText(kFooter); +} + +namespace { +struct BFChar { + SkGlyphID fGlyphId; + SkUnichar fUnicode; +}; + +struct BFRange { + SkGlyphID fStart; + SkGlyphID fEnd; + SkUnichar fUnicode; +}; +} // namespace + +static void write_glyph(SkDynamicMemoryWStream* cmap, + bool multiByte, + SkGlyphID gid) { + if (multiByte) { + SkPDFUtils::WriteUInt16BE(cmap, gid); + } else { + SkPDFUtils::WriteUInt8(cmap, SkToU8(gid)); + } +} + +static void append_bfchar_section(const std::vector<BFChar>& bfchar, + bool multiByte, + SkDynamicMemoryWStream* cmap) { + // PDF spec defines that every bf* list can have at most 100 entries. + for (size_t i = 0; i < bfchar.size(); i += 100) { + int count = SkToInt(bfchar.size() - i); + count = std::min(count, 100); + cmap->writeDecAsText(count); + cmap->writeText(" beginbfchar\n"); + for (int j = 0; j < count; ++j) { + cmap->writeText("<"); + write_glyph(cmap, multiByte, bfchar[i + j].fGlyphId); + cmap->writeText("> <"); + SkPDFUtils::WriteUTF16beHex(cmap, bfchar[i + j].fUnicode); + cmap->writeText(">\n"); + } + cmap->writeText("endbfchar\n"); + } +} + +static void append_bfrange_section(const std::vector<BFRange>& bfrange, + bool multiByte, + SkDynamicMemoryWStream* cmap) { + // PDF spec defines that every bf* list can have at most 100 entries. + for (size_t i = 0; i < bfrange.size(); i += 100) { + int count = SkToInt(bfrange.size() - i); + count = std::min(count, 100); + cmap->writeDecAsText(count); + cmap->writeText(" beginbfrange\n"); + for (int j = 0; j < count; ++j) { + cmap->writeText("<"); + write_glyph(cmap, multiByte, bfrange[i + j].fStart); + cmap->writeText("> <"); + write_glyph(cmap, multiByte, bfrange[i + j].fEnd); + cmap->writeText("> <"); + SkPDFUtils::WriteUTF16beHex(cmap, bfrange[i + j].fUnicode); + cmap->writeText(">\n"); + } + cmap->writeText("endbfrange\n"); + } +} + +// Generate <bfchar> and <bfrange> table according to PDF spec 1.4 and Adobe +// Technote 5014. +// The function is not static so we can test it in unit tests. +// +// Current implementation guarantees bfchar and bfrange entries do not overlap. +// +// Current implementation does not attempt aggressive optimizations against +// following case because the specification is not clear. +// +// 4 beginbfchar 1 beginbfchar +// <0003> <0013> <0020> <0014> +// <0005> <0015> to endbfchar +// <0007> <0017> 1 beginbfrange +// <0020> <0014> <0003> <0007> <0013> +// endbfchar endbfrange +// +// Adobe Technote 5014 said: "Code mappings (unlike codespace ranges) may +// overlap, but succeeding maps supersede preceding maps." +// +// In case of searching text in PDF, bfrange will have higher precedence so +// typing char id 0x0014 in search box will get glyph id 0x0004 first. However, +// the spec does not mention how will this kind of conflict being resolved. +// +// For the worst case (having 65536 continuous unicode and we use every other +// one of them), the possible savings by aggressive optimization is 416KB +// pre-compressed and does not provide enough motivation for implementation. +void SkPDFAppendCmapSections(const SkUnichar* glyphToUnicode, + const SkPDFGlyphUse* subset, + SkDynamicMemoryWStream* cmap, + bool multiByteGlyphs, + SkGlyphID firstGlyphID, + SkGlyphID lastGlyphID) { + int glyphOffset = 0; + if (!multiByteGlyphs) { + glyphOffset = firstGlyphID - 1; + } + + std::vector<BFChar> bfcharEntries; + std::vector<BFRange> bfrangeEntries; + + BFRange currentRangeEntry = {0, 0, 0}; + bool rangeEmpty = true; + const int limit = (int)lastGlyphID + 1 - glyphOffset; + + for (int i = firstGlyphID - glyphOffset; i < limit + 1; ++i) { + SkGlyphID gid = i + glyphOffset; + bool inSubset = i < limit && (subset == nullptr || subset->has(gid)); + if (!rangeEmpty) { + // PDF spec requires bfrange not changing the higher byte, + // e.g. <1035> <10FF> <2222> is ok, but + // <1035> <1100> <2222> is no good + bool inRange = + i == currentRangeEntry.fEnd + 1 && + i >> 8 == currentRangeEntry.fStart >> 8 && + i < limit && + glyphToUnicode[gid] == + currentRangeEntry.fUnicode + i - currentRangeEntry.fStart; + if (!inSubset || !inRange) { + if (currentRangeEntry.fEnd > currentRangeEntry.fStart) { + bfrangeEntries.push_back(currentRangeEntry); + } else { + bfcharEntries.push_back({currentRangeEntry.fStart, currentRangeEntry.fUnicode}); + } + rangeEmpty = true; + } + } + if (inSubset) { + currentRangeEntry.fEnd = i; + if (rangeEmpty) { + currentRangeEntry.fStart = i; + currentRangeEntry.fUnicode = glyphToUnicode[gid]; + rangeEmpty = false; + } + } + } + + // The spec requires all bfchar entries for a font must come before bfrange + // entries. + append_bfchar_section(bfcharEntries, multiByteGlyphs, cmap); + append_bfrange_section(bfrangeEntries, multiByteGlyphs, cmap); +} + +std::unique_ptr<SkStreamAsset> SkPDFMakeToUnicodeCmap( + const SkUnichar* glyphToUnicode, + const SkPDFGlyphUse* subset, + bool multiByteGlyphs, + SkGlyphID firstGlyphID, + SkGlyphID lastGlyphID) { + SkDynamicMemoryWStream cmap; + append_tounicode_header(&cmap, multiByteGlyphs); + SkPDFAppendCmapSections(glyphToUnicode, subset, &cmap, multiByteGlyphs, + firstGlyphID, lastGlyphID); + append_cmap_footer(&cmap); + return cmap.detachAsStream(); +} |