1 files changed, 220 insertions, 0 deletions
diff --git a/gfx/skia/skia/src/pdf/SkPDFMakeToUnicodeCmap.cpp b/gfx/skia/skia/src/pdf/SkPDFMakeToUnicodeCmap.cpp
new file mode 100644
index 0000000000..e6d6c6f06c
--- /dev/null
+++ b/gfx/skia/skia/src/pdf/SkPDFMakeToUnicodeCmap.cpp
@@ -0,0 +1,220 @@
+/*
+ * Copyright 2011 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "src/pdf/SkPDFMakeToUnicodeCmap.h"
+
+#include "include/private/base/SkTo.h"
+#include "src/base/SkUTF.h"
+#include "src/pdf/SkPDFUtils.h"
+
+// Writes the fixed preamble of a ToUnicode CMap to |cmap|: the CIDInit/begincmap
+// opener, /CIDSystemInfo, the CMap name and type, and one codespace range.
+// |multibyte| selects the <0000>-<FFFF> codespace range instead of <00>-<FF>.
+static void append_tounicode_header(SkDynamicMemoryWStream* cmap,
+                                    bool multibyte) {
+    // "12 dict begin": 12 is the Adobe-suggested value and shall not change.
+    // It is there to keep old versions of Adobe Reader from malfunctioning.
+    static constexpr char kPreamble[] =
+        "/CIDInit /ProcSet findresource begin\n"
+        "12 dict begin\n"
+        "begincmap\n";
+    cmap->writeText(kPreamble);
+
+    // This /CIDSystemInfo must agree with the one written by
+    // SkPDFFont::populateCIDFont(). That system info object cannot be passed
+    // in and reused because the format differs: this is not a reference object.
+    static constexpr char kSystemInfo[] =
+        "/CIDSystemInfo\n"
+        "<<  /Registry (Adobe)\n"
+        "/Ordering (UCS)\n"
+        "/Supplement 0\n"
+        ">> def\n";
+    cmap->writeText(kSystemInfo);
+
+    // The CMapName must agree with /CIDSystemInfo above; /CMapType 2 means
+    // ToUnicode. The codespace range only tells the PDF processor the valid range.
+    static constexpr char kNameAndType[] =
+        "/CMapName /Adobe-Identity-UCS def\n"
+        "/CMapType 2 def\n"
+        "1 begincodespacerange\n";
+    cmap->writeText(kNameAndType);
+
+    const char* codespaceRange =
+        multibyte ? "<0000> <FFFF>\n" : "<00> <FF>\n";
+    cmap->writeText(codespaceRange);
+    cmap->writeText("endcodespacerange\n");
+}
+
+// Writes the CMap trailer to |cmap|: "endcmap", the defineresource line and the
+// two closing "end" operators. No newline is written after the final "end".
+static void append_cmap_footer(SkDynamicMemoryWStream* cmap) {
+    cmap->writeText("endcmap\n"
+                    "CMapName currentdict /CMap defineresource pop\n"
+                    "end\n"
+                    "end");
+}
+
+namespace {
+struct BFChar {  // One bfchar entry: a single code mapped to one code point.
+    SkGlyphID fGlyphId;  // Source code; emitted through write_glyph().
+    SkUnichar fUnicode;  // Destination; emitted through WriteUTF16beHex().
+};
+
+struct BFRange {  // One bfrange entry covering codes fStart..fEnd, inclusive.
+    SkGlyphID fStart;
+    SkGlyphID fEnd;
+    SkUnichar fUnicode;  // Destination of fStart; later codes map consecutively.
+};
+}  // namespace
+
+// Writes |gid| as the source code of a mapping entry: via WriteUInt16BE when
+// |multiByte| is set, otherwise narrowed with SkToU8 and written via WriteUInt8.
+static void write_glyph(SkDynamicMemoryWStream* cmap, bool multiByte, SkGlyphID gid) {
+    if (!multiByte) {
+        SkPDFUtils::WriteUInt8(cmap, SkToU8(gid));
+        return;
+    }
+    SkPDFUtils::WriteUInt16BE(cmap, gid);
+}
+
+// Appends |bfchar| to |cmap| as "beginbfchar ... endbfchar" sections. The PDF
+// spec allows at most 100 entries per bf* list, so entries go out in chunks.
+static void append_bfchar_section(const std::vector<BFChar>& bfchar, bool multiByte,
+                                  SkDynamicMemoryWStream* cmap) {
+    for (size_t chunkStart = 0; chunkStart < bfchar.size(); chunkStart += 100) {
+        const int count = std::min(SkToInt(bfchar.size() - chunkStart), 100);
+        cmap->writeDecAsText(count);
+        cmap->writeText(" beginbfchar\n");
+        for (int k = 0; k < count; ++k) {
+            const BFChar& entry = bfchar[chunkStart + k];
+            cmap->writeText("<");
+            write_glyph(cmap, multiByte, entry.fGlyphId);
+            cmap->writeText("> <");
+            SkPDFUtils::WriteUTF16beHex(cmap, entry.fUnicode);
+            cmap->writeText(">\n");
+        }
+        cmap->writeText("endbfchar\n");
+    }
+}
+
+// Appends |bfrange| to |cmap| as "beginbfrange ... endbfrange" sections. The PDF
+// spec allows at most 100 entries per bf* list, so entries go out in chunks.
+static void append_bfrange_section(const std::vector<BFRange>& bfrange, bool multiByte,
+                                   SkDynamicMemoryWStream* cmap) {
+    for (size_t chunkStart = 0; chunkStart < bfrange.size(); chunkStart += 100) {
+        const int count = std::min(SkToInt(bfrange.size() - chunkStart), 100);
+        cmap->writeDecAsText(count);
+        cmap->writeText(" beginbfrange\n");
+        for (int k = 0; k < count; ++k) {
+            const BFRange& entry = bfrange[chunkStart + k];
+            cmap->writeText("<");
+            write_glyph(cmap, multiByte, entry.fStart);
+            cmap->writeText("> <");
+            write_glyph(cmap, multiByte, entry.fEnd);
+            cmap->writeText("> <");
+            SkPDFUtils::WriteUTF16beHex(cmap, entry.fUnicode);
+            cmap->writeText(">\n");
+        }
+        cmap->writeText("endbfrange\n");
+    }
+}
+
+// Generates the bfchar and bfrange tables described by PDF spec 1.4 and Adobe
+// Technote 5014, and appends them to |cmap| (all bfchar sections first).
+// The function is not static so we can test it in unit tests.
+//
+// Glyphs firstGlyphID..lastGlyphID are considered; when |subset| is non-null
+// only the glyphs it contains are emitted. |glyphToUnicode| is indexed by glyph
+// ID. Emitted bfchar and bfrange entries are guaranteed not to overlap.
+//
+// The current implementation does not attempt the aggressive optimization shown
+// below, because the specification is not clear about overlapping mappings.
+//
+// 4 beginbfchar          1 beginbfchar
+// <0003> <0013>          <0020> <0014>
+// <0005> <0015>    to    endbfchar
+// <0007> <0017>          1 beginbfrange
+// <0020> <0014>          <0003> <0007> <0013>
+// endbfchar              endbfrange
+//
+// Adobe Technote 5014 says: "Code mappings (unlike codespace ranges) may
+// overlap, but succeeding maps supersede preceding maps."
+//
+// When searching text in a PDF, the bfrange would take precedence, so typing
+// char id 0x0014 in the search box would match glyph id 0x0004 first. However,
+// the spec does not say how this kind of conflict is resolved. Even in the
+// worst case (65536 contiguous code points with every other one used) the saving
+// is 416KB pre-compression, which is not enough motivation to implement it.
+void SkPDFAppendCmapSections(const SkUnichar* glyphToUnicode,
+                             const SkPDFGlyphUse* subset,
+                             SkDynamicMemoryWStream* cmap,
+                             bool multiByteGlyphs,
+                             SkGlyphID firstGlyphID,
+                             SkGlyphID lastGlyphID) {
+    int glyphOffset = 0;  // Code written for a glyph is glyph ID - glyphOffset.
+    if (!multiByteGlyphs) {
+        glyphOffset = firstGlyphID - 1;  // Single-byte: firstGlyphID gets code 1.
+    }
+
+    std::vector<BFChar> bfcharEntries;
+    std::vector<BFRange> bfrangeEntries;
+
+    BFRange currentRangeEntry = {0, 0, 0};  // Run currently being accumulated.
+    bool rangeEmpty = true;  // True while no run is open.
+    const int limit = (int)lastGlyphID + 1 - glyphOffset;  // One past the last code.
+
+    for (int i = firstGlyphID - glyphOffset; i < limit + 1; ++i) {  // i == limit only flushes.
+        SkGlyphID gid = i + glyphOffset;
+        bool inSubset = i < limit && (subset == nullptr || subset->has(gid));
+        if (!rangeEmpty) {
+            // Can code i extend the open run? It must be the next code, map to
+            // the next code point, and share the run's high byte: the PDF spec
+            // forbids e.g. <1035> <1100> <2222> (but <1035> <10FF> <2222> is ok).
+            bool inRange =
+                i == currentRangeEntry.fEnd + 1 &&
+                i >> 8 == currentRangeEntry.fStart >> 8 &&
+                i < limit &&
+                glyphToUnicode[gid] ==
+                    currentRangeEntry.fUnicode + i - currentRangeEntry.fStart;
+            if (!inSubset || !inRange) {  // The open run ends here: emit it.
+                if (currentRangeEntry.fEnd > currentRangeEntry.fStart) {  // Two or more codes.
+                    bfrangeEntries.push_back(currentRangeEntry);
+                } else {  // A single code becomes a bfchar entry.
+                    bfcharEntries.push_back({currentRangeEntry.fStart, currentRangeEntry.fUnicode});
+                }
+                rangeEmpty = true;
+            }
+        }
+        if (inSubset) {  // Extend the open run, or start a new one at i.
+            currentRangeEntry.fEnd = i;
+            if (rangeEmpty) {
+              currentRangeEntry.fStart = i;
+              currentRangeEntry.fUnicode = glyphToUnicode[gid];
+              rangeEmpty = false;
+            }
+        }
+    }
+
+    // The spec requires that all bfchar entries for a font come before its
+    // bfrange entries.
+    append_bfchar_section(bfcharEntries, multiByteGlyphs, cmap);
+    append_bfrange_section(bfrangeEntries, multiByteGlyphs, cmap);
+}
+
+// Builds a complete ToUnicode CMap: the header, the bfchar/bfrange sections for
+// glyphs firstGlyphID..lastGlyphID (filtered by |subset| when it is non-null),
+// and the footer. The accumulated bytes are returned as a stream asset.
+std::unique_ptr<SkStreamAsset> SkPDFMakeToUnicodeCmap(
+        const SkUnichar* glyphToUnicode, const SkPDFGlyphUse* subset,
+        bool multiByteGlyphs, SkGlyphID firstGlyphID, SkGlyphID lastGlyphID) {
+    SkDynamicMemoryWStream out;
+    append_tounicode_header(&out, multiByteGlyphs);
+    SkPDFAppendCmapSections(
+            glyphToUnicode, subset, &out, multiByteGlyphs, firstGlyphID, lastGlyphID);
+    append_cmap_footer(&out);
+    return out.detachAsStream();
+}