// Copyright 2013 Google Inc. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // // Author: dsites@google.com (Dick Sites) // // #include "offsetmap.h" #include // for strcmp #include // for fprintf, stderr, fclose, etc #include // for min using namespace std; namespace CLD2 { // Constructor, destructor OffsetMap::OffsetMap() { Clear(); } OffsetMap::~OffsetMap() { } // Clear the map // After: // next_diff_sub_ is 0 // Windows are the a and a' ranges covered by diffs_[next_diff_sub_-1] // which is a fake range of width 0 mapping 0=>0 void OffsetMap::Clear() { diffs_.clear(); pending_op_ = COPY_OP; pending_length_ = 0; next_diff_sub_ = 0; current_lo_aoffset_ = 0; current_hi_aoffset_ = 0; current_lo_aprimeoffset_ = 0; current_hi_aprimeoffset_ = 0; current_diff_ = 0; max_aoffset_ = 0; // Largest seen so far max_aprimeoffset_ = 0; // Largest seen so far } static inline char OpPart(const char c) { return (c >> 6) & 3; } static inline char LenPart(const char c) { return c & 0x3f; } // Print map to file, for debugging void OffsetMap::Printmap(const char* filename) { FILE* fout; bool needs_close = false; if (strcmp(filename, "stdout") == 0) { fout = stdout; } else if (strcmp(filename, "stderr") == 0) { fout = stderr; } else { fout = fopen(filename, "w"); needs_close = true; } if (fout == NULL) { fprintf(stderr, "%s did not open\n", filename); return; } Flush(); // Make sure any pending entry gets printed fprintf(fout, "Offsetmap: %ld bytes\n", diffs_.size()); for (int i = 0; i < static_cast(diffs_.size()); ++i) { fprintf(fout, "%c%02d ", "&=+-"[OpPart(diffs_[i])], LenPart(diffs_[i])); if ((i % 20) == 19) {fprintf(fout, "\n");} } fprintf(fout, "\n"); if (needs_close) { fclose(fout); } } // Reset to offset 0 void OffsetMap::Reset() { MaybeFlushAll(); next_diff_sub_ = 0; current_lo_aoffset_ = 0; current_hi_aoffset_ = 0; current_lo_aprimeoffset_ = 0; current_hi_aprimeoffset_ = 0; current_diff_ = 0; } // Add to mapping from A to A', specifying how many next bytes are // identical in A and A' void OffsetMap::Copy(int bytes) { if (bytes == 0) {return;} max_aoffset_ += bytes; // Largest seen so far max_aprimeoffset_ += bytes; // Largest seen so far if (pending_op_ == COPY_OP) { pending_length_ += bytes; } else { Flush(); pending_op_ = COPY_OP; pending_length_ = bytes; } } // Add to mapping from A to A', specifying how many next bytes are // inserted in A' while not advancing in A at all void OffsetMap::Insert(int bytes){ if (bytes == 0) {return;} max_aprimeoffset_ += bytes; // Largest seen so far if (pending_op_ == INSERT_OP) { pending_length_ += bytes; } else if ((bytes == 1) && (pending_op_ == DELETE_OP) && (pending_length_ == 1)) { // Special-case exactly delete(1) insert(1) +> copy(1); // all others backmap inserts to after deletes pending_op_ = COPY_OP; } else { Flush(); pending_op_ = INSERT_OP; pending_length_ = bytes; } } // Add to mapping from A to A', specifying how many next bytes are // deleted from A while not advancing in A' at all void OffsetMap::Delete(int bytes){ if (bytes == 0) {return;} max_aoffset_ += bytes; // Largest seen so far if (pending_op_ == DELETE_OP) { pending_length_ += bytes; } else if ((bytes == 1) && (pending_op_ == INSERT_OP) && (pending_length_ == 1)) { // Special-case exactly insert(1) delete(1) => copy(1); // all others backmap deletes to after insertss pending_op_ = COPY_OP; } else { Flush(); pending_op_ = DELETE_OP; pending_length_ = bytes; } } void OffsetMap::Flush() { if (pending_length_ == 0) { return; } // We may be emitting a copy op just after a copy op because +1 -1 cancelled // inbetween. If the lengths don't need a prefix byte, combine them if ((pending_op_ == COPY_OP) && !diffs_.empty()) { char c = diffs_[diffs_.size() - 1]; MapOp prior_op = static_cast(OpPart(c)); int prior_len = LenPart(c); if ((prior_op == COPY_OP) && ((prior_len + pending_length_) <= 0x3f)) { diffs_[diffs_.size() - 1] += pending_length_; pending_length_ = 0; return; } } if (pending_length_ > 0x3f) { bool non_zero_emitted = false; for (int shift = 30; shift > 0; shift -= 6) { int prefix = (pending_length_ >> shift) & 0x3f; if ((prefix > 0) || non_zero_emitted) { Emit(PREFIX_OP, prefix); non_zero_emitted = true; } } } Emit(pending_op_, pending_length_ & 0x3f); pending_length_ = 0; } // Add one more entry to copy one byte off the end, then flush void OffsetMap::FlushAll() { Copy(1); Flush(); } // Flush all if necessary void OffsetMap::MaybeFlushAll() { if ((0 < pending_length_) || diffs_.empty()) { FlushAll(); } } // Len may be 0, for example as the low piece of length=64 void OffsetMap::Emit(MapOp op, int len) { char c = (static_cast(op) << 6) | (len & 0x3f); diffs_.push_back(c); } void OffsetMap::DumpString() { for (int i = 0; i < static_cast(diffs_.size()); ++i) { fprintf(stderr, "%c%02d ", "&=+-"[OpPart(diffs_[i])], LenPart(diffs_[i])); } fprintf(stderr, "\n"); // Print running table of correspondences fprintf(stderr, " op A => A' (A forward-maps to A')\n"); int aoffset = 0; int aprimeoffset = 0; int length = 0; for (int i = 0; i < static_cast(diffs_.size()); ++i) { char c = diffs_[i]; MapOp op = static_cast(OpPart(c)); int len = LenPart(c); length = (length << 6) + len; if (op == COPY_OP) { aoffset += length; aprimeoffset += length; length = 0; } else if (op == INSERT_OP) { aoffset += 0; aprimeoffset += length; length = 0; } else if (op == DELETE_OP) { aoffset += length; aprimeoffset += 0; length = 0; } else { // (op == PREFIX_OP) // Do nothing else } fprintf(stderr, "[%3d] %c%02d %6d %6d%s\n", i, "&=+-"[op], len, aoffset, aprimeoffset, (next_diff_sub_ == i) ? " <==next_diff_sub_" : ""); } fprintf(stderr, "\n"); } void OffsetMap::DumpWindow() { fprintf(stderr, "DumpWindow(A => A'): max_aoffset_ = %d, " "max_aprimeoffset_ = %d, next_diff_sub_ = %d
\n", max_aoffset_, max_aprimeoffset_, next_diff_sub_); fprintf(stderr, "A [%u..%u)\n", current_lo_aoffset_, current_hi_aoffset_); fprintf(stderr, "A' [%u..%u)\n", current_lo_aprimeoffset_, current_hi_aprimeoffset_); fprintf(stderr, " diff = %d\n", current_diff_); DumpString(); } //----------------------------------------------------------------------------// // The guts of the 2013 design // // If there are three ranges a b c in diffs_, we can be in one of five // // states: LEFT of a, in ranges a b c, or RIGHT of c // // In each state, there are windows A[Alo..Ahi), A'[A'lo..A'hi) and diffs_ // // position next_diff_sub_ // // There also are mapping constants max_aoffset_ and max_aprimeoffset_ // // If LEFT, Alo=Ahi=0, A'lo=A'hi=0 and next_diff_sub_=0 // // If RIGHT, Alo=Ahi=max_aoffset_, A'lo=A'hi=max_aprimeoffset_ and // // next_diff_sub_=diffs_.size() // // Otherwise, at least one of A[) and A'[) is non-empty and the first bytes // // correspond to each other. If range i is active, next_diff_sub_ is at // // the first byte of range i+1. Because of the length-prefix operator, // // an individual range item in diffs_ may be multiple bytes // // In all cases aprimeoffset = aoffset + current_diff_ // // i.e. current_diff_ = aprimeoffset - aoffset // // // // In the degenerate case of diffs_.empty(), there are only two states // // LEFT and RIGHT and the mapping is the identity mapping. // // The initial state is LEFT. // // It is an error to move left into LEFT or right into RIGHT, but the code // // below is robust in these cases. // //----------------------------------------------------------------------------// void OffsetMap::SetLeft() { current_lo_aoffset_ = 0; current_hi_aoffset_ = 0; current_lo_aprimeoffset_ = 0; current_hi_aprimeoffset_ = 0; current_diff_ = 0; next_diff_sub_ = 0; } void OffsetMap::SetRight() { current_lo_aoffset_ = max_aoffset_; current_hi_aoffset_ = max_aoffset_; current_lo_aprimeoffset_ = max_aprimeoffset_; current_hi_aprimeoffset_ = max_aprimeoffset_; current_diff_ = max_aprimeoffset_ - max_aoffset_; next_diff_sub_ = 0; } // Back up over previous range, 1..5 bytes // Return subscript at the beginning of that. Pins at 0 int OffsetMap::Backup(int sub) { if (sub <= 0) {return 0;} --sub; while ((0 < sub) && (static_cast(OpPart(diffs_[sub - 1]) == PREFIX_OP))) { --sub; } return sub; } // Parse next range, 1..5 bytes // Return subscript just off the end of that int OffsetMap::ParseNext(int sub, MapOp* op, int* length) { *op = PREFIX_OP; *length = 0; char c; while ((sub < static_cast(diffs_.size())) && (*op == PREFIX_OP)) { c = diffs_[sub++]; *op = static_cast(OpPart(c)); int len = LenPart(c); *length = (*length << 6) + len; } // If mal-formed or in RIGHT, this will return with op = PREFIX_OP // Mal-formed can include a trailing prefix byte with no following op return sub; } // Parse previous range, 1..5 bytes // Return current subscript int OffsetMap::ParsePrevious(int sub, MapOp* op, int* length) { sub = Backup(sub); return ParseNext(sub, op, length); } // Quick debugging dump; does not parse multi-byte items, so just length & 0x3f void OffsetMap::PrintPosition(const char* str) { MapOp op = PREFIX_OP; int length = 0; if ((0 < next_diff_sub_) && (next_diff_sub_ <= static_cast(diffs_.size()))) { op = static_cast(OpPart(diffs_[next_diff_sub_ - 1])); length = LenPart(diffs_[next_diff_sub_ - 1]); } fprintf(stderr, "%s[%d] %c%02d = A[%d..%d) ==> A'[%d..%d)\n", str, next_diff_sub_, "&=+-"[op], length, current_lo_aoffset_, current_hi_aoffset_, current_lo_aprimeoffset_, current_hi_aprimeoffset_); } // Move active window one range to the right // Return true if move was OK bool OffsetMap::MoveRight() { // If at last range or RIGHT, set to RIGHT, return error if (next_diff_sub_ >= static_cast(diffs_.size())) { SetRight(); return false; } // Actually OK to move right MapOp op; int length; bool retval = true; // If mal-formed or in RIGHT, this will return with op = PREFIX_OP next_diff_sub_ = ParseNext(next_diff_sub_, &op, &length); current_lo_aoffset_ = current_hi_aoffset_; current_lo_aprimeoffset_ = current_hi_aprimeoffset_; if (op == COPY_OP) { current_hi_aoffset_ = current_lo_aoffset_ + length; current_hi_aprimeoffset_ = current_lo_aprimeoffset_ + length; } else if (op == INSERT_OP) { current_hi_aoffset_ = current_lo_aoffset_ + 0; current_hi_aprimeoffset_ = current_lo_aprimeoffset_ + length; } else if (op == DELETE_OP) { current_hi_aoffset_ = current_lo_aoffset_ + length; current_hi_aprimeoffset_ = current_lo_aprimeoffset_ + 0; } else { SetRight(); retval = false; } current_diff_ = current_lo_aprimeoffset_ - current_lo_aoffset_; return retval; } // Move active window one range to the left // Return true if move was OK bool OffsetMap::MoveLeft() { // If at first range or LEFT, set to LEFT, return error if (next_diff_sub_ <= 0) { SetLeft(); return false; } // Back up over current active window next_diff_sub_ = Backup(next_diff_sub_); if (next_diff_sub_ <= 0) { SetLeft(); return false; } // Actually OK to move left MapOp op; int length; bool retval = true; // If mal-formed or in LEFT, this will return with op = PREFIX_OP next_diff_sub_ = ParsePrevious(next_diff_sub_, &op, &length); current_hi_aoffset_ = current_lo_aoffset_; current_hi_aprimeoffset_ = current_lo_aprimeoffset_; if (op == COPY_OP) { current_lo_aoffset_ = current_hi_aoffset_ - length; current_lo_aprimeoffset_ = current_hi_aprimeoffset_ - length; } else if (op == INSERT_OP) { current_lo_aoffset_ = current_hi_aoffset_ - 0; current_lo_aprimeoffset_ = current_hi_aprimeoffset_ - length; } else if (op == DELETE_OP) { current_lo_aoffset_ = current_hi_aoffset_ - length; current_lo_aprimeoffset_ = current_hi_aprimeoffset_ - 0; } else { SetLeft(); retval = false; } current_diff_ = current_lo_aprimeoffset_ - current_lo_aoffset_; return true; } // Map an offset in A' to the corresponding offset in A int OffsetMap::MapBack(int aprimeoffset){ MaybeFlushAll(); if (aprimeoffset < 0) {return 0;} if (max_aprimeoffset_ <= aprimeoffset) { return (aprimeoffset - max_aprimeoffset_) + max_aoffset_; } // If current_lo_aprimeoffset_ <= aprimeoffset < current_hi_aprimeoffset_, // use current mapping, else move window left/right bool ok = true; while (ok && (aprimeoffset < current_lo_aprimeoffset_)) { ok = MoveLeft(); } while (ok && (current_hi_aprimeoffset_ <= aprimeoffset)) { ok = MoveRight(); } // So now current_lo_aprimeoffset_ <= aprimeoffset < current_hi_aprimeoffset_ int aoffset = aprimeoffset - current_diff_; if (aoffset >= current_hi_aoffset_) { // A' is in an insert region, all bytes of which backmap to A=hi_aoffset_ aoffset = current_hi_aoffset_; } return aoffset; } // Map an offset in A to the corresponding offset in A' int OffsetMap::MapForward(int aoffset){ MaybeFlushAll(); if (aoffset < 0) {return 0;} if (max_aoffset_ <= aoffset) { return (aoffset - max_aoffset_) + max_aprimeoffset_; } // If current_lo_aoffset_ <= aoffset < current_hi_aoffset_, // use current mapping, else move window left/right bool ok = true; while (ok && (aoffset < current_lo_aoffset_)) { ok = MoveLeft(); } while (ok && (current_hi_aoffset_ <= aoffset)) { ok = MoveRight(); } int aprimeoffset = aoffset + current_diff_; if (aprimeoffset >= current_hi_aprimeoffset_) { // A is in a delete region, all bytes of which map to A'=hi_aprimeoffset_ aprimeoffset = current_hi_aprimeoffset_; } return aprimeoffset; } // static bool OffsetMap::CopyInserts(OffsetMap* source, OffsetMap* dest) { bool ok = true; while (ok && (source->next_diff_sub_ != source->diffs_.size())) { ok = source->MoveRight(); if (source->current_lo_aoffset_ != source->current_hi_aoffset_) { return false; } dest->Insert( source->current_hi_aprimeoffset_ - source->current_lo_aprimeoffset_); } return true; } // static bool OffsetMap::CopyDeletes(OffsetMap* source, OffsetMap* dest) { bool ok = true; while (ok && (source->next_diff_sub_ != source->diffs_.size())) { ok = source->MoveRight(); if (source->current_lo_aprimeoffset_ != source->current_hi_aprimeoffset_) { return false; } dest->Delete(source->current_hi_aoffset_ - source->current_lo_aoffset_); } return true; } // static void OffsetMap::ComposeOffsetMap( OffsetMap* g, OffsetMap* f, OffsetMap* h) { h->Clear(); f->Reset(); g->Reset(); int lo = 0; for (;;) { // Consume delete operations in f. This moves A without moving // A' and A''. if (lo >= g->current_hi_aoffset_ && CopyInserts(g, h)) { if (lo >= f->current_hi_aprimeoffset_ && CopyDeletes(f, h)) { // fprintf(stderr, // "ComposeOffsetMap ERROR, f is longer than g.
\n"); } // FlushAll(), called by Reset(), MapForward() or MapBack(), has // added an extra COPY_OP to f and g, so this function has // composed an extra COPY_OP in h from those. To avoid // FlushAll() adds one more extra COPY_OP to h later, dispatch // Flush() right now. h->Flush(); return; } // Consume insert operations in g. This moves A'' without moving A // and A'. if (lo >= f->current_hi_aprimeoffset_) { if (!CopyDeletes(f, h)) { // fprintf(stderr, // "ComposeOffsetMap ERROR, g is longer than f.
\n"); } } // Compose one operation which moves A' from lo to hi. int hi = min(f->current_hi_aprimeoffset_, g->current_hi_aoffset_); if (f->current_lo_aoffset_ != f->current_hi_aoffset_ && g->current_lo_aprimeoffset_ != g->current_hi_aprimeoffset_) { h->Copy(hi - lo); } else if (f->current_lo_aoffset_ != f->current_hi_aoffset_) { h->Delete(hi - lo); } else if (g->current_lo_aprimeoffset_ != g->current_hi_aprimeoffset_) { h->Insert(hi - lo); } lo = hi; } } // For testing only -- force a mapping void OffsetMap::StuffIt(const string& diffs, int max_aoffset, int max_aprimeoffset) { Clear(); diffs_ = diffs; max_aoffset_ = max_aoffset; max_aprimeoffset_ = max_aprimeoffset; } } // namespace CLD2