Nagram/TMessagesProj/jni/webrtc/base/strings/utf_offset_string_conversions.cc

// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/strings/utf_offset_string_conversions.h"

#include <stdint.h>

#include <algorithm>
#include <memory>

#include "base/logging.h"
#include "base/strings/string_piece.h"
#include "base/strings/utf_string_conversion_utils.h"

namespace base {

OffsetAdjuster::Adjustment::Adjustment(size_t original_offset,
                                       size_t original_length,
                                       size_t output_length)
    : original_offset(original_offset),
      original_length(original_length),
      output_length(output_length) {
}

// static
// Runs AdjustOffset() over every element of |offsets_for_adjustment|, updating
// each element in place.  The same |adjustments| and |limit| are used for all
// of them.  |offsets_for_adjustment| must not be null.
void OffsetAdjuster::AdjustOffsets(const Adjustments& adjustments,
                                   std::vector<size_t>* offsets_for_adjustment,
                                   size_t limit) {
  DCHECK(offsets_for_adjustment);
  std::for_each(offsets_for_adjustment->begin(),
                offsets_for_adjustment->end(),
                [&adjustments, limit](size_t& offset) {
                  AdjustOffset(adjustments, &offset, limit);
                });
}

// static
// Translates |*offset| from a position in the original string to the
// corresponding position in the adjusted string, in place.
//
//  * An offset that is already string16::npos is left untouched.
//  * An offset that falls strictly inside an adjusted run (after its first
//    character and before its end) has no counterpart and becomes npos.
//  * Otherwise every adjustment that starts before the offset contributes
//    (output_length - original_length) to the result.
//  * If the translated offset exceeds |limit| it becomes npos.
//
// The loop stops at the first adjustment that starts at or after the offset,
// so |adjustments| is expected to be ordered by increasing original_offset.
void OffsetAdjuster::AdjustOffset(const Adjustments& adjustments,
                                  size_t* offset,
                                  size_t limit) {
  DCHECK(offset);
  if (*offset == string16::npos)
    return;
  // Accumulate both sides in size_t rather than folding the difference into
  // an int: narrowing a size_t difference to int can truncate/overflow on
  // very large inputs.  Unsigned arithmetic wraps modulo 2^N, so the final
  // "+= output - original" is exact for both shrinking and growing
  // adjustments.
  size_t original_lengths = 0u;
  size_t output_lengths = 0u;
  for (const auto& i : adjustments) {
    if (*offset <= i.original_offset)
      break;
    if (*offset < (i.original_offset + i.original_length)) {
      *offset = string16::npos;
      return;
    }
    original_lengths += i.original_length;
    output_lengths += i.output_length;
  }
  *offset += output_lengths - original_lengths;

  if (*offset > limit)
    *offset = string16::npos;
}

// static
// Applies UnadjustOffset() in place to each element of
// |offsets_for_unadjustment|.  Does nothing when the vector pointer is null or
// when there are no adjustments to undo.
void OffsetAdjuster::UnadjustOffsets(
    const Adjustments& adjustments,
    std::vector<size_t>* offsets_for_unadjustment) {
  if (!offsets_for_unadjustment)
    return;
  if (adjustments.empty())
    return;
  for (auto it = offsets_for_unadjustment->begin();
       it != offsets_for_unadjustment->end(); ++it) {
    UnadjustOffset(adjustments, &*it);
  }
}

// static
// Inverse of AdjustOffset(): rewrites |*offset| in place from a position in
// the adjusted string to the matching position in the original string.  An
// offset of string16::npos is left alone; an offset that lands inside the
// original extent of an adjustment is set to npos.
void OffsetAdjuster::UnadjustOffset(const Adjustments& adjustments,
                                    size_t* offset) {
  if (*offset == string16::npos)
    return;
  // Net number of characters removed by the adjustments consumed so far
  // (negative when they expanded the string).
  int delta = 0;
  for (const auto& adj : adjustments) {
    // Stop once the offset, mapped back with what we know so far, does not
    // lie past the start of this adjustment.
    if (*offset + delta <= adj.original_offset)
      break;
    delta += static_cast<int>(adj.original_length - adj.output_length);
    const size_t adj_original_end = adj.original_offset + adj.original_length;
    if ((*offset + delta) < adj_original_end) {
      *offset = string16::npos;
      return;
    }
  }
  *offset += delta;
}

// static
// Combines two successive sets of adjustments into one.
//
// |first_adjustments| is read-only and is treated as being expressed in terms
// of the original string.  |adjustments_on_adjusted_string| is treated as
// being expressed in terms of the string that results from applying
// |first_adjustments|.  On return, |*adjustments_on_adjusted_string| is
// replaced with the merged list, built by a single simultaneous pass over both
// inputs:
//   * entries from |first_adjustments| are copied through unchanged;
//   * entries from |adjustments_on_adjusted_string| have their
//     |original_offset| shifted by the characters collapsed so far, and their
//     |original_length| grown by any first-pass collapse that falls inside
//     them.
//
// Only first-pass adjustments that shrink the string are supported when they
// overlap a second-pass adjustment (see the DCHECK_GT below).
void OffsetAdjuster::MergeSequentialAdjustments(
    const Adjustments& first_adjustments,
    Adjustments* adjustments_on_adjusted_string) {
  auto adjusted_iter = adjustments_on_adjusted_string->begin();
  auto first_iter = first_adjustments.begin();
  // Simultaneously iterate over all |adjustments_on_adjusted_string| and
  // |first_adjustments|, pushing adjustments at the end of
  // |adjustments_builder| as we go.  |shift| keeps track of the current number
  // of characters collapsed by |first_adjustments| up to this point.
  // |currently_collapsing| keeps track of the number of characters collapsed by
  // |first_adjustments| into the current |adjusted_iter|'s length.  These are
  // characters that will change |shift| as soon as we're done processing the
  // current |adjusted_iter|; they are not yet reflected in |shift|.
  size_t shift = 0;
  size_t currently_collapsing = 0;
  // While we *could* update |adjustments_on_adjusted_string| in place by
  // inserting new adjustments into the middle, we would be repeatedly calling
  // |std::vector::insert|. That would cost O(n) time per insert, relative to
  // distance from end of the string.  By instead allocating
  // |adjustments_builder| and calling |std::vector::push_back|, we only pay
  // amortized constant time per push. We are trading space for time.
  Adjustments adjustments_builder;
  while (adjusted_iter != adjustments_on_adjusted_string->end()) {
    if ((first_iter == first_adjustments.end()) ||
        ((adjusted_iter->original_offset + shift +
          adjusted_iter->original_length) <= first_iter->original_offset)) {
      // Entire |adjusted_iter| (accounting for its shift and including its
      // whole original length) comes before |first_iter|.
      //
      // Correct the offset at |adjusted_iter| and move onto the next
      // adjustment that needs revising.
      adjusted_iter->original_offset += shift;
      // Anything collapsed into this entry now counts towards the shift
      // applied to later entries.
      shift += currently_collapsing;
      currently_collapsing = 0;
      adjustments_builder.push_back(*adjusted_iter);
      ++adjusted_iter;
    } else if ((adjusted_iter->original_offset + shift) >
               first_iter->original_offset) {
      // |first_iter| comes before the |adjusted_iter| (as adjusted by |shift|).

      // It's not possible for the adjustments to overlap.  (It shouldn't
      // be possible that we have an |adjusted_iter->original_offset| that,
      // when adjusted by the computed |shift|, is in the middle of
      // |first_iter|'s output's length.  After all, that would mean the
      // current adjustment_on_adjusted_string somehow points to an offset
      // that was supposed to have been eliminated by the first set of
      // adjustments.)
      DCHECK_LE(first_iter->original_offset + first_iter->output_length,
                adjusted_iter->original_offset + shift);

      // Add the |first_iter| to the full set of adjustments.
      shift += first_iter->original_length - first_iter->output_length;
      adjustments_builder.push_back(*first_iter);
      ++first_iter;
    } else {
      // The first adjustment adjusted something that then got further adjusted
      // by the second set of adjustments.  In other words, |first_iter| points
      // to something in the range covered by |adjusted_iter|'s length (after
      // accounting for |shift|).  Precisely (the upper bound is strict because
      // the first branch above already handled the <= case),
      //   adjusted_iter->original_offset + shift
      //   <=
      //   first_iter->original_offset
      //   <
      //   adjusted_iter->original_offset + shift +
      //       adjusted_iter->original_length

      // Modify the current |adjusted_iter| to include whatever collapsing
      // happened in |first_iter|, then advance to the next |first_adjustments|
      // because we dealt with the current one.
      const int collapse = static_cast<int>(first_iter->original_length) -
          static_cast<int>(first_iter->output_length);
      // This function does not know how to deal with a string that expands and
      // then gets modified, only strings that collapse and then get modified.
      DCHECK_GT(collapse, 0);
      adjusted_iter->original_length += collapse;
      currently_collapsing += collapse;
      ++first_iter;
    }
  }
  // Every pending collapse must have been folded into |shift| when its
  // |adjusted_iter| entry was emitted.
  DCHECK_EQ(0u, currently_collapsing);
  if (first_iter != first_adjustments.end()) {
    // Only first adjustments are left.  These do not need to be modified.
    // (Their offsets are already correct with respect to the original string.)
    // Append them all.
    DCHECK(adjusted_iter == adjustments_on_adjusted_string->end());
    adjustments_builder.insert(adjustments_builder.end(), first_iter,
                               first_adjustments.end());
  }
  *adjustments_on_adjusted_string = std::move(adjustments_builder);
}

// Converts |src_len| code units starting at |src| from the source Unicode
// encoding to the destination encoding, writing each converted character to
// |output| via WriteUnicodeCharacter().  This function does not reset |output|
// itself; the callers in this file run a PrepareFor...Output() helper on it
// first.
//
// If non-NULL, |adjustments| is cleared and then filled with one entry for
// every character whose converted length differs from its source length
// (i.e. all alterations that are not one-character-to-one-character).  Entries
// are appended in the order the input is scanned, so they are sorted by
// increasing offset.
//
// Returns true if every character was read successfully.  A character that
// fails to read is written as U+FFFD and makes the result false; conversion
// still continues.
//
// NOTE(review): |src_len| is narrowed to int32_t, so inputs longer than
// INT32_MAX code units are not handled -- confirm callers never pass such
// lengths.
template<typename SrcChar, typename DestStdString>
bool ConvertUnicode(const SrcChar* src,
                    size_t src_len,
                    DestStdString* output,
                    OffsetAdjuster::Adjustments* adjustments) {
  if (adjustments)
    adjustments->clear();
  // ICU requires 32-bit numbers.
  const int32_t src_len32 = static_cast<int32_t>(src_len);
  bool all_valid = true;
  for (int32_t i = 0; i < src_len32; i++) {
    const size_t start = i;
    uint32_t code_point;
    size_t written = 0;
    if (!ReadUnicodeCharacter(src, src_len32, &i, &code_point)) {
      // Unreadable input: emit the replacement character and remember the
      // failure.
      all_valid = false;
      written = WriteUnicodeCharacter(0xFFFD, output);
    } else {
      written = WriteUnicodeCharacter(code_point, output);
    }

    // ReadUnicodeCharacter() leaves |i| pointing _at_ the last code unit it
    // consumed rather than one past it (the loop's own increment steps over
    // it), hence the "+ 1" when working out how much input was read.
    const size_t consumed = i - start + 1;

    // An adjustment is only worth recording when the character's length
    // actually changed.
    if (adjustments && (consumed != written)) {
      adjustments->push_back(
          OffsetAdjuster::Adjustment(start, consumed, written));
    }
  }
  return all_valid;
}

// Converts the |src_len| bytes of UTF-8 at |src| into |output|, recording any
// length-changing characters in |adjustments| (see ConvertUnicode()).  The
// return value is ConvertUnicode()'s: true when every character was read
// successfully.
bool UTF8ToUTF16WithAdjustments(
    const char* src,
    size_t src_len,
    string16* output,
    base::OffsetAdjuster::Adjustments* adjustments) {
  PrepareForUTF16Or32Output(src, src_len, output);
  const bool converted_cleanly =
      ConvertUnicode(src, src_len, output, adjustments);
  return converted_cleanly;
}

// Convenience overload: converts |utf8| and returns the UTF-16 string by
// value.  The success flag of the underlying conversion is discarded.
string16 UTF8ToUTF16WithAdjustments(
    const base::StringPiece& utf8,
    base::OffsetAdjuster::Adjustments* adjustments) {
  string16 utf16;
  const char* const bytes = utf8.data();
  const size_t byte_count = utf8.length();
  UTF8ToUTF16WithAdjustments(bytes, byte_count, &utf16, adjustments);
  return utf16;
}

// Converts |utf8| to UTF-16 and rewrites each entry of
// |offsets_for_adjustment| in place so it refers to the converted string.
// Offsets past the end of |utf8| are set to string16::npos before conversion;
// the remaining ones are mapped with OffsetAdjuster::AdjustOffsets().
string16 UTF8ToUTF16AndAdjustOffsets(
    const base::StringPiece& utf8,
    std::vector<size_t>* offsets_for_adjustment) {
  const size_t utf8_length = utf8.length();
  std::for_each(offsets_for_adjustment->begin(),
                offsets_for_adjustment->end(),
                [utf8_length](size_t& offset) {
                  if (offset > utf8_length)
                    offset = string16::npos;
                });

  OffsetAdjuster::Adjustments adjustments;
  string16 result = UTF8ToUTF16WithAdjustments(utf8, &adjustments);
  OffsetAdjuster::AdjustOffsets(adjustments, offsets_for_adjustment);
  return result;
}

// Converts |utf16| to UTF-8 and rewrites each entry of
// |offsets_for_adjustment| in place so it refers to the converted string.
// Offsets past the end of |utf16| are set to string16::npos before conversion;
// the remaining ones are mapped with OffsetAdjuster::AdjustOffsets().  The
// conversion's success flag is not reported to the caller.
std::string UTF16ToUTF8AndAdjustOffsets(
    const base::StringPiece16& utf16,
    std::vector<size_t>* offsets_for_adjustment) {
  const size_t utf16_length = utf16.length();
  for (auto it = offsets_for_adjustment->begin();
       it != offsets_for_adjustment->end(); ++it) {
    if (*it > utf16_length)
      *it = string16::npos;
  }

  std::string utf8;
  PrepareForUTF8Output(utf16.data(), utf16_length, &utf8);

  OffsetAdjuster::Adjustments adjustments;
  ConvertUnicode(utf16.data(), utf16_length, &utf8, &adjustments);
  OffsetAdjuster::AdjustOffsets(adjustments, offsets_for_adjustment);
  return utf8;
}

}  // namespace base
Update to 7.0.0 (2060) 2020-08-14 16:58:22 +00:00			`// Copyright (c) 2011 The Chromium Authors. All rights reserved.`
			`// Use of this source code is governed by a BSD-style license that can be`
			`// found in the LICENSE file.`

			`#include "base/strings/utf_offset_string_conversions.h"`

			`#include <stdint.h>`

			`#include <algorithm>`
			`#include <memory>`

			`#include "base/logging.h"`
			`#include "base/strings/string_piece.h"`
			`#include "base/strings/utf_string_conversion_utils.h"`

			`namespace base {`

			`OffsetAdjuster::Adjustment::Adjustment(size_t original_offset,`
			`size_t original_length,`
			`size_t output_length)`
			`: original_offset(original_offset),`
			`original_length(original_length),`
			`output_length(output_length) {`
			`}`

			`// static`
			`void OffsetAdjuster::AdjustOffsets(const Adjustments& adjustments,`
			`std::vector<size_t>* offsets_for_adjustment,`
			`size_t limit) {`
			`DCHECK(offsets_for_adjustment);`
			`for (auto& i : *offsets_for_adjustment)`
			`AdjustOffset(adjustments, &i, limit);`
			`}`

			`// static`
			`void OffsetAdjuster::AdjustOffset(const Adjustments& adjustments,`
			`size_t* offset,`
			`size_t limit) {`
			`DCHECK(offset);`
			`if (*offset == string16::npos)`
			`return;`
			`int adjustment = 0;`
			`for (const auto& i : adjustments) {`
			`if (*offset <= i.original_offset)`
			`break;`
			`if (*offset < (i.original_offset + i.original_length)) {`
			`*offset = string16::npos;`
			`return;`
			`}`
			`adjustment += static_cast<int>(i.original_length - i.output_length);`
			`}`
			`*offset -= adjustment;`

			`if (*offset > limit)`
			`*offset = string16::npos;`
			`}`

			`// static`
			`void OffsetAdjuster::UnadjustOffsets(`
			`const Adjustments& adjustments,`
			`std::vector<size_t>* offsets_for_unadjustment) {`
			`if (!offsets_for_unadjustment \|\| adjustments.empty())`
			`return;`
			`for (auto& i : *offsets_for_unadjustment)`
			`UnadjustOffset(adjustments, &i);`
			`}`

			`// static`
			`void OffsetAdjuster::UnadjustOffset(const Adjustments& adjustments,`
			`size_t* offset) {`
			`if (*offset == string16::npos)`
			`return;`
			`int adjustment = 0;`
			`for (const auto& i : adjustments) {`
			`if (*offset + adjustment <= i.original_offset)`
			`break;`
			`adjustment += static_cast<int>(i.original_length - i.output_length);`
			`if ((*offset + adjustment) < (i.original_offset + i.original_length)) {`
			`*offset = string16::npos;`
			`return;`
			`}`
			`}`
			`*offset += adjustment;`
			`}`

			`// static`
			`void OffsetAdjuster::MergeSequentialAdjustments(`
			`const Adjustments& first_adjustments,`
			`Adjustments* adjustments_on_adjusted_string) {`
			`auto adjusted_iter = adjustments_on_adjusted_string->begin();`
			`auto first_iter = first_adjustments.begin();`
			`// Simultaneously iterate over all \|adjustments_on_adjusted_string\| and`
			`// \|first_adjustments\|, pushing adjustments at the end of`
			`// \|adjustments_builder\| as we go. \|shift\| keeps track of the current number`
			`// of characters collapsed by \|first_adjustments\| up to this point.`
			`// \|currently_collapsing\| keeps track of the number of characters collapsed by`
			`// \|first_adjustments\| into the current \|adjusted_iter\|'s length. These are`
			`// characters that will change \|shift\| as soon as we're done processing the`
			`// current \|adjusted_iter\|; they are not yet reflected in \|shift\|.`
			`size_t shift = 0;`
			`size_t currently_collapsing = 0;`
			`// While we could update \|adjustments_on_adjusted_string\| in place by`
			`// inserting new adjustments into the middle, we would be repeatedly calling`
			`// \|std::vector::insert\|. That would cost O(n) time per insert, relative to`
			`// distance from end of the string. By instead allocating`
			`// \|adjustments_builder\| and calling \|std::vector::push_back\|, we only pay`
			`// amortized constant time per push. We are trading space for time.`
			`Adjustments adjustments_builder;`
			`while (adjusted_iter != adjustments_on_adjusted_string->end()) {`
			`if ((first_iter == first_adjustments.end()) \|\|`
			`((adjusted_iter->original_offset + shift +`
			`adjusted_iter->original_length) <= first_iter->original_offset)) {`
			`// Entire \|adjusted_iter\| (accounting for its shift and including its`
			`// whole original length) comes before \|first_iter\|.`
			`//`
			`// Correct the offset at \|adjusted_iter\| and move onto the next`
			`// adjustment that needs revising.`
			`adjusted_iter->original_offset += shift;`
			`shift += currently_collapsing;`
			`currently_collapsing = 0;`
			`adjustments_builder.push_back(*adjusted_iter);`
			`++adjusted_iter;`
			`} else if ((adjusted_iter->original_offset + shift) >`
			`first_iter->original_offset) {`
			`// \|first_iter\| comes before the \|adjusted_iter\| (as adjusted by \|shift\|).`

			`// It's not possible for the adjustments to overlap. (It shouldn't`
			`// be possible that we have an \|adjusted_iter->original_offset\| that,`
			`// when adjusted by the computed \|shift\|, is in the middle of`
			`// \|first_iter\|'s output's length. After all, that would mean the`
			`// current adjustment_on_adjusted_string somehow points to an offset`
			`// that was supposed to have been eliminated by the first set of`
			`// adjustments.)`
			`DCHECK_LE(first_iter->original_offset + first_iter->output_length,`
			`adjusted_iter->original_offset + shift);`

			`// Add the \|first_iter\| to the full set of adjustments.`
			`shift += first_iter->original_length - first_iter->output_length;`
			`adjustments_builder.push_back(*first_iter);`
			`++first_iter;`
			`} else {`
			`// The first adjustment adjusted something that then got further adjusted`
			`// by the second set of adjustments. In other words, \|first_iter\| points`
			`// to something in the range covered by \|adjusted_iter\|'s length (after`
			`// accounting for \|shift\|). Precisely,`
			`// adjusted_iter->original_offset + shift`
			`// <=`
			`// first_iter->original_offset`
			`// <=`
			`// adjusted_iter->original_offset + shift +`
			`// adjusted_iter->original_length`

			`// Modify the current \|adjusted_iter\| to include whatever collapsing`
			`// happened in \|first_iter\|, then advance to the next \|first_adjustments\|`
			`// because we dealt with the current one.`
			`const int collapse = static_cast<int>(first_iter->original_length) -`
			`static_cast<int>(first_iter->output_length);`
			`// This function does not know how to deal with a string that expands and`
			`// then gets modified, only strings that collapse and then get modified.`
			`DCHECK_GT(collapse, 0);`
			`adjusted_iter->original_length += collapse;`
			`currently_collapsing += collapse;`
			`++first_iter;`
			`}`
			`}`
			`DCHECK_EQ(0u, currently_collapsing);`
			`if (first_iter != first_adjustments.end()) {`
			`// Only first adjustments are left. These do not need to be modified.`
			`// (Their offsets are already correct with respect to the original string.)`
			`// Append them all.`
			`DCHECK(adjusted_iter == adjustments_on_adjusted_string->end());`
			`adjustments_builder.insert(adjustments_builder.end(), first_iter,`
			`first_adjustments.end());`
			`}`
			`*adjustments_on_adjusted_string = std::move(adjustments_builder);`
			`}`

			`// Converts the given source Unicode character type to the given destination`
			`// Unicode character type as a STL string. The given input buffer and size`
			`// determine the source, and the given output STL string will be replaced by`
			`// the result. If non-NULL, \|adjustments\| is set to reflect the all the`
			`// alterations to the string that are not one-character-to-one-character.`
			`// It will always be sorted by increasing offset.`
			`template<typename SrcChar, typename DestStdString>`
			`bool ConvertUnicode(const SrcChar* src,`
			`size_t src_len,`
			`DestStdString* output,`
			`OffsetAdjuster::Adjustments* adjustments) {`
			`if (adjustments)`
			`adjustments->clear();`
			`// ICU requires 32-bit numbers.`
			`bool success = true;`
			`int32_t src_len32 = static_cast<int32_t>(src_len);`
			`for (int32_t i = 0; i < src_len32; i++) {`
			`uint32_t code_point;`
			`size_t original_i = i;`
			`size_t chars_written = 0;`
			`if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) {`
			`chars_written = WriteUnicodeCharacter(code_point, output);`
			`} else {`
			`chars_written = WriteUnicodeCharacter(0xFFFD, output);`
			`success = false;`
			`}`

			`// Only bother writing an adjustment if this modification changed the`
			`// length of this character.`
			`// NOTE: ReadUnicodeCharacter() adjusts \|i\| to point _at_ the last`
			`// character read, not after it (so that incrementing it in the loop`
			`// increment will place it at the right location), so we need to account`
			`// for that in determining the amount that was read.`
			`if (adjustments && ((i - original_i + 1) != chars_written)) {`
			`adjustments->push_back(OffsetAdjuster::Adjustment(`
			`original_i, i - original_i + 1, chars_written));`
			`}`
			`}`
			`return success;`
			`}`

			`bool UTF8ToUTF16WithAdjustments(`
			`const char* src,`
			`size_t src_len,`
			`string16* output,`
			`base::OffsetAdjuster::Adjustments* adjustments) {`
			`PrepareForUTF16Or32Output(src, src_len, output);`
			`return ConvertUnicode(src, src_len, output, adjustments);`
			`}`

			`string16 UTF8ToUTF16WithAdjustments(`
			`const base::StringPiece& utf8,`
			`base::OffsetAdjuster::Adjustments* adjustments) {`
			`string16 result;`
			`UTF8ToUTF16WithAdjustments(utf8.data(), utf8.length(), &result, adjustments);`
			`return result;`
			`}`

			`string16 UTF8ToUTF16AndAdjustOffsets(`
			`const base::StringPiece& utf8,`
			`std::vector<size_t>* offsets_for_adjustment) {`
			`for (size_t& offset : *offsets_for_adjustment) {`
			`if (offset > utf8.length())`
			`offset = string16::npos;`
			`}`
			`OffsetAdjuster::Adjustments adjustments;`
			`string16 result = UTF8ToUTF16WithAdjustments(utf8, &adjustments);`
			`OffsetAdjuster::AdjustOffsets(adjustments, offsets_for_adjustment);`
			`return result;`
			`}`

			`std::string UTF16ToUTF8AndAdjustOffsets(`
			`const base::StringPiece16& utf16,`
			`std::vector<size_t>* offsets_for_adjustment) {`
			`for (size_t& offset : *offsets_for_adjustment) {`
			`if (offset > utf16.length())`
			`offset = string16::npos;`
			`}`
			`std::string result;`
			`PrepareForUTF8Output(utf16.data(), utf16.length(), &result);`
			`OffsetAdjuster::Adjustments adjustments;`
			`ConvertUnicode(utf16.data(), utf16.length(), &result, &adjustments);`
			`OffsetAdjuster::AdjustOffsets(adjustments, offsets_for_adjustment);`
			`return result;`
			`}`

			`} // namespace base`