60 lines
1.9 KiB
C++
60 lines
1.9 KiB
C++
// Copyright 2014 The Chromium Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
// found in the LICENSE file.
|
|
|
|
// This implementation doesn't use ICU. The ICU macros are oriented towards
|
|
// character-at-a-time processing, whereas byte-at-a-time processing is easier
|
|
// with streaming input.
|
|
|
|
#include "base/i18n/streaming_utf8_validator.h"
|
|
|
|
#include "base/i18n/utf8_validator_tables.h"
|
|
#include "base/logging.h"
|
|
|
|
namespace base {
|
|
namespace {
|
|
|
|
// Reads one entry of the validator table. |offset| is DCHECKed to be below
// internal::kUtf8ValidatorTablesSize before the table is indexed.
uint8_t StateTableLookup(uint8_t offset) {
  DCHECK_LT(offset, internal::kUtf8ValidatorTablesSize);
  const uint8_t entry = internal::kUtf8ValidatorTables[offset];
  return entry;
}
|
|
|
|
} // namespace
|
|
|
|
// Feeds |size| bytes starting at |data| through the table-driven state machine,
// continuing from the state left by any previous call. The resulting state is
// stored back into |state_| and reported as:
//   VALID_ENDPOINT - the state is 0,
//   INVALID        - the state is I18N_UTF8_VALIDATOR_INVALID_INDEX,
//   VALID_MIDPOINT - any other state.
StreamingUtf8Validator::State StreamingUtf8Validator::AddBytes(const char* data,
                                                               size_t size) {
  // Work on a local copy of |state_| so the compiler does not need to worry
  // about it aliasing the input buffer.
  uint8_t current = state_;
  const char* const end = data + size;
  for (const char* cursor = data; cursor != end; ++cursor) {
    const char byte = *cursor;
    const bool is_ascii = (byte & 0x80) == 0;
    if (is_ascii) {
      // A byte with the high bit clear is only accepted in state 0; in any
      // other state it makes the input invalid and processing stops.
      if (current != 0) {
        current = internal::I18N_UTF8_VALIDATOR_INVALID_INDEX;
        break;
      }
      continue;
    }
    // The table entry at the current state gives the shift to apply to the low
    // seven bits of the byte; the entries after it give the next state for
    // each shifted value.
    const uint8_t shift = StateTableLookup(current);
    const uint8_t column = (byte & 0x7F) >> shift;
    current = StateTableLookup(current + column + 1);
    // |current| may already be the INVALID index here. The loop deliberately
    // keeps going: this code is optimised for valid UTF-8, and skipping the
    // early exit (except on ASCII bytes) is about 2% more efficient.
  }
  state_ = current;

  if (current == 0)
    return VALID_ENDPOINT;
  if (current == internal::I18N_UTF8_VALIDATOR_INVALID_INDEX)
    return INVALID;
  return VALID_MIDPOINT;
}
|
|
|
|
void StreamingUtf8Validator::Reset() {
|
|
state_ = 0u;
|
|
}
|
|
|
|
bool StreamingUtf8Validator::Validate(const std::string& string) {
|
|
return StreamingUtf8Validator().AddBytes(string.data(), string.size()) ==
|
|
VALID_ENDPOINT;
|
|
}
|
|
|
|
} // namespace base
|