Patch applied to deps/v8 (hence the v8_embedder_string bump to -node.41).

Adds Utf8::IsAsciiOneByteString and uses it in Utf8::Encode: when the source
is one-byte and its first min(string.size(), content_capacity) characters are
all ASCII, they are copied with a single memcpy and the per-character loop
resumes after them. The uint16_t specialization always returns false, so
two-byte sources never take the memcpy path.

diff --git a/common.gypi b/common.gypi
index 5adfd888711ae4..c5a7dc9cacf8b9 100644
--- a/common.gypi
+++ b/common.gypi
@@ -38,7 +38,7 @@
 
     # Reset this number to 0 on major V8 upgrades.
     # Increment by one for each non-official patch applied to deps/v8.
-    'v8_embedder_string': '-node.40',
+    'v8_embedder_string': '-node.41',
 
     ##### V8 defaults for Node.js #####
 
diff --git a/deps/v8/src/strings/unicode-inl.h b/deps/v8/src/strings/unicode-inl.h
index 4aa7e8090ca39a..26f77adf28fd2f 100644
--- a/deps/v8/src/strings/unicode-inl.h
+++ b/deps/v8/src/strings/unicode-inl.h
@@ -10,6 +10,7 @@
 
 #include "src/base/logging.h"
 #include "src/utils/utils.h"
+#include "third_party/simdutf/simdutf.h"
 
 namespace unibrow {
 
@@ -219,6 +220,16 @@ bool Utf8::IsValidCharacter(uchar c) {
           c != kBadChar);
 }
 
+template <>
+bool Utf8::IsAsciiOneByteString<uint8_t>(const uint8_t* buffer, size_t size) {
+  return simdutf::validate_ascii(reinterpret_cast<const char*>(buffer), size);
+}
+
+template <>
+bool Utf8::IsAsciiOneByteString<uint16_t>(const uint16_t* buffer, size_t size) {
+  return false;
+}
+
 template <typename Char>
 Utf8::EncodingResult Utf8::Encode(v8::base::Vector<const Char> string,
                                   char* buffer, size_t capacity,
@@ -234,8 +245,17 @@ Utf8::EncodingResult Utf8::Encode(v8::base::Vector<const Char> string,
   const Char* characters = string.begin();
   size_t content_capacity = capacity - write_null;
   CHECK_LE(content_capacity, capacity);
-  uint16_t last = Utf16::kNoPreviousCharacter;
   size_t read_index = 0;
+  if (kSourceIsOneByte) {
+    size_t writeable = std::min(string.size(), content_capacity);
+    // Just memcpy when possible.
+    if (writeable > 0 && Utf8::IsAsciiOneByteString(characters, writeable)) {
+      memcpy(buffer, characters, writeable);
+      read_index = writeable;
+      write_index = writeable;
+    }
+  }
+  uint16_t last = Utf16::kNoPreviousCharacter;
   for (; read_index < string.size(); read_index++) {
     Char character = characters[read_index];
 
diff --git a/deps/v8/src/strings/unicode.h b/deps/v8/src/strings/unicode.h
index ef1e717b1ea857..32a0b84a8399b2 100644
--- a/deps/v8/src/strings/unicode.h
+++ b/deps/v8/src/strings/unicode.h
@@ -212,6 +212,16 @@ class V8_EXPORT_PRIVATE Utf8 {
   //   - valid code point range.
   static bool ValidateEncoding(const uint8_t* str, size_t length);
 
+  template <typename Char>
+  static bool IsAsciiOneByteString(const Char* buffer, size_t size);
+
+  template <>
+  inline bool IsAsciiOneByteString<uint8_t>(const uint8_t* buffer, size_t size);
+
+  template <>
+  inline bool IsAsciiOneByteString<uint16_t>(const uint16_t* buffer,
+                                             size_t size);
+
   // Encode the given characters as Utf8 into the provided output buffer.
   struct EncodingResult {
     size_t bytes_written;