From db693c2501487e238c463127589c3b14225e059c Mon Sep 17 00:00:00 2001
From: Luca Casonato <hello@lcas.dev>
Date: Fri, 18 Oct 2024 16:25:19 +0200
Subject: [PATCH 1/3] perf: speed up v8::String::to_rust_*_lossy()

This commit speeds up this common conversion
method by about 2x for many common cases. Short
one byte ASCII strings are now 20% faster. Longer
one byte ASCII strings are 2.5x faster. Short UTF8
strings are marginally slower (5%) but longer UTF8
strings are upwards of 2x faster.

A follow up will make the short UTF8 strings about
2x faster than the current implementation as well.
---
 Cargo.lock    |  11 ++
 Cargo.toml    |   1 +
 src/string.rs | 437 +++++++++++++++++++++++++++++++++-----------------
 3 files changed, 305 insertions(+), 144 deletions(-)
diff --git a/Cargo.lock b/Cargo.lock
index e38c06f2e0..91bc94c6b1 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1260,6 +1260,16 @@ version = "1.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
 
+[[package]]
+name = "simdutf"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c1945a45633804474a6f1aef87f072d7564c6421025a865f6777709a571fdfae"
+dependencies = [
+ "bitflags 2.5.0",
+ "cc",
+]
+
 [[package]]
 name = "slotmap"
 version = "1.0.7"
@@ -1456,6 +1466,7 @@ dependencies = [
  "once_cell",
  "paste",
  "rustversion",
+ "simdutf",
  "trybuild",
  "which",
 ]
diff --git a/Cargo.toml b/Cargo.toml
index 7363d45409..cf74cc84cf 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -91,6 +91,7 @@ use_custom_libcxx = []
 bitflags = "2.5"
 once_cell = "1.19"
 paste = "1.0"
+simdutf = "0.5.1"
 
 [build-dependencies]
 miniz_oxide = "0.7.2"
diff --git a/src/string.rs b/src/string.rs
index 99d92381c9..6ee1183e75 100644
--- a/src/string.rs
+++ b/src/string.rs
@@ -11,6 +11,7 @@ use std::borrow::Cow;
 use std::convert::TryInto;
 use std::default::Default;
 use std::ffi::c_void;
+use std::hint::unreachable_unchecked;
 use std::marker::PhantomData;
 use std::mem::MaybeUninit;
 use std::ptr::NonNull;
@@ -768,62 +769,12 @@ impl String {
     &self,
     scope: &mut Isolate,
   ) -> std::string::String {
-    let len_utf16 = self.length();
-
-    // No need to allocate or do any work for zero-length strings
-    if len_utf16 == 0 {
-      return std::string::String::new();
-    }
-
-    let len_utf8 = self.utf8_length(scope);
-
-    // If len_utf8 == len_utf16 and the string is one-byte, we can take the fast memcpy path. This is true iff the
-    // string is 100% 7-bit ASCII.
-    if self.is_onebyte() && len_utf8 == len_utf16 {
-      unsafe {
-        // Create an uninitialized buffer of `capacity` bytes. We need to be careful here to avoid
-        // accidentally creating a slice of u8 which would be invalid.
-        let layout = std::alloc::Layout::from_size_align(len_utf16, 1).unwrap();
-        let data = std::alloc::alloc(layout) as *mut MaybeUninit<u8>;
-        let buffer = std::ptr::slice_from_raw_parts_mut(data, len_utf16);
-
-        // Write to this MaybeUninit buffer, assuming we're going to fill this entire buffer
-        let length = self.write_one_byte_uninit(
-          scope,
-          &mut *buffer,
-          0,
-          WriteOptions::NO_NULL_TERMINATION
-            | WriteOptions::REPLACE_INVALID_UTF8,
-        );
-        debug_assert!(length == len_utf16);
-
-        // Return an owned string from this guaranteed now-initialized data
-        let buffer = data as *mut u8;
-        return std::string::String::from_raw_parts(buffer, length, len_utf16);
-      }
-    }
-
-    // SAFETY: This allocates a buffer manually using the default allocator using the string's capacity.
-    // We have a large number of invariants to uphold, so please check changes to this code carefully
-    unsafe {
-      // Create an uninitialized buffer of `capacity` bytes. We need to be careful here to avoid
-      // accidentally creating a slice of u8 which would be invalid.
-      let layout = std::alloc::Layout::from_size_align(len_utf8, 1).unwrap();
-      let data = std::alloc::alloc(layout) as *mut MaybeUninit<u8>;
-      let buffer = std::ptr::slice_from_raw_parts_mut(data, len_utf8);
-
-      // Write to this MaybeUninit buffer, assuming we're going to fill this entire buffer
-      let length = self.write_utf8_uninit(
-        scope,
-        &mut *buffer,
-        None,
-        WriteOptions::NO_NULL_TERMINATION | WriteOptions::REPLACE_INVALID_UTF8,
-      );
-      debug_assert!(length == len_utf8);
-
-      // Return an owned string from this guaranteed now-initialized data
-      let buffer = data as *mut u8;
-      std::string::String::from_raw_parts(buffer, length, len_utf8)
+    // SAFETY: @devsnek said it is fine.
+    let string = unsafe { Local::from_raw(self).unwrap_unchecked() };
+    let view = ValueView::new(scope, string);
+    match view.data() {
+      ValueViewData::OneByte(bytes) => latin1_to_string(bytes),
+      ValueViewData::TwoByte(code_points) => wtf16_to_string(code_points),
     }
   }
 
@@ -834,108 +785,306 @@ impl String {
     scope: &mut Isolate,
     buffer: &'a mut [MaybeUninit<u8>; N],
   ) -> Cow<'a, str> {
-    let len_utf16 = self.length();
-
-    // No need to allocate or do any work for zero-length strings
-    if len_utf16 == 0 {
-      return "".into();
+    // SAFETY: @devsnek said it is fine.
+    let string = unsafe { Local::from_raw(self).unwrap_unchecked() };
+    let view = ValueView::new(scope, string);
+    match view.data() {
+      ValueViewData::OneByte(bytes) => latin1_to_cow_str(bytes, buffer),
+      ValueViewData::TwoByte(code_points) => {
+        wtf16_to_cow_str(code_points, buffer)
+      }
     }
+  }
+}
 
-    // TODO(mmastrac): Ideally we should be able to access the string's internal representation
-    let len_utf8 = self.utf8_length(scope);
+/// Converts a Latin-1 (one-byte) string into an owned UTF-8 string.
+#[inline(always)]
+fn latin1_to_string(bytes: &[u8]) -> std::string::String {
+  // Perf: checking for ASCII first and then doing one memcpy seems to be
+  // faster than checking and copying each byte individually.
+  if bytes.is_ascii() {
+    // SAFETY: The string is ASCII, so it's valid UTF-8.
+    (unsafe { std::str::from_utf8_unchecked(bytes) }).to_owned()
+  } else {
+    // Each Latin-1 byte is the code point of the same value. Decoding with
+    // `from_utf8_lossy` would instead turn every byte >= 0x80 into U+FFFD.
+    // TODO: large strings could be sized and converted in one pass with SIMD.
+    bytes.iter().map(|&b| char::from(b)).collect()
+  }
+}
 
-    // If len_utf8 == len_utf16 and the string is one-byte, we can take the fast memcpy path. This is true iff the
-    // string is 100% 7-bit ASCII.
-    if self.is_onebyte() && len_utf8 == len_utf16 {
-      if len_utf16 <= N {
-        let length = self.write_one_byte_uninit(
-          scope,
-          buffer,
-          0,
-          WriteOptions::NO_NULL_TERMINATION,
-        );
-        debug_assert!(length == len_utf16);
-        unsafe {
-          // Get a slice of &[u8] of what we know is initialized now
-          let buffer = &mut buffer[..length];
-          let buffer = &mut *(buffer as *mut [_] as *mut [u8]);
-
-          // We know it's valid UTF-8, so make a string
-          return Cow::Borrowed(std::str::from_utf8_unchecked(buffer));
-        }
-      }
+/// The cutoff for when to use SIMD for converting WTF-16 to UTF-8. Any slice of
+/// code points longer than this will use SIMD, and any shorter will use the
+/// scalar implementation.
+const WTF16_CODE_POINT_LENGTH_CUTOFF_FOR_SIMD: usize = 96;
+
+#[inline(always)]
+fn wtf16_to_string(code_points: &[u16]) -> std::string::String {
+  // Inputs longer than the cutoff that are valid UTF-16 are transcoded with
+  // simdutf; everything else goes through the scalar, lossy implementation.
+  if code_points.len() > WTF16_CODE_POINT_LENGTH_CUTOFF_FOR_SIMD
+    && simdutf::validate_utf16(code_points)
+  {
+    let len_utf8 = simdutf::utf8_length_from_utf16(code_points);
+
+    let buffer = allocate_byte_buffer(len_utf8);
+
+    // SAFETY: `buffer` was allocated with room for exactly `len_utf8` bytes.
+    let written = unsafe {
+      simdutf::convert_utf16_to_utf8(
+        code_points.as_ptr(),
+        code_points.len(),
+        buffer as *mut u8,
+      )
+    };
+    debug_assert_eq!(written, len_utf8);
 
-      unsafe {
-        // Create an uninitialized buffer of `capacity` bytes. We need to be careful here to avoid
-        // accidentally creating a slice of u8 which would be invalid.
-        let layout = std::alloc::Layout::from_size_align(len_utf16, 1).unwrap();
-        let data = std::alloc::alloc(layout) as *mut MaybeUninit<u8>;
-        let buffer = std::ptr::slice_from_raw_parts_mut(data, len_utf16);
-
-        // Write to this MaybeUninit buffer, assuming we're going to fill this entire buffer
-        let length = self.write_one_byte_uninit(
-          scope,
-          &mut *buffer,
-          0,
-          WriteOptions::NO_NULL_TERMINATION
-            | WriteOptions::REPLACE_INVALID_UTF8,
-        );
-        debug_assert!(length == len_utf16);
-
-        // Return an owned string from this guaranteed now-initialized data
-        let buffer = data as *mut u8;
-        return Cow::Owned(std::string::String::from_raw_parts(
-          buffer, length, len_utf16,
-        ));
-      }
+    // SAFETY: `buffer` has capacity `len_utf8` and holds `written` UTF-8 bytes.
+    unsafe {
+      std::string::String::from_raw_parts(buffer as *mut u8, written, len_utf8)
     }
+  } else {
+    let len_utf8 = utf8_length_from_utf16_vectorized(code_points);
 
-    if len_utf8 <= N {
-      // No malloc path
-      let length = self.write_utf8_uninit(
-        scope,
-        buffer,
-        None,
-        WriteOptions::NO_NULL_TERMINATION | WriteOptions::REPLACE_INVALID_UTF8,
-      );
-      debug_assert!(length == len_utf8);
+    let buffer = allocate_byte_buffer(len_utf8);
 
-      // SAFETY: We know that we wrote `length` UTF-8 bytes. See `slice_assume_init_mut` for additional guarantee information.
-      unsafe {
-        // Get a slice of &[u8] of what we know is initialized now
-        let buffer = &mut buffer[..length];
-        let buffer = &mut *(buffer as *mut [_] as *mut [u8]);
+    // SAFETY: `buffer` was allocated with room for exactly `len_utf8` bytes.
+    let written =
+      unsafe { wtf16_to_utf8_lossy(code_points, buffer as *mut u8) };
 
-        // We know it's valid UTF-8, so make a string
-        return Cow::Borrowed(std::str::from_utf8_unchecked(buffer));
-      }
+    // SAFETY: `buffer` has capacity `len_utf8` and holds `written` UTF-8 bytes.
+    unsafe {
+      std::string::String::from_raw_parts(buffer as *mut u8, written, len_utf8)
     }
+  }
+}
 
-    // SAFETY: This allocates a buffer manually using the default allocator using the string's capacity.
-    // We have a large number of invariants to uphold, so please check changes to this code carefully
-    unsafe {
-      // Create an uninitialized buffer of `capacity` bytes. We need to be careful here to avoid
-      // accidentally creating a slice of u8 which would be invalid.
-      let layout = std::alloc::Layout::from_size_align(len_utf8, 1).unwrap();
-      let data = std::alloc::alloc(layout) as *mut MaybeUninit<u8>;
-      let buffer = std::ptr::slice_from_raw_parts_mut(data, len_utf8);
-
-      // Write to this MaybeUninit buffer, assuming we're going to fill this entire buffer
-      let length = self.write_utf8_uninit(
-        scope,
-        &mut *buffer,
-        None,
-        WriteOptions::NO_NULL_TERMINATION | WriteOptions::REPLACE_INVALID_UTF8,
+#[inline(always)]
+fn latin1_to_cow_str<'a, const N: usize>(
+  bytes: &[u8],
+  buffer: &'a mut [MaybeUninit<u8>; N],
+) -> Cow<'a, str> {
+  let is_ascii = bytes.is_ascii();
+  if is_ascii && bytes.len() <= N {
+    // SAFETY: The string is ASCII, so it's valid UTF-8. We know that the
+    // buffer can not be overlapping, as we never expose a &mut to the
+    // v8::ValueViewData buffer.
+    let str = unsafe {
+      std::ptr::copy_nonoverlapping(
+        bytes.as_ptr(),
+        buffer.as_mut_ptr() as *mut u8,
+        bytes.len(),
       );
-      debug_assert!(length == len_utf8);
+      std::str::from_utf8_unchecked(std::slice::from_raw_parts(
+        buffer.as_ptr() as *const u8,
+        bytes.len(),
+      ))
+    };
+    Cow::Borrowed(str)
+  } else if bytes.len() * 2 < N {
+    // SAFETY: The string is Latin1 - we need to convert to UTF-8. But it
+    // is short enough to fit into the buffer, because the buffer is at
+    // least twice as large as the string and any non-ASCII one-byte
+    // character will be encoded as exactly two bytes in UTF-8.
+    let written = unsafe {
+      latin1_to_utf8(
+        bytes.len(),
+        bytes.as_ptr(),
+        buffer.as_mut_ptr() as *mut u8,
+      )
+    };
+    debug_assert!(written <= buffer.len());
 
-      // Return an owned string from this guaranteed now-initialized data
-      let buffer = data as *mut u8;
-      Cow::Owned(std::string::String::from_raw_parts(
-        buffer, length, len_utf8,
+    // SAFETY: The buffer is filled with valid UTF-8 data.
+    let str = unsafe {
+      std::str::from_utf8_unchecked(std::slice::from_raw_parts(
+        buffer.as_ptr() as *const u8,
+        written,
       ))
+    };
+    Cow::Borrowed(str)
+  } else if is_ascii {
+    // Perf: it seems to be faster to check if the string is ASCII first and
+    // then do a memcpy if it is, rather than checking and copying each byte
+    // individually.
+
+    // SAFETY: The string is ASCII, so it's valid UTF-8.
+    Cow::Owned((unsafe { std::str::from_utf8_unchecked(bytes) }).to_owned())
+  } else {
+    // Each Latin-1 byte is the code point of the same value. Decoding with
+    // `from_utf8_lossy` would instead turn every byte >= 0x80 into U+FFFD.
+    // TODO: large strings could be sized and converted in one pass with SIMD.
+    Cow::Owned(bytes.iter().map(|&b| char::from(b)).collect())
+  }
+}
+
+/// Converts potentially ill-formed UTF-16 to UTF-8, borrowing `buffer` when
+/// the result fits into it and allocating a new string otherwise.
+#[inline(always)]
+fn wtf16_to_cow_str<'a, const N: usize>(
+  code_points: &[u16],
+  buffer: &'a mut [MaybeUninit<u8>; N],
+) -> Cow<'a, str> {
+  if code_points.len() > WTF16_CODE_POINT_LENGTH_CUTOFF_FOR_SIMD
+    && simdutf::validate_utf16(code_points)
+  {
+    let len_utf8 = simdutf::utf8_length_from_utf16(code_points);
+
+    let (buffer, owned) = if buffer.len() >= len_utf8 {
+      (buffer.as_mut_ptr(), false)
+    } else {
+      (allocate_byte_buffer(len_utf8), true)
+    };
+
+    // SAFETY: Whichever buffer was picked holds at least `len_utf8` bytes.
+    let written = unsafe {
+      simdutf::convert_utf16_to_utf8(
+        code_points.as_ptr(),
+        code_points.len(),
+        buffer as *mut u8,
+      )
+    };
+
+    if owned {
+      // SAFETY: `buffer` has capacity `len_utf8`, `written` bytes of it UTF-8.
+      let str = unsafe {
+        std::string::String::from_raw_parts(
+          buffer as *mut u8,
+          written,
+          len_utf8,
+        )
+      };
+      Cow::Owned(str)
+    } else {
+      // SAFETY: The first `written` bytes of `buffer` are valid UTF-8.
+      let str = unsafe {
+        std::str::from_utf8_unchecked(std::slice::from_raw_parts(
+          buffer as *const u8,
+          written,
+        ))
+      };
+      Cow::Borrowed(str)
+    }
+  } else {
+    let len_utf8 = utf8_length_from_utf16_vectorized(code_points);
+
+    let (buffer, owned) = if buffer.len() >= len_utf8 {
+      (buffer.as_mut_ptr(), false)
+    } else {
+      (allocate_byte_buffer(len_utf8), true)
+    };
+
+    // SAFETY: Whichever buffer was picked holds at least `len_utf8` bytes.
+    let written =
+      unsafe { wtf16_to_utf8_lossy(code_points, buffer as *mut u8) };
+
+    if owned {
+      // SAFETY: `buffer` has capacity `len_utf8`, `written` bytes of it UTF-8.
+      let str = unsafe {
+        std::string::String::from_raw_parts(
+          buffer as *mut u8,
+          written,
+          len_utf8,
+        )
+      };
+      Cow::Owned(str)
+    } else {
+      // SAFETY: The first `written` bytes of `buffer` are valid UTF-8.
+      let str = unsafe {
+        std::str::from_utf8_unchecked(std::slice::from_raw_parts(
+          buffer as *const u8,
+          written,
+        ))
+      };
+      Cow::Borrowed(str)
+    }
+  }
+}
+
+#[inline(always)]
+fn allocate_byte_buffer(len: usize) -> *mut MaybeUninit<u8> {
+  // `Vec` is sound for `len == 0` and diverges on OOM instead of yielding null.
+  let mut bytes = Vec::<MaybeUninit<u8>>::with_capacity(len);
+  unsafe { bytes.set_len(len) }; // SAFETY: `MaybeUninit` needs no init.
+  Box::into_raw(bytes.into_boxed_slice()) as *mut MaybeUninit<u8>
+}
+
+/// UTF-8 byte length of `code_points`, counting lone surrogates as U+FFFD.
+#[inline(always)]
+fn utf8_length_from_utf16_vectorized(code_points: &[u16]) -> usize {
+  std::char::decode_utf16(code_points.iter().copied())
+    .map(|c| c.unwrap_or(std::char::REPLACEMENT_CHARACTER).len_utf8())
+    .sum()
+}
+
+/// Transcodes `input_length` Latin-1 bytes at `inbuf` into UTF-8 at `outbuf`
+/// and returns the bytes written; `outbuf` needs room for 2x `input_length`.
+#[inline(always)]
+unsafe fn latin1_to_utf8(
+  input_length: usize,
+  inbuf: *const u8,
+  outbuf: *mut u8,
+) -> usize {
+  let mut written = 0;
+  for index in 0..input_length {
+    let byte = *(inbuf.add(index));
+    if byte < 0x80 {
+      // ASCII passes through unchanged.
+      *(outbuf.add(written)) = byte;
+      written += 1;
+    } else {
+      // Two-byte sequence: the lead byte carries the top two bits and the
+      // continuation byte the bottom six.
+      *(outbuf.add(written)) = (byte >> 6) | 0b1100_0000;
+      *(outbuf.add(written + 1)) = (byte & 0b0011_1111) | 0b1000_0000;
+      written += 2;
+    }
+  }
+  written
+}
+
+/// Writes `input` to `outbuf` as UTF-8, replacing unpaired surrogates with
+/// U+FFFD, and returns the bytes written. `outbuf` must be large enough.
+#[inline(always)]
+unsafe fn wtf16_to_utf8_lossy(input: &[u16], outbuf: *mut u8) -> usize {
+  const TAG_TWO_BYTE: u8 = 0xC0;
+  const TAG_THREE_BYTE: u8 = 0xE0;
+  const TAG_FOUR_BYTE: u8 = 0xF0;
+  const TAG_CONT: u8 = 0x80;
+  let mut written = 0;
+  for decoded in std::char::decode_utf16(input.iter().copied()) {
+    let c = decoded.unwrap_or(std::char::REPLACEMENT_CHARACTER);
+    let code = c as u32;
+    match c.len_utf8() {
+      1 => {
+        *(outbuf.add(written)) = code as u8;
+        written += 1;
+      }
+      2 => {
+        *(outbuf.add(written)) = TAG_TWO_BYTE | ((code >> 6) as u8);
+        *(outbuf.add(written + 1)) = TAG_CONT | ((code & 0x3F) as u8);
+        written += 2;
+      }
+      3 => {
+        *(outbuf.add(written)) = TAG_THREE_BYTE | ((code >> 12) as u8);
+        *(outbuf.add(written + 1)) = TAG_CONT | (((code >> 6) & 0x3F) as u8);
+        *(outbuf.add(written + 2)) = TAG_CONT | ((code & 0x3F) as u8);
+        written += 3;
+      }
+      4 => {
+        *(outbuf.add(written)) = TAG_FOUR_BYTE | ((code >> 18) as u8);
+        *(outbuf.add(written + 1)) = TAG_CONT | (((code >> 12) & 0x3F) as u8);
+        *(outbuf.add(written + 2)) = TAG_CONT | (((code >> 6) & 0x3F) as u8);
+        *(outbuf.add(written + 3)) = TAG_CONT | ((code & 0x3F) as u8);
+        written += 4;
+      }
+      _ => {
+        // SAFETY: `char::len_utf8` only ever returns 1, 2, 3, or 4.
+        unsafe { unreachable_unchecked() }
+      }
     }
   }
+  written
 }
 
 pub extern "C" fn free_rust_external_onebyte(s: *mut char, len: usize) {
@@ -970,7 +1119,7 @@ pub struct ValueView<'s>(
 impl<'s> ValueView<'s> {
   #[inline(always)]
   pub fn new(isolate: &mut Isolate, string: Local<'s, String>) -> Self {
-    let mut v = std::mem::MaybeUninit::uninit();
+    let mut v: MaybeUninit<ValueView<'_>> = std::mem::MaybeUninit::uninit();
     unsafe {
       v8__String__ValueView__CONSTRUCT(v.as_mut_ptr(), isolate, &*string);
       v.assume_init()

From 5b2734d3c2a133dd86308d3fc6a02c0ed69615b4 Mon Sep 17 00:00:00 2001
From: Luca Casonato <hello@lcas.dev>
Date: Fri, 25 Oct 2024 19:40:20 +0200
Subject: [PATCH 2/3] try

---
 .github/workflows/ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 66308ef715..899b8d79ec 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -112,7 +112,7 @@ jobs:
           sudo apt install -yq --no-install-suggests --no-install-recommends \
             binfmt-support g++-10-aarch64-linux-gnu g++-10-multilib \
             gcc-10-aarch64-linux-gnu libc6-arm64-cross qemu qemu-user \
-            qemu-user-binfmt
+            qemu-user-binfmt aarch64-linux-gnu-g++
 
           sudo ln -s /usr/aarch64-linux-gnu/lib/ld-linux-aarch64.so.1 \
                      /lib/ld-linux-aarch64.so.1

From 1eba6105459a46c8160137c660d3a7cb6a4c9cfd Mon Sep 17 00:00:00 2001
From: Luca Casonato <hello@lcas.dev>
Date: Mon, 28 Oct 2024 15:32:17 +0100
Subject: [PATCH 3/3] add method to ValueView

---
 src/string.rs | 77 +++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 75 insertions(+), 2 deletions(-)

diff --git a/src/string.rs b/src/string.rs
index 6ee1183e75..4b60960f08 100644
--- a/src/string.rs
+++ b/src/string.rs
@@ -778,7 +778,8 @@ impl String {
     }
   }
 
-  /// Converts a [`crate::String`] to either an owned [`std::string::String`], or a borrowed [`str`], depending on whether it fits into the
+  /// Converts a [`crate::String`] to either an owned [`std::string::String`],
+  /// or a borrowed [`str`], depending on whether it fits into the
   /// provided buffer.
   pub fn to_rust_cow_lossy<'a, const N: usize>(
     &self,
@@ -789,7 +790,9 @@ impl String {
     let string = unsafe { Local::from_raw(self).unwrap_unchecked() };
     let view = ValueView::new(scope, string);
     match view.data() {
-      ValueViewData::OneByte(bytes) => latin1_to_cow_str(bytes, buffer),
+      ValueViewData::OneByte(bytes) => {
+        latin1_to_cow_str_always_copy(bytes, buffer)
+      }
       ValueViewData::TwoByte(code_points) => {
         wtf16_to_cow_str(code_points, buffer)
       }
@@ -861,6 +864,44 @@ fn wtf16_to_string(code_points: &[u16]) -> std::string::String {
 
 #[inline(always)]
 fn latin1_to_cow_str<'a, const N: usize>(
+  bytes: &'a [u8],
+  buffer: &'a mut [MaybeUninit<u8>; N],
+) -> Cow<'a, str> {
+  if bytes.is_ascii() {
+    // SAFETY: The string is ASCII, so it's valid UTF-8.
+    Cow::Borrowed(unsafe { std::str::from_utf8_unchecked(bytes) })
+  } else if bytes.len() * 2 < N {
+    // SAFETY: The string is Latin1 - we need to convert to UTF-8. But it
+    // is short enough to fit into the buffer, because the buffer is at
+    // least twice as large as the string and any non-ASCII one-byte
+    // character will be encoded as exactly two bytes in UTF-8.
+    let written = unsafe {
+      latin1_to_utf8(
+        bytes.len(),
+        bytes.as_ptr(),
+        buffer.as_mut_ptr() as *mut u8,
+      )
+    };
+    debug_assert!(written <= buffer.len());
+
+    // SAFETY: The buffer is filled with valid UTF-8 data.
+    let str = unsafe {
+      std::str::from_utf8_unchecked(std::slice::from_raw_parts(
+        buffer.as_ptr() as *const u8,
+        written,
+      ))
+    };
+    Cow::Borrowed(str)
+  } else {
+    // TODO: this could likely be optimized for large strings by using SIMD to
+    // calculate the length of the resulting string and then allocating once,
+    // and then converting the string using SIMD.
+    Cow::Owned(std::string::String::from_utf8_lossy(bytes).into_owned())
+  }
+}
+
+#[inline(always)]
+fn latin1_to_cow_str_always_copy<'a, const N: usize>(
   bytes: &[u8],
   buffer: &'a mut [MaybeUninit<u8>; N],
 ) -> Cow<'a, str> {
@@ -1145,3 +1186,35 @@ impl<'s> Drop for ValueView<'s> {
     unsafe { v8__String__ValueView__DESTRUCT(self) }
   }
 }
+
+impl ValueView<'_> {
+  /// Copies the contents of a [`ValueView`] into a [`std::string::String`].
+  /// Convenience function not present in the original V8 API.
+  pub fn to_rust_string_lossy(&self) -> std::string::String {
+    match self.data() {
+      ValueViewData::OneByte(bytes) => latin1_to_string(bytes),
+      ValueViewData::TwoByte(code_points) => wtf16_to_string(code_points),
+    }
+  }
+
+  /// Converts a [`ValueView`] to either an owned [`std::string::String`],
+  /// or a borrowed [`str`].
+  ///
+  /// If the [`ValueView`] is an ASCII one-byte string, the view's own bytes
+  /// are borrowed and no copies are performed. Otherwise the string is
+  /// transcoded to UTF-8: into the provided buffer if that is large enough,
+  /// in which case a reference to the buffer is returned, or else into a
+  /// newly allocated [`std::string::String`]. The result may therefore
+  /// borrow from either `self` or `buffer`.
+  pub fn to_rust_cow_lossy<'a, const N: usize>(
+    &'a self,
+    buffer: &'a mut [MaybeUninit<u8>; N],
+  ) -> Cow<'a, str> {
+    match self.data() {
+      ValueViewData::OneByte(bytes) => latin1_to_cow_str(bytes, buffer),
+      ValueViewData::TwoByte(code_points) => {
+        wtf16_to_cow_str(code_points, buffer)
+      }
+    }
+  }
+}