Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
* Fixes oven-sh#1675

* Add fallback for Bun.write

* Update blob.zig

* Fix test

---------

Co-authored-by: Jarred Sumner <[email protected]>
Jarred-Sumner and Jarred-Sumner authored Aug 21, 2023
1 parent 3de9ce5 commit f75b949
Showing 8 changed files with 297 additions and 70 deletions.
154 changes: 88 additions & 66 deletions src/bun.js/node/node_fs.zig
Original file line number Diff line number Diff line change
@@ -3129,6 +3129,77 @@ pub const NodeFS = struct {
return .{ .err = Syscall.Error.todo };
}

/// Copies bytes from `src_fd` to `dest_fd` using a plain read()/write() loop.
///
/// - `src` / `dest`: paths attached to a returned error. Pass an empty string
///   to return the error without a path.
/// - `stat_size`: expected number of bytes. It decides whether a larger heap
///   buffer is tried, and it caps each read until that many bytes have been
///   read. After that (or when it is 0) the loop keeps reading whole buffers
///   until read() reports EOF.
/// - `wrote`: incremented by the number of bytes actually written to
///   `dest_fd`, including when an error is returned part-way through.
///   Callers pass this value to ftruncate, so it must never exceed what
///   really reached the destination.
///
/// Returns `.err` from the first failing read or write. A read of 0 bytes
/// (EOF) or a write that reports 0 bytes ends the copy and returns success.
///
/// since we use a 64 KB stack buffer, we should not let this function get inlined
pub noinline fn copyFileUsingReadWriteLoop(src: [:0]const u8, dest: [:0]const u8, src_fd: FileDescriptor, dest_fd: FileDescriptor, stat_size: usize, wrote: *u64) Maybe(Return.CopyFile) {
    var stack_buf: [64 * 1024]u8 = undefined;
    var buf: []u8 = &stack_buf;

    // Only bother with a heap buffer when the file is larger than 16 stack
    // buffers. If the allocation fails we silently keep using the stack buffer.
    var heap_buf: ?[]u8 = null;
    if (stat_size > stack_buf.len * 16) {
        // Don't allocate more than 8 MB at a time
        const clamped_size: usize = @min(stat_size, 8 * 1024 * 1024);
        heap_buf = bun.default_allocator.alloc(u8, clamped_size) catch null;
    }
    defer if (heap_buf) |allocated| bun.default_allocator.free(allocated);
    if (heap_buf) |allocated| buf = allocated;

    // Bytes still expected according to `stat_size`. Once this reaches zero
    // we don't know the size anymore, so we read whole buffers until EOF.
    var remain: u64 = @intCast(stat_size);

    while (true) {
        const chunk: []u8 = if (remain > 0) buf[0..@min(buf.len, remain)] else buf;

        const amt = switch (Syscall.read(src_fd, chunk)) {
            .result => |result| result,
            .err => |err| return Maybe(Return.CopyFile){ .err = if (src.len > 0) err.withPath(src) else err },
        };
        // 0 == EOF
        if (amt == 0) break;
        remain -|= amt;

        // write() may accept fewer bytes than requested, so keep going until
        // everything that was just read has been flushed.
        var pending = chunk[0..amt];
        while (pending.len > 0) {
            const written = switch (Syscall.write(dest_fd, pending)) {
                .result => |result| result,
                .err => |err| return Maybe(Return.CopyFile){ .err = if (dest.len > 0) err.withPath(dest) else err },
            };
            // The destination accepted nothing: stop copying.
            if (written == 0) return Maybe(Return.CopyFile).success;

            // Count only what was written, not what was read, so the
            // caller's ftruncate never extends the file past real data.
            wrote.* += written;
            pending = pending[written..];
        }
    }

    return Maybe(Return.CopyFile).success;
}

/// https://github.com/libuv/libuv/pull/2233
/// https://github.com/pnpm/pnpm/issues/2761
/// https://github.com/libuv/libuv/pull/2578
@@ -3191,65 +3262,11 @@ pub const NodeFS = struct {
};
defer {
_ = std.c.ftruncate(dest_fd, @as(std.c.off_t, @intCast(@as(u63, @truncate(wrote)))));
_ = C.fchmod(dest_fd, stat_.mode);
_ = Syscall.close(dest_fd);
}

// stack buffer of 16 KB
// this code path isn't hit unless the buffer is < 128 KB
// 16 writes is ok
// 16 KB is high end of what is okay to use for stack space
// good thing we ask for absurdly large stack sizes
var buf: [16384]u8 = undefined;
var remain = @as(u64, @intCast(@max(stat_.size, 0)));
toplevel: while (remain > 0) {
const amt = switch (Syscall.read(src_fd, buf[0..@min(buf.len, remain)])) {
.result => |result| result,
.err => |err| return Maybe(Return.CopyFile){ .err = err.withPath(src) },
};
// 0 == EOF
if (amt == 0) {
break :toplevel;
}
wrote += amt;
remain -|= amt;

var slice = buf[0..amt];
while (slice.len > 0) {
const written = switch (Syscall.write(dest_fd, slice)) {
.result => |result| result,
.err => |err| return Maybe(Return.CopyFile){ .err = err.withPath(dest) },
};
if (written == 0) break :toplevel;
slice = slice[written..];
}
} else {
outer: while (true) {
const amt = switch (Syscall.read(src_fd, &buf)) {
.result => |result| result,
.err => |err| return Maybe(Return.CopyFile){ .err = err.withPath(src) },
};
// we don't know the size
// so we just go forever until we get an EOF
if (amt == 0) {
break;
}
wrote += amt;

var slice = buf[0..amt];
while (slice.len > 0) {
const written = switch (Syscall.write(dest_fd, slice)) {
.result => |result| result,
.err => |err| return Maybe(Return.CopyFile){ .err = err.withPath(dest) },
};
slice = slice[written..];
if (written == 0) break :outer;
}
}
}
// can't really do anything with this error
_ = C.fchmod(dest_fd, stat_.mode);

return ret.success;
return copyFileUsingReadWriteLoop(src, dest, src_fd, dest_fd, @intCast(@max(stat_.size, 0)), &wrote);
}
}

@@ -3298,27 +3315,32 @@ pub const NodeFS = struct {
.err => |err| return Maybe(Return.CopyFile){ .err = err },
};

var size = @as(usize, @intCast(@max(stat_.size, 0)));
var size: usize = @intCast(@max(stat_.size, 0));

defer {
_ = linux.ftruncate(dest_fd, @as(i64, @intCast(@as(u63, @truncate(wrote)))));
_ = linux.fchmod(dest_fd, stat_.mode);
_ = Syscall.close(dest_fd);
}

var off_in_copy = @as(i64, @bitCast(@as(u64, 0)));
var off_out_copy = @as(i64, @bitCast(@as(u64, 0)));

if (!bun.canUseCopyFileRangeSyscall()) {
return copyFileUsingReadWriteLoop(src, dest, src_fd, dest_fd, size, &wrote);
}

if (size == 0) {
// copy until EOF
while (true) {

// Linux Kernel 5.3 or later
const written = linux.copy_file_range(src_fd, &off_in_copy, dest_fd, &off_out_copy, std.mem.page_size, 0);
if (ret.errnoSysP(written, .copy_file_range, dest)) |err| {
// TODO: handle EXDEV
// seems like zfs does not support copy_file_range across devices
// see https://discord.com/channels/876711213126520882/876711213126520885/1006465112707698770
return err;
return switch (err.getErrno()) {
.XDEV, .NOSYS => copyFileUsingReadWriteLoop(src, dest, src_fd, dest_fd, size, &wrote),
else => return err,
};
}
// wrote zero bytes means EOF
if (written == 0) break;
@@ -3329,18 +3351,18 @@ pub const NodeFS = struct {
// Linux Kernel 5.3 or later
const written = linux.copy_file_range(src_fd, &off_in_copy, dest_fd, &off_out_copy, size, 0);
if (ret.errnoSysP(written, .copy_file_range, dest)) |err| {
// TODO: handle EXDEV
// seems like zfs does not support copy_file_range across devices
// see https://discord.com/channels/876711213126520882/876711213126520885/1006465112707698770
return err;
return switch (err.getErrno()) {
.XDEV, .NOSYS => copyFileUsingReadWriteLoop(src, dest, src_fd, dest_fd, size, &wrote),
else => return err,
};
}
// wrote zero bytes means EOF
if (written == 0) break;
wrote +|= written;
size -|= written;
}
}
_ = linux.fchmod(dest_fd, stat_.mode);

return ret.success;
}
},
48 changes: 47 additions & 1 deletion src/bun.js/webcore/blob.zig
Original file line number Diff line number Diff line change
@@ -2134,7 +2134,8 @@ pub const Blob = struct {
this.read_off += this.offset;

var remain = @as(usize, this.max_length);
if (remain == max_size or remain == 0) {
const unknown_size = remain == max_size or remain == 0;
if (unknown_size) {
// sometimes stat lies
// let's give it 4096 and see how it goes
remain = 4096;
@@ -2150,6 +2151,21 @@ pub const Blob = struct {

var has_unset_append = false;

// If they can't use copy_file_range, they probably also can't
// use sendfile() or splice()
if (!bun.canUseCopyFileRangeSyscall()) {
switch (JSC.Node.NodeFS.copyFileUsingReadWriteLoop("", "", src_fd, dest_fd, if (unknown_size) 0 else remain, &total_written)) {
.err => |err| {
this.system_error = err.toSystemError();
return AsyncIO.asError(err.errno);
},
.result => {
_ = linux.ftruncate(dest_fd, @as(std.os.off_t, @intCast(total_written)));
return;
},
}
}

while (true) {
const written = switch (comptime use) {
.copy_file_range => linux.copy_file_range(src_fd, null, dest_fd, null, remain, 0),
@@ -2160,6 +2176,19 @@ pub const Blob = struct {
switch (linux.getErrno(written)) {
.SUCCESS => {},

.NOSYS, .XDEV => {
switch (JSC.Node.NodeFS.copyFileUsingReadWriteLoop("", "", src_fd, dest_fd, if (unknown_size) 0 else remain, &total_written)) {
.err => |err| {
this.system_error = err.toSystemError();
return AsyncIO.asError(err.errno);
},
.result => {
_ = linux.ftruncate(dest_fd, @as(std.os.off_t, @intCast(total_written)));
return;
},
}
},

.INVAL => {
if (comptime clear_append_if_invalid) {
if (!has_unset_append) {
@@ -2175,6 +2204,23 @@ pub const Blob = struct {
}
}

// If the Linux machine doesn't support
// copy_file_range or the file descriptor is
// incompatible with the chosen syscall, fall back
// to a read/write loop
if (total_written == 0) {
switch (JSC.Node.NodeFS.copyFileUsingReadWriteLoop("", "", src_fd, dest_fd, if (unknown_size) 0 else remain, &total_written)) {
.err => |err| {
this.system_error = err.toSystemError();
return AsyncIO.asError(err.errno);
},
.result => {
_ = linux.ftruncate(dest_fd, @as(std.os.off_t, @intCast(total_written)));
return;
},
}
}

this.system_error = (JSC.Node.Syscall.Error{
.errno = @as(JSC.Node.Syscall.Error.Int, @intCast(@intFromEnum(linux.E.INVAL))),
.syscall = TryWith.tag.get(use).?,
1 change: 1 addition & 0 deletions src/bun.zig
Original file line number Diff line number Diff line change
@@ -873,6 +873,7 @@ pub fn FDHashMap(comptime Type: type) type {

const CopyFile = @import("./copy_file.zig");
pub const copyFileRange = CopyFile.copyFileRange;
pub const canUseCopyFileRangeSyscall = CopyFile.canUseCopyFileRangeSyscall;
pub const copyFile = CopyFile.copyFile;

pub fn parseDouble(input: []const u8) !f64 {
9 changes: 8 additions & 1 deletion src/copy_file.zig
Original file line number Diff line number Diff line change
@@ -67,9 +67,16 @@ pub fn copyFile(fd_in: os.fd_t, fd_out: os.fd_t) CopyFileError!void {
const Platform = @import("root").bun.analytics.GenerateHeader.GeneratePlatform;

var can_use_copy_file_range = std.atomic.Atomic(i32).init(0);
fn canUseCopyFileRangeSyscall() bool {
pub fn canUseCopyFileRangeSyscall() bool {
const result = can_use_copy_file_range.load(.Monotonic);
if (result == 0) {
// This flag mostly exists to make other code more easily testable.
if (bun.getenvZ("BUN_CONFIG_DISABLE_COPY_FILE_RANGE") != null) {
bun.Output.debug("copy_file_range is disabled by BUN_CONFIG_DISABLE_COPY_FILE_RANGE", .{});
can_use_copy_file_range.store(-1, .Monotonic);
return false;
}

const kernel = Platform.kernelVersion();
if (kernel.orderWithoutTag(.{ .major = 4, .minor = 5 }).compare(.gte)) {
bun.Output.debug("copy_file_range is supported", .{});
1 change: 1 addition & 0 deletions test/js/bun/io/bun-write-exdev-fixture.js
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
// Fixture: copies the file named by the second-to-last CLI argument to the
// path named by the last CLI argument, using Bun.write with two Bun.file handles.
const [sourcePath, destinationPath] = process.argv.slice(-2);
await Bun.write(Bun.file(destinationPath), Bun.file(sourcePath));
78 changes: 76 additions & 2 deletions test/js/bun/io/bun-write.test.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import fs from "fs";
import fs, { mkdirSync } from "fs";
import { it, expect, describe } from "bun:test";
import path from "path";
import path, { join } from "path";
import { gcTick, withoutAggressiveGC, bunExe, bunEnv } from "harness";
import { tmpdir } from "os";

@@ -307,3 +307,77 @@ it("#2674", async () => {
expect(error?.length).toBeFalsy();
expect(exitCode).toBe(0);
});

if (process.platform === "linux") {
  // Writes an Int32Array of `elementCount` entries to a source file, runs the
  // Bun.write fixture in a child process with copy_file_range disabled through
  // BUN_CONFIG_DISABLE_COPY_FILE_RANGE, and checks that the destination file
  // hashes to the same value as the original buffer.
  const expectCopyWithoutCopyFileRange = (variant, elementCount) => {
    const tempdir = `${tmpdir()}/fs.test.js/${Date.now()}-1/bun-write/${variant}`;
    expect(fs.existsSync(tempdir)).toBe(false);
    expect(tempdir.includes(mkdirSync(tempdir, { recursive: true }))).toBe(true);

    const buffer = new Int32Array(elementCount);
    for (let index = 0; index < buffer.length; index++) {
      buffer[index] = index % 256;
    }

    const hash = Bun.hash(buffer.buffer);
    const src = join(tempdir, "Bun.write.src.blob");
    const dest = join(tempdir, "Bun.write.dest.blob");

    try {
      fs.writeFileSync(src, buffer.buffer);

      expect(fs.existsSync(dest)).toBe(false);

      const child = Bun.spawnSync({
        stdio: ["inherit", "inherit", "inherit"],
        cmd: [bunExe(), join(import.meta.dir, "./bun-write-exdev-fixture.js"), src, dest],
        env: {
          ...bunEnv,
          BUN_CONFIG_DISABLE_COPY_FILE_RANGE: "1",
        },
      });
      expect(child.exitCode).toBe(0);

      expect(Bun.hash(fs.readFileSync(dest))).toBe(hash);
    } finally {
      // Only the two blobs are removed; the temp directory itself is left behind.
      fs.rmSync(src, { force: true });
      fs.rmSync(dest, { force: true });
    }
  };

  describe("should work when copyFileRange is not available", () => {
    it("on large files", () => {
      expectCopyWithoutCopyFileRange("large", 1024 * 1024 * 64);
    });

    it("on small files", () => {
      expectCopyWithoutCopyFileRange("small", 1 * 1024);
    });
  });
}
Loading

0 comments on commit f75b949

Please sign in to comment.