Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Improve File.Copy by using the block copy operation for ReFS on Windows #88695

Closed
wants to merge 19 commits into from
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,43 @@ internal static partial class Kernel32
// https://docs.microsoft.com/windows-hardware/drivers/ddi/ntddstor/ni-ntddstor-ioctl_storage_read_capacity
internal const int IOCTL_STORAGE_READ_CAPACITY = 0x002D5140;

// https://learn.microsoft.com/windows/win32/api/winioctl/ni-winioctl-fsctl_set_sparse
internal const int FSCTL_SET_SPARSE = 0x000900c4;
/// <summary>
/// Input buffer passed to <c>DeviceIoControl</c> together with <see cref="FSCTL_SET_SPARSE"/>.
/// </summary>
/// <remarks>
/// NOTE(review): the native FILE_SET_SPARSE_BUFFER declares this member as a 1-byte BOOLEAN, whereas this
/// managed mirror is 4 bytes wide. On little-endian targets the first byte still carries the value, but
/// the struct size differs from the native one - confirm this is intentional before relying on sizeof.
/// </remarks>
internal struct FILE_SET_SPARSE_BUFFER
{
    // Presumably non-zero requests the sparse state and zero clears it - verify against the FSCTL_SET_SPARSE docs.
    internal int SetSparse;
}

// https://learn.microsoft.com/windows/win32/api/winioctl/ni-winioctl-fsctl_get_integrity_information
internal const int FSCTL_GET_INTEGRITY_INFORMATION = 0x0009027C;
// Managed mirror of the buffer used with FSCTL_GET_INTEGRITY_INFORMATION (presumably filled in by DeviceIoControl).
// The field order and widths below make up the binary layout (2 + 2 + 4 + 4 + 4 bytes); do not reorder or resize them.
internal struct FSCTL_GET_INTEGRITY_INFORMATION_BUFFER
{
// 16-bit checksum algorithm identifier.
internal ushort ChecksumAlgorithm;
// 16-bit reserved slot; keeps the following 32-bit fields aligned.
internal ushort Reserved;
// 32-bit flags value.
internal uint Flags;
// Checksum chunk size, in bytes (per the field name).
internal uint ChecksumChunkSizeInBytes;
// Cluster size, in bytes (per the field name).
internal uint ClusterSizeInBytes;
}

// https://learn.microsoft.com/windows/win32/api/winioctl/ni-winioctl-fsctl_set_integrity_information
internal const int FSCTL_SET_INTEGRITY_INFORMATION = 0x0009C280;
// Managed mirror of the buffer used with FSCTL_SET_INTEGRITY_INFORMATION (presumably the DeviceIoControl input).
// The field order and widths below make up the binary layout (2 + 2 + 4 bytes); do not reorder or resize them.
internal struct FSCTL_SET_INTEGRITY_INFORMATION_BUFFER
{
// 16-bit checksum algorithm identifier.
internal ushort ChecksumAlgorithm;
// 16-bit reserved slot; keeps Flags aligned on a 4-byte boundary.
internal ushort Reserved;
// 32-bit flags value.
internal uint Flags;
}

// https://learn.microsoft.com/windows/win32/api/winioctl/ni-winioctl-fsctl_duplicate_extents_to_file
internal const int FSCTL_DUPLICATE_EXTENTS_TO_FILE = 0x00098344;
// Managed mirror of the buffer used with FSCTL_DUPLICATE_EXTENTS_TO_FILE.
// The field order and widths below make up the binary layout; do not reorder or resize them.
internal struct DUPLICATE_EXTENTS_DATA
{
// Pointer-sized raw file handle. NOTE(review): presumably the handle of the file to copy extents from - confirm against the FSCTL docs.
internal IntPtr FileHandle;
// 64-bit offset into the source file (per the field name).
internal long SourceFileOffset;
// 64-bit offset into the target file (per the field name).
internal long TargetFileOffset;
// 64-bit count of bytes covered by the request (per the field name).
internal long ByteCount;
}

[LibraryImport(Libraries.Kernel32, EntryPoint = "DeviceIoControl", SetLastError = true)]
[return: MarshalAs(UnmanagedType.Bool)]
internal static unsafe partial bool DeviceIoControl(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

internal static partial class Interop
{
    internal static partial class Kernel32
    {
        // From FILE_INFO_BY_HANDLE_CLASS
        // Use for SetFileInformationByHandle
        internal const int FileDispositionInfo = 4;

        /// <summary>
        /// Buffer that accompanies <see cref="FileDispositionInfo"/> in a SetFileInformationByHandle call.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the native FILE_DISPOSITION_INFO declares this member as a 1-byte BOOLEAN, whereas this
        /// managed mirror is 4 bytes wide. On little-endian targets the first byte still carries the value, but
        /// the struct size differs from the native one - confirm this is intentional.
        /// </remarks>
        internal struct FILE_DISPOSITION_INFO
        {
            // Presumably non-zero marks the file for deletion when its handles are closed - verify against the Win32 docs.
            internal int DeleteFile;
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ internal static partial class FileAttributes
internal const int FILE_ATTRIBUTE_NORMAL = 0x00000080;
internal const int FILE_ATTRIBUTE_READONLY = 0x00000001;
internal const int FILE_ATTRIBUTE_DIRECTORY = 0x00000010;
internal const int FILE_ATTRIBUTE_SPARSE_FILE = 0x00000200;
internal const int FILE_ATTRIBUTE_REPARSE_POINT = 0x00000400;
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Runtime.InteropServices;

internal static partial class Interop
{
    internal static partial class Kernel32
    {
        /// <summary>
        /// P/Invoke declaration for <c>GetDiskFreeSpaceW</c>.
        /// </summary>
        /// <param name="drive">Path passed to the native function as a UTF-16 string.</param>
        /// <param name="sectorsPerCluster">Receives the number of sectors per cluster.</param>
        /// <param name="bytesPerSector">Receives the number of bytes per sector.</param>
        /// <param name="numberOfFreeClusters">Receives the number of free clusters.</param>
        /// <param name="totalNumberOfClusters">Receives the total number of clusters.</param>
        /// <returns>
        /// The native BOOL result. Because <c>SetLastError = true</c>, the Win32 error code can be read
        /// with <c>Marshal.GetLastWin32Error</c> after a <see langword="false"/> return.
        /// </returns>
        /// <remarks>
        /// NOTE(review): the native out parameters are unsigned 32-bit DWORDs; with <see cref="int"/> here,
        /// cluster counts above int.MaxValue would surface as negative numbers. Callers that use the
        /// cluster counts should reinterpret them as <see cref="uint"/> - confirm against the call sites.
        /// </remarks>
        [LibraryImport(Libraries.Kernel32, EntryPoint = "GetDiskFreeSpaceW", SetLastError = true, StringMarshalling = StringMarshalling.Utf16)]
        [return: MarshalAs(UnmanagedType.Bool)]
        internal static partial bool GetDiskFreeSpace(string drive, out int sectorsPerCluster, out int bytesPerSector, out int numberOfFreeClusters, out int totalNumberOfClusters);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,6 @@ internal static unsafe partial bool GetVolumeInformation(
int fileSystemNameBufLen);

internal const uint FILE_SUPPORTS_ENCRYPTION = 0x00020000;
internal const uint FILE_SUPPORTS_BLOCK_REFCOUNTING = 0x08000000;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using System.IO;
using System.Runtime.InteropServices;
using System.Text;
using Microsoft.Win32.SafeHandles;

internal static partial class Interop
{
    internal static partial class Kernel32
    {
        [LibraryImport(Libraries.Kernel32, EntryPoint = "GetVolumeInformationByHandleW", SetLastError = true, StringMarshalling = StringMarshalling.Utf16)]
        [return: MarshalAs(UnmanagedType.Bool)]
        private static unsafe partial bool GetVolumeInformationByHandleW(
            SafeFileHandle hFile,
            char* volumeName,
            int volumeNameBufLen,
            int* volSerialNumber,
            int* maxFileNameLen,
            int* fileSystemFlags,
            char* fileSystemName,
            int fileSystemNameBufLen);

        /// <summary>
        /// Calls <c>GetVolumeInformationByHandleW</c>, growing the volume-name buffer on demand.
        /// </summary>
        /// <param name="hFile">Handle identifying the file whose volume is queried.</param>
        /// <param name="volumePath">
        /// Receives the string the native call wrote into its volume-name buffer when
        /// <paramref name="wantsVolumePath"/> is true and the call succeeds; otherwise <see langword="null"/>.
        /// </param>
        /// <param name="wantsVolumePath">False to skip the name buffer entirely (a null pointer and length 0 are passed).</param>
        /// <param name="volSerialNumber">Forwarded unchanged to the native call.</param>
        /// <param name="maxFileNameLen">Forwarded unchanged to the native call.</param>
        /// <param name="fileSystemFlags">Receives the file system flags; set to 0 before the native call is made.</param>
        /// <param name="fileSystemName">Forwarded unchanged to the native call.</param>
        /// <param name="fileSystemNameBufLen">Forwarded unchanged to the native call.</param>
        /// <returns>0 on success; otherwise the Win32 error code of the failed call.</returns>
        public static unsafe int GetVolumeInformationByHandle(
            SafeFileHandle hFile,
            out string? volumePath,
            bool wantsVolumePath,
            int* volSerialNumber,
            int* maxFileNameLen,
            out int fileSystemFlags,
            char* fileSystemName,
            int fileSystemNameBufLen)
        {
            const int InitialCapacity = 512;

            // Hard stop for the doubling below so a persistent "insufficient buffer" answer cannot grow the
            // allocation until it overflows or exhausts memory. 32K characters is roughly the longest path
            // Windows accepts, so presumably no volume name needs more - TODO confirm.
            const int MaxCapacity = 32 * 1024;

            // Give the out parameter a deterministic value on every path, including failures.
            fileSystemFlags = 0;

            // Start on the stack. One extra slot is reserved for a terminator that the native call is never told about.
            Span<char> nameBuffer = wantsVolumePath ? stackalloc char[InitialCapacity + 1] : stackalloc char[0];
            int capacity = InitialCapacity;

            while (true)
            {
                if (!nameBuffer.IsEmpty)
                {
                    // Stack memory is not guaranteed to be zeroed (e.g. when locals-init is skipped), so write
                    // the reserved terminator explicitly; new string(char*) below reads up to the first '\0'.
                    nameBuffer[^1] = '\0';
                }

                int error;
                fixed (char* pName = nameBuffer)
                fixed (int* pFlags = &fileSystemFlags)
                {
                    if (GetVolumeInformationByHandleW(hFile, pName, Math.Max(nameBuffer.Length - 1, 0), volSerialNumber, maxFileNameLen, pFlags, fileSystemName, fileSystemNameBufLen))
                    {
                        volumePath = wantsVolumePath ? new string(pName) : null;
                        return 0;
                    }

                    error = Marshal.GetLastWin32Error();
                }

                // Only an undersized name buffer is worth retrying, and only while below the cap.
                // NOTE(review): ERROR_INSUFFICIENT_BUFFER is assumed to be the code reported for a short buffer - confirm.
                bool canRetry = wantsVolumePath
                    && error == Interop.Errors.ERROR_INSUFFICIENT_BUFFER
                    && capacity < MaxCapacity;

                if (!canRetry)
                {
                    volumePath = null;
                    return error;
                }

                // Double the capacity on the heap, keeping the extra terminator slot.
                // todo: use array pool
                capacity *= 2;
                nameBuffer = new char[capacity + 1];
            }
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using System.IO;
using System.Runtime.InteropServices;
using System.Text;

internal static partial class Interop
{
    internal static partial class Kernel32
    {
        [LibraryImport(Libraries.Kernel32, EntryPoint = "GetVolumePathNameW", SetLastError = true, StringMarshalling = StringMarshalling.Utf16)]
        [return: MarshalAs(UnmanagedType.Bool)]
        private static unsafe partial bool _GetVolumePathName(char* lpszFileName, char* lpszVolumePathName, int cchBufferLength);

        /// <summary>
        /// Calls <c>GetVolumePathNameW</c> for <paramref name="fileName"/>, growing the output buffer on demand.
        /// </summary>
        /// <param name="fileName">Path to query; it is passed through PathInternal.EnsureExtendedPrefixIfNeeded first.</param>
        /// <param name="volumePath">Receives the string written by the native call on success; otherwise <see langword="null"/>.</param>
        /// <returns>0 on success; otherwise the Win32 error code of the failed call.</returns>
        public static unsafe int GetVolumePathName(string fileName, out string? volumePath)
        {
            const int InitialCapacity = 512;

            // Hard stop for the doubling below so a persistent "buffer too small" answer cannot grow the
            // allocation until it overflows or exhausts memory. 32K characters is roughly the longest path
            // Windows accepts - TODO confirm this bound is sufficient.
            const int MaxCapacity = 32 * 1024;

            fileName = PathInternal.EnsureExtendedPrefixIfNeeded(fileName); //todo: unsure if this is needed for this API

            // Start on the stack. One extra slot is reserved for a terminator that the native call is never told about.
            Span<char> pathBuffer = stackalloc char[InitialCapacity + 1];
            int capacity = InitialCapacity;

            fixed (char* pFileName = fileName)
            {
                while (true)
                {
                    // Stack memory is not guaranteed to be zeroed (e.g. when locals-init is skipped), so write
                    // the reserved terminator explicitly; new string(char*) below reads up to the first '\0'.
                    pathBuffer[^1] = '\0';

                    int error;
                    fixed (char* pVolumePath = pathBuffer)
                    {
                        if (_GetVolumePathName(pFileName, pVolumePath, pathBuffer.Length - 1))
                        {
                            volumePath = new string(pVolumePath);
                            return 0;
                        }

                        error = Marshal.GetLastWin32Error();
                    }

                    // NOTE(review): which code signals a short output buffer is not established here.
                    // ERROR_FILENAME_EXCED_RANGE is believed to be the one GetVolumePathNameW uses, and
                    // ERROR_INSUFFICIENT_BUFFER is kept from the earlier revision - confirm against the Win32 docs.
                    bool bufferTooSmall = error == Interop.Errors.ERROR_INSUFFICIENT_BUFFER
                        || error == Interop.Errors.ERROR_FILENAME_EXCED_RANGE;

                    if (!bufferTooSmall || capacity >= MaxCapacity)
                    {
                        volumePath = null;
                        return error;
                    }

                    // Double the capacity on the heap, keeping the extra terminator slot.
                    // todo: use array pool
                    capacity *= 2;
                    pathBuffer = new char[capacity + 1];
                }
            }
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using Microsoft.Win32.SafeHandles;
using System.Runtime.InteropServices;

internal static partial class Interop
{
internal static partial class Kernel32
{
// P/Invoke declaration for SetEndOfFile in the library named by Libraries.Kernel32.
// No EntryPoint is given, so the managed method name doubles as the native export name.
// SetLastError = true: after a false return the Win32 error code can be read with Marshal.GetLastWin32Error.
// NOTE(review): presumably moves the end of file to the handle's current file position - confirm against the Win32 docs.
[LibraryImport(Libraries.Kernel32, SetLastError = true)]
[return: MarshalAs(UnmanagedType.Bool)]
internal static partial bool SetEndOfFile(SafeFileHandle hFile);
}
}
88 changes: 88 additions & 0 deletions src/libraries/System.IO.FileSystem/tests/File/Copy.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

using System.Collections.Generic;
using System.Linq;
using System.Runtime.InteropServices;
using System.Security.Cryptography;
using Xunit;

Expand Down Expand Up @@ -418,5 +419,92 @@ public void EnsureThrowWhenCopyToNonSharedFile()
using var stream = new FileStream(file1, FileMode.Open, FileAccess.Read, FileShare.None);
Assert.Throws<IOException>(() => File.Copy(file2, file1, overwrite: true));
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it possible to make it so tests (at least some of them) run on a virtual ReFS drive also? That would be ideal, since it runs in the temporary directory currently, and thus we would never be checking if the code works on ReFS in CI or when running it locally. It would also be useful for other OSes like macOS if we could do this, so we can check logic there properly also.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the issue here "can we make tests temporarily create a ReFS volume"? I assume one could call Windows API to try to shrink the current volume and create a ReFS volume. Another alternative that's not entirely unreasonable is to create a test that needs manual setup. There are some manual console tests for example. Those only run on a developer's machine, and presumably only when they change relevant code.

On Mac/Linux possibly they could be created in memory? For example, here's a manual test we have that creates an EXFAT partition in memory:

sudo mkdir /mnt/ramdisk
sudo mount -t ramfs ramfs /mnt/ramdisk
sudo dd if=/dev/zero of=/mnt/ramdisk/exfat.image bs=1M count=512
sudo mkfs.exfat /mnt/ramdisk/exfat.image
sudo mkdir /mnt/exfatrd
sudo mount -o loop /mnt/ramdisk/exfat.image /mnt/exfatrd

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the issue here "can we make tests temporarily create a ReFS volume"?

It's not just that. I think it would be good to run some of these tests in CI automatically (and potentially locally also). Would that be possible? It would also be useful to test exFAT stuff properly on macOS for example. My understanding is that this stuff is not really tested currently, which isn't great imo.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we could potentially ask infra folks to set up a ReFS partition on the machines. Or we could try and do it at execution time, with shrinking as mentioned (seems slow though)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

but I'd defer worrying about that until we have code we know is good to go...

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See the pipeline YAML and unit tests for https://github.com/microsoft/CopyOnWrite - there's a pattern there for (a) a dev to specify an env var pointing to a ReFS volume to test on, and (b) when running as admin, create a ReFS VHD and run tests on it.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If it helps, our test automation always runs as admin. Except potentially on a dev machine.
We conventionally mark tests as outer loop if they are potentially disruptive/messy/slow like creating a VHD likely is.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I reached out to internal eng folks to ask how they'd feel about either updating images to have a ReFS partition, or allowing us to resize as in the yml above.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another idea for the VHD would be to use the dotnet/runtime-assets repo to store the vhd file, which would at least cut the cost of creating the VHD and only leave the cost of mounting and unmounting it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this test, and the test about checking it's actually cloned should be moved to a new issue. That way we could add it for macOS at the same time.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We would ideally have a test to ensure it actually does a block copy operation on ReFS. Does anyone know what APIs you can call to check this on Windows?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You'd have to ask someone like @erikmav ? We don't in general have special knowledge of ReFS.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

e.g. on macOS, there is fcntl with F_LOG2PHYS or F_LOG2PHYS_EXT, which can be used to check this.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've been poking around looking for this as e.g. a mode under fsutil. Still looking or will ask around.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What if you have a small test VHD and attempt to duplicate a file that is so large it already occupies more than half of the disk.

[ConditionalTheory(typeof(MountHelper), nameof(MountHelper.CanCreateSymbolicLinks)),
InlineData("", ""),
/*InlineData(":a", ""),*/
InlineData("", ":a")/*,
InlineData(":a", ":a")*/]
//todo: is copying from an ADS meant to fail?
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does anyone know if this is meant to fail? And if so, why?
Specifically, copying from an ADS fails, even today. But copying onto one doesn't for some reason.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JeremyKuhne maybe knows

BTW randomly linking here to the old issue requesting we support alternate data streams. #49604 Not currently planned.

Copy link
Contributor Author

@hamarb123 hamarb123 Jul 12, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We do seem to currently "support" ADS btw. Most APIs work fine with them. Specifically copying from one doesn't seem to work, but you seem to be able to read and write to them fine as if they were files for example. I would think if we didn't support them, it would throw an exception instead for all operations (since it's pretty easy to detect), as opposed to only some of them.

That API would be more useful on platforms like macOS where it's done differently (ie. there isn't really a filename you can give the ADS). It would also be useful on Windows probably, but you can already do a number of things with them with .NET already on Windows.

[PlatformSpecific(TestPlatforms.Windows)]
public void WindowsAlternateDataStreamSymlinkTest(string stream1, string stream2)
{
    // Copies between every combination of (optional) alternate data stream and symlink that points at it.
    // The purpose is to check that the ReFS block copy operation on alternate data streams does not cause a
    // BSOD (pending? rolled out fix from Windows team), and that the copy has the correct behaviour.

    string sourceFile = GetTestFilePath();
    string destFile = GetTestFilePath();

    // Resets both files (and their streams) to known content, copies src onto dst, then verifies the result.
    void Test(string src, string dst)
    {
        try
        {
            File.WriteAllText(sourceFile, "abc");
            File.WriteAllText(destFile, "def");
            File.WriteAllText(sourceFile + stream1, "ghi");
            File.WriteAllText(destFile + stream2, "jkl");

            File.Copy(src, dst, true);

            // When a named stream is involved, the file's main content must be left untouched.
            if (stream1 != "")
            {
                Assert.Equal("abc", File.ReadAllText(sourceFile));
            }

            if (stream2 != "")
            {
                Assert.Equal("def", File.ReadAllText(destFile));
            }

            Assert.Equal("ghi", File.ReadAllText(sourceFile + stream1));
            Assert.Equal("ghi", File.ReadAllText(destFile + stream2));
        }
        catch (Exception ex)
        {
            // Wrap so the failing src/dst combination shows up in the test output.
            throw new Exception($"Failed with src={src}, dst={dst}.", ex);
        }
    }

    File.CreateSymbolicLink(sourceFile + ".link", sourceFile + stream1);
    File.CreateSymbolicLink(destFile + ".link", destFile + stream2);

    Test(sourceFile + stream1, destFile + stream2);
    Test(sourceFile + stream1, destFile + ".link");
    Test(sourceFile + ".link", destFile + stream2);
    Test(sourceFile + ".link", destFile + ".link");
}

[Fact]
[PlatformSpecific(TestPlatforms.Windows)]
public unsafe void WindowsCheckSparseness()
{
    // Verifies that File.Copy carries the source's sparse attribute (set or not set) over to the destination.
    string sourceFile = GetTestFilePath();
    string destFile = GetTestFilePath();

    File.WriteAllText(sourceFile, "abc");
    File.WriteAllText(destFile, "def");

    // Non-sparse source => non-sparse destination.
    Assert.True((File.GetAttributes(sourceFile) & FileAttributes.SparseFile) == 0);
    File.Copy(sourceFile, destFile, true);
    Assert.True((File.GetAttributes(destFile) & FileAttributes.SparseFile) == 0);
    Assert.Equal("abc", File.ReadAllText(sourceFile));
    Assert.Equal("abc", File.ReadAllText(destFile));

    using (FileStream file = File.Open(sourceFile, FileMode.Open))
    {
        // Fail right here with the Win32 error if the source could not be marked sparse, instead of
        // failing later on a less informative attribute assertion.
        bool markedSparse = DeviceIoControl(file.SafeFileHandle.DangerousGetHandle(), /*FSCTL_SET_SPARSE*/ 0x000900c4, null, 0, null, 0, out _, 0);
        Assert.True(markedSparse, $"FSCTL_SET_SPARSE failed with Win32 error {Marshal.GetLastWin32Error()}.");
    }
    File.WriteAllText(destFile, "def");

    // Sparse source => sparse destination.
    Assert.True((File.GetAttributes(sourceFile) & FileAttributes.SparseFile) != 0);
    File.Copy(sourceFile, destFile, true);
    Assert.True((File.GetAttributes(destFile) & FileAttributes.SparseFile) != 0);
    Assert.Equal("abc", File.ReadAllText(sourceFile));
    Assert.Equal("abc", File.ReadAllText(destFile));

    [DllImport("kernel32.dll", EntryPoint = "DeviceIoControl", SetLastError = true)]
    [return: MarshalAs(UnmanagedType.Bool)]
    static unsafe extern bool DeviceIoControl(
        IntPtr hDevice,
        uint dwIoControlCode,
        void* lpInBuffer,
        uint nInBufferSize,
        void* lpOutBuffer,
        uint nOutBufferSize,
        out uint lpBytesReturned,
        IntPtr lpOverlapped);
}

// Todo: add a way to run all these on ReFS, and a test to check we actually cloned the reference, not just the data on ReFS.
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1666,6 +1666,9 @@
<Compile Include="$(CommonPath)Interop\Windows\Kernel32\Interop.FILE_ALLOCATION_INFO.cs">
<Link>Common\Interop\Windows\Kernel32\Interop.FILE_ALLOCATION_INFO.cs</Link>
</Compile>
<Compile Include="$(CommonPath)Interop\Windows\Kernel32\Interop.FILE_DISPOSITION_INFO.cs">
<Link>Common\Interop\Windows\Kernel32\Interop.FILE_DISPOSITION_INFO.cs</Link>
</Compile>
<Compile Include="$(CommonPath)Interop\Windows\Kernel32\Interop.FILE_END_OF_FILE_INFO.cs">
<Link>Common\Interop\Windows\Kernel32\Interop.FILE_END_OF_FILE_INFO.cs</Link>
</Compile>
Expand Down Expand Up @@ -1732,6 +1735,9 @@
<Compile Include="$(CommonPath)\Interop\Windows\Kernel32\Interop.GetCurrentProcessId.cs">
<Link>Common\Interop\Windows\Kernel32\Interop.GetCurrentProcessId.cs</Link>
</Compile>
<Compile Include="$(CommonPath)Interop\Windows\Kernel32\Interop.GetDiskFreeSpace.cs">
<Link>Common\Interop\Windows\Kernel32\Interop.GetDiskFreeSpace.cs</Link>
</Compile>
<Compile Include="$(CommonPath)\Interop\Windows\Kernel32\Interop.GetCurrentThreadId.cs">
<Link>Interop\Windows\Kernel32\Interop.GetCurrentThreadId.cs</Link>
</Compile>
Expand Down Expand Up @@ -1792,6 +1798,12 @@
<Compile Include="$(CommonPath)Interop\Windows\Kernel32\Interop.GetVolumeInformation.cs">
<Link>Common\Interop\Windows\Kernel32\Interop.GetVolumeInformation.cs</Link>
</Compile>
<Compile Include="$(CommonPath)Interop\Windows\Kernel32\Interop.GetVolumeInformationByHandle.cs">
<Link>Common\Interop\Windows\Kernel32\Interop.GetVolumeInformationByHandle.cs</Link>
</Compile>
<Compile Include="$(CommonPath)Interop\Windows\Kernel32\Interop.GetVolumePathName.cs">
<Link>Common\Interop\Windows\Kernel32\Interop.GetVolumePathName.cs</Link>
</Compile>
<Compile Include="$(CommonPath)Interop\Windows\Kernel32\Interop.GlobalMemoryStatusEx.cs">
<Link>Common\Interop\Windows\Kernel32\Interop.GlobalMemoryStatusEx.cs</Link>
</Compile>
Expand Down Expand Up @@ -1894,6 +1906,9 @@
<Compile Include="$(CommonPath)Interop\Windows\Kernel32\Interop.SetCurrentDirectory.cs">
<Link>Common\Interop\Windows\Kernel32\Interop.SetCurrentDirectory.cs</Link>
</Compile>
<Compile Include="$(CommonPath)Interop\Windows\Kernel32\Interop.SetEndOfFile.cs">
<Link>Common\Interop\Windows\Kernel32\Interop.SetEndOfFile.cs</Link>
</Compile>
<Compile Include="$(CommonPath)Interop\Windows\Kernel32\Interop.SetFileAttributes.cs">
<Link>Common\Interop\Windows\Kernel32\Interop.SetFileAttributes.cs</Link>
</Compile>
Expand Down
Loading