From 76bc8c71bd5791640ee415dc73db389f769ca327 Mon Sep 17 00:00:00 2001 From: James A Sutherland Date: Mon, 16 Oct 2023 09:23:37 -0500 Subject: [PATCH] New FTP/FTPS support, improve SFTP (#1649) Co-authored-by: James Friel --- CHANGELOG.md | 4 + Documentation/CodeTutorials/Packages.md | 1 + .../DataLoad/Modules/FTP/FTPDownloader.cs | 234 +++++------------- .../DataLoad/Modules/FTP/SFTPDownloader.cs | 73 +++--- Rdmp.Core/Rdmp.Core.csproj | 3 +- 5 files changed, 98 insertions(+), 217 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7100a239d3..83f916faa6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ... +### Changed + +- Improved file transfer (FTP/SFTP/FTPS) support + ## [8.1.0] - 2023-09-19 ### Changed diff --git a/Documentation/CodeTutorials/Packages.md b/Documentation/CodeTutorials/Packages.md index 44d50761dc..d324a70fb6 100644 --- a/Documentation/CodeTutorials/Packages.md +++ b/Documentation/CodeTutorials/Packages.md @@ -10,6 +10,7 @@ | Package | Source Code | License | Purpose | Additional Risk Assessment | | ------- | ------------| ------- | ------- | -------------------------- | | Equ | [GitHub](https://github.com/thedmi/Equ) | [MIT](https://opensource.org/licenses/MIT) | Simplifies object equality implementation | | +| FluentFTP | [GitHub](https://github.com/robinrodricks/FluentFTP/) | [MIT](https://opensource.org/licenses/MIT) | FTP(S) client | | | MongoDB.Driver | [GitHub](https://github.com/mongodb/mongo-csharp-driver) | [Apache 2.0](https://opensource.org/licenses/Apache-2.0) | Database driver for MongoDB | | | Microsoft.SourceLink.GitHub | [GitHub](https://github.com/dotnet/sourcelink) | [MIT](https://opensource.org/licenses/MIT) | Enable source linkage from nupkg | Official MS project | | Microsoft.XmlSerializer.Generator | [Microsoft](https://learn.microsoft.com/en-us/dotnet/core/additional-tools/xml-serializer-generator) | 
[MIT](https://opensource.org/licenses/MIT) | XML handling improvements | diff --git a/Rdmp.Core/DataLoad/Modules/FTP/FTPDownloader.cs b/Rdmp.Core/DataLoad/Modules/FTP/FTPDownloader.cs index 06a8fb5a30..49ef7489ca 100644 --- a/Rdmp.Core/DataLoad/Modules/FTP/FTPDownloader.cs +++ b/Rdmp.Core/DataLoad/Modules/FTP/FTPDownloader.cs @@ -4,6 +4,8 @@ // RDMP is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. // You should have received a copy of the GNU General Public License along with RDMP. If not, see <https://www.gnu.org/licenses/>. +#nullable enable + using System; using System.Collections.Generic; using System.Diagnostics; @@ -14,7 +16,9 @@ using System.Security.Cryptography.X509Certificates; using System.Text; using System.Text.RegularExpressions; +using System.Threading; using FAnsi.Discovery; +using FluentFTP; using Rdmp.Core.Curation; using Rdmp.Core.Curation.Data; using Rdmp.Core.DataFlowPipeline; @@ -29,18 +33,18 @@ namespace Rdmp.Core.DataLoad.Modules.FTP; /// load component which downloads files from a remote FTP server to the ForLoading directory /// /// Attempts to connect to the FTP server and download all files in the landing folder of the FTP (make sure you really want everything in the -/// root folder - if not then configure redirection on the FTP so you land in the correct directory). Files are downloaded into the ForLoading folder +/// root folder - if not then configure redirection on the FTP, so you land in the correct directory). Files are downloaded into the ForLoading folder /// public class FTPDownloader : IPluginDataProvider { - protected string _host; - protected string _username; - protected string _password; - - private bool _useSSL = false; + private readonly Lazy _connection; + protected readonly List _filesRetrieved = new(); + private ILoadDirectory? 
_directory; - protected List _filesRetrieved = new(); - private ILoadDirectory _directory; + public FTPDownloader() + { + _connection = new Lazy(SetupFtp, LazyThreadSafetyMode.ExecutionAndPublication); + } [DemandsInitialization( "Determines the behaviour of the system when no files are found on the server. If true the entire data load process immediately stops with exit code LoadNotRequired, if false then the load proceeds as normal (useful if for example if you have multiple Attachers and some files are optional)")] @@ -48,7 +52,7 @@ public class FTPDownloader : IPluginDataProvider [DemandsInitialization( "The Regex expression to validate files on the FTP server against, only files matching the expression will be downloaded")] - public Regex FilePattern { get; set; } + public Regex? FilePattern { get; set; } [DemandsInitialization("The timeout to use when connecting to the FTP server in SECONDS")] public int TimeoutInSeconds { get; set; } @@ -60,10 +64,10 @@ public class FTPDownloader : IPluginDataProvider [DemandsInitialization( "The FTP server to connect to. Server should be specified with only IP:Port e.g. 127.0.0.1:20. You do not have to specify ftp:// at the start", Mandatory = true)] - public ExternalDatabaseServer FTPServer { get; set; } + public ExternalDatabaseServer? FTPServer { get; set; } [DemandsInitialization("The directory on the FTP server that you want to download files from")] - public string RemoteDirectory { get; set; } + public string? 
RemoteDirectory { get; set; } [DemandsInitialization("True to set keep alive", DefaultValue = true)] public bool KeepAlive { get; set; } @@ -76,38 +80,25 @@ public void Initialize(ILoadDirectory directory, DiscoveredDatabase dbInfo) public ExitCodeType Fetch(IDataLoadJob job, GracefulCancellationToken cancellationToken) { - SetupFTP(); - return DownloadFilesOnFTP(_directory, job); - } - - public static string GetDescription() => "See Description attribute of class"; - - public static IDataProvider Clone() => new FTPDownloader(); - - public bool Validate(ILoadDirectory destination) - { - SetupFTP(); - return GetFileList().Any(); + return DownloadFilesOnFTP(_directory ?? throw new InvalidOperationException("No output directory set"), job); } - private void SetupFTP() + private FtpClient SetupFtp() { - _host = FTPServer.Server; - _username = FTPServer.Username ?? "anonymous"; - _password = string.IsNullOrWhiteSpace(FTPServer.Password) ? "guest" : FTPServer.GetDecryptedPassword(); - - if (string.IsNullOrWhiteSpace(_host)) - throw new NullReferenceException( - $"FTPServer is not set up correctly it must have Server property filled in{FTPServer}"); + var host = FTPServer?.Server ?? throw new NullReferenceException("FTP server not set"); + var username = FTPServer.Username ?? "anonymous"; + var password = string.IsNullOrWhiteSpace(FTPServer.Password) ? 
"guest" : FTPServer.GetDecryptedPassword(); + var c = new FtpClient(host, username, password); + c.AutoConnect(); + return c; } private ExitCodeType DownloadFilesOnFTP(ILoadDirectory destination, IDataLoadEventListener listener) { - var files = GetFileList(); + var files = GetFileList().ToArray(); - listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, files.Aggregate( - "Identified the following files on the FTP server:", (s, f) => - $"{f},").TrimEnd(','))); + listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, + $"Identified the following files on the FTP server:{string.Join(',',files)}")); var forLoadingContainedCachedFiles = false; @@ -117,28 +108,29 @@ private ExitCodeType DownloadFilesOnFTP(ILoadDirectory destination, IDataLoadEve listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, $"File {file} was evaluated as {action}")); - if (action == SkipReason.DoNotSkip) + + switch (action) { - listener.OnNotify(this, - new NotifyEventArgs(ProgressEventType.Information, $"About to download {file}")); - Download(file, destination, listener); + case SkipReason.DoNotSkip: + listener.OnNotify(this, + new NotifyEventArgs(ProgressEventType.Information, $"About to download {file}")); + Download(file, destination); + break; + case SkipReason.InForLoading: + forLoadingContainedCachedFiles = true; + break; } - - if (action == SkipReason.InForLoading) - forLoadingContainedCachedFiles = true; } - //if no files were downloaded (and there were none skiped because they were in forLoading) and in that eventuality we have our flag set to return LoadNotRequired then do so - if (!forLoadingContainedCachedFiles && !_filesRetrieved.Any() && SendLoadNotRequiredIfFileNotFound) - { - listener.OnNotify(this, - new NotifyEventArgs(ProgressEventType.Information, - "Could not find any files on the remote server worth downloading, so returning LoadNotRequired")); - return ExitCodeType.OperationNotRequired; - } + // it was a 
success - even if no files were actually retrieved... hey that's what the user said, otherwise he would have set SendLoadNotRequiredIfFileNotFound + if (forLoadingContainedCachedFiles || _filesRetrieved.Any() || !SendLoadNotRequiredIfFileNotFound) + return ExitCodeType.Success; - //otherwise it was a success - even if no files were actually retrieved... hey that's what the user said, otherwise he would have set SendLoadNotRequiredIfFileNotFound - return ExitCodeType.Success; + // if no files were downloaded (and there were none skipped because they were in forLoading) and in that eventuality we have our flag set to return LoadNotRequired then do so + listener.OnNotify(this, + new NotifyEventArgs(ProgressEventType.Information, + "Could not find any files on the remote server worth downloading, so returning LoadNotRequired")); + return ExitCodeType.OperationNotRequired; } protected enum SkipReason @@ -151,145 +143,43 @@ protected enum SkipReason protected SkipReason GetSkipActionForFile(string file, ILoadDirectory destination) { - if (file.StartsWith(".")) + if (file.StartsWith(".",StringComparison.Ordinal)) return SkipReason.IsImaginaryFile; //if there is a regex pattern - if (FilePattern != null) - if (!FilePattern.IsMatch(file)) //and it does not match - return SkipReason.DidNotMatchPattern; //skip because it did not match pattern + if (FilePattern?.IsMatch(file) == false) //and it does not match + return SkipReason.DidNotMatchPattern; //skip because it did not match pattern //if the file on the FTP already exists in the forLoading directory, skip it return destination.ForLoading.GetFiles(file).Any() ? SkipReason.InForLoading : SkipReason.DoNotSkip; } - private bool ValidateServerCertificate(object sender, X509Certificate certificate, X509Chain chain, - SslPolicyErrors sslpolicyerrors) => true; //any cert will do! yay + private static bool ValidateServerCertificate(object _1, X509Certificate _2, X509Chain _3, + SslPolicyErrors _4) => true; //any cert will do! 
yay - protected virtual string[] GetFileList() + protected virtual IEnumerable GetFileList() { - var result = new StringBuilder(); - WebResponse response = null; - StreamReader reader = null; - try - { - var uri = !string.IsNullOrWhiteSpace(RemoteDirectory) - ? $"ftp://{_host}/{RemoteDirectory}" - : $"ftp://{_host}"; - -#pragma warning disable SYSLIB0014 // Type or member is obsolete - var reqFTP = (FtpWebRequest)WebRequest.Create(new Uri(uri)); -#pragma warning restore SYSLIB0014 // Type or member is obsolete - reqFTP.UseBinary = true; - reqFTP.Credentials = new NetworkCredential(_username, _password); - reqFTP.Method = WebRequestMethods.Ftp.ListDirectory; - reqFTP.Timeout = TimeoutInSeconds * 1000; - reqFTP.KeepAlive = KeepAlive; - - reqFTP.Proxy = null; - reqFTP.KeepAlive = false; - reqFTP.UsePassive = true; - reqFTP.EnableSsl = _useSSL; - - //accept any certificates - ServicePointManager.ServerCertificateValidationCallback = ValidateServerCertificate; - response = reqFTP.GetResponse(); - - reader = new StreamReader(response.GetResponseStream()); - var line = reader.ReadLine(); - while (line != null) - { - result.Append(line); - result.Append('\n'); - line = reader.ReadLine(); - } - - // to remove the trailing '\n' - result.Remove(result.ToString().LastIndexOf('\n'), 1); - return result.ToString().Split('\n'); - } - finally - { - reader?.Close(); - - response?.Close(); - } + return _connection.Value.GetNameListing().ToList().Where(_connection.Value.FileExists); } - protected virtual void Download(string file, ILoadDirectory destination, IDataLoadEventListener job) + protected virtual void Download(string file, ILoadDirectory destination) { - var s = new Stopwatch(); - s.Start(); - - var uri = !string.IsNullOrWhiteSpace(RemoteDirectory) - ? 
$"ftp://{_host}/{RemoteDirectory}/{file}" - : $"ftp://{_host}/{file}"; - - if (_useSSL) - uri = $"s{uri}"; - - var serverUri = new Uri(uri); - if (serverUri.Scheme != Uri.UriSchemeFtp) return; - -#pragma warning disable SYSLIB0014 // Type or member is obsolete - var reqFTP = (FtpWebRequest)WebRequest.Create(new Uri(uri)); -#pragma warning restore SYSLIB0014 // Type or member is obsolete - reqFTP.Credentials = new NetworkCredential(_username, _password); - reqFTP.KeepAlive = false; - reqFTP.Method = WebRequestMethods.Ftp.DownloadFile; - reqFTP.UseBinary = true; - reqFTP.Proxy = null; - reqFTP.UsePassive = true; - reqFTP.EnableSsl = _useSSL; - reqFTP.Timeout = TimeoutInSeconds * 1000; - - var response = (FtpWebResponse)reqFTP.GetResponse(); - var responseStream = response.GetResponseStream(); - var destinationFileName = Path.Combine(destination.ForLoading.FullName, file); + var remotePath = !string.IsNullOrWhiteSpace(RemoteDirectory) + ? $"{RemoteDirectory}/{file}" + : file; - using (var writeStream = new FileStream(destinationFileName, FileMode.Create)) - { - responseStream.CopyTo(writeStream); - writeStream.Close(); - } - - response.Close(); - - _filesRetrieved.Add(serverUri.ToString()); - s.Stop(); + var destinationFileName = Path.Combine(destination.ForLoading.FullName, file); + _connection.Value.DownloadFile(destinationFileName, remotePath); + _filesRetrieved.Add(remotePath); } public virtual void LoadCompletedSoDispose(ExitCodeType exitCode, IDataLoadEventListener postLoadEventListener) { - if (exitCode == ExitCodeType.Success && DeleteFilesOffFTPServerAfterSuccesfulDataLoad) - foreach (var file in _filesRetrieved) - { -#pragma warning disable SYSLIB0014 - // Type or member is obsolete - var reqFTP = (FtpWebRequest)WebRequest.Create(new Uri(file)); -#pragma warning restore SYSLIB0014 - // Type or member is obsolete - reqFTP.Credentials = new NetworkCredential(_username, _password); - reqFTP.KeepAlive = false; - reqFTP.Method = WebRequestMethods.Ftp.DeleteFile; 
- reqFTP.UseBinary = true; - reqFTP.Proxy = null; - reqFTP.UsePassive = true; - reqFTP.EnableSsl = _useSSL; - - var response = (FtpWebResponse)reqFTP.GetResponse(); - - if (response.StatusCode != FtpStatusCode.FileActionOK) - postLoadEventListener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Warning, - $"Attempt to delete file at URI {file} resulted in response with StatusCode = {response.StatusCode}")); - else - postLoadEventListener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, - $"Deleted FTP file at URI {file} status code was {response.StatusCode}")); - - response.Close(); - } + if (exitCode != ExitCodeType.Success || !DeleteFilesOffFTPServerAfterSuccesfulDataLoad) return; + + foreach (var file in _filesRetrieved) _connection.Value.DeleteFile(file); } @@ -297,7 +187,7 @@ public void Check(ICheckNotifier notifier) { try { - SetupFTP(); + SetupFtp(); } catch (Exception e) { diff --git a/Rdmp.Core/DataLoad/Modules/FTP/SFTPDownloader.cs b/Rdmp.Core/DataLoad/Modules/FTP/SFTPDownloader.cs index 3b507bb673..c08736805e 100644 --- a/Rdmp.Core/DataLoad/Modules/FTP/SFTPDownloader.cs +++ b/Rdmp.Core/DataLoad/Modules/FTP/SFTPDownloader.cs @@ -8,6 +8,7 @@ using System.Diagnostics; using System.IO; using System.Linq; +using System.Threading; using Rdmp.Core.Curation; using Rdmp.Core.Curation.Data; using Rdmp.Core.ReusableLibraryCode.Progress; @@ -18,7 +19,7 @@ namespace Rdmp.Core.DataLoad.Modules.FTP; /// /// load component which downloads files from a remote SFTP (Secure File Transfer Protocol) server to the ForLoading directory /// -/// Operates in the same way as except that it uses SSH. In addition this +/// Operates in the same way as except that it uses SSH. 
In addition, this /// class will not bother downloading any files that already exist in the forLoading directory (have the same name - file size is NOT checked) /// public class SFTPDownloader : FTPDownloader @@ -26,57 +27,48 @@ public class SFTPDownloader : FTPDownloader [DemandsInitialization("The keep-alive interval. In milliseconds. Requires KeepAlive to be set to take effect.")] public int KeepAliveIntervalMilliseconds { get; set; } - protected override void Download(string file, ILoadDirectory destination, IDataLoadEventListener job) - { - if (file.Contains('/') || file.Contains('\\')) - throw new Exception("Was not expecting a relative path here"); - - var s = new Stopwatch(); - s.Start(); + private readonly Lazy _connection; - using (var sftp = new SftpClient(_host, _username, _password)) - { - if (KeepAlive && KeepAliveIntervalMilliseconds > 0) - sftp.KeepAliveInterval = TimeSpan.FromMilliseconds(KeepAliveIntervalMilliseconds); - - sftp.ConnectionInfo.Timeout = new TimeSpan(0, 0, 0, TimeoutInSeconds); - sftp.Connect(); + public SFTPDownloader(Lazy connection) + { + _connection = new Lazy(SetupSftp,LazyThreadSafetyMode.ExecutionAndPublication); + } - //if there is a specified remote directory then reference it otherwise reference it locally (or however we were told about it from GetFileList()) - var fullFilePath = !string.IsNullOrWhiteSpace(RemoteDirectory) ? Path.Combine(RemoteDirectory, file) : file; + private SftpClient SetupSftp() + { + var host = FTPServer?.Server ?? throw new NullReferenceException("FTP server not set"); + var username = FTPServer.Username ?? "anonymous"; + var password = string.IsNullOrWhiteSpace(FTPServer.Password) ? 
"guest" : FTPServer.GetDecryptedPassword(); + var c = new SftpClient(host, username, password); + c.Connect(); + return c; + } - var destinationFilePath = Path.Combine(destination.ForLoading.FullName, file); - //register for events - void Callback(ulong totalBytes) => job.OnProgress(this, - new ProgressEventArgs(destinationFilePath, - new ProgressMeasurement((int)(totalBytes * 0.001), ProgressType.Kilobytes), s.Elapsed)); + protected override void Download(string file, ILoadDirectory destination) + { + if (file.Contains('/') || file.Contains('\\')) + throw new Exception("Was not expecting a relative path here"); - using (var fs = new FileStream(destinationFilePath, FileMode.CreateNew)) - { - //download - sftp.DownloadFile(fullFilePath, fs, Callback); - fs.Close(); - } + //if there is a specified remote directory then reference it otherwise reference it locally (or however we were told about it from GetFileList()) + var fullFilePath = !string.IsNullOrWhiteSpace(RemoteDirectory) ? Path.Combine(RemoteDirectory, file) : file; - _filesRetrieved.Add(fullFilePath); - } + var destinationFilePath = Path.Combine(destination.ForLoading.FullName, file); - s.Stop(); + using (var dest=File.Create(destinationFilePath)) + _connection.Value.DownloadFile(fullFilePath,dest); + _filesRetrieved.Add(fullFilePath); } public override void LoadCompletedSoDispose(ExitCodeType exitCode, IDataLoadEventListener postLoadEventListener) { if (exitCode != ExitCodeType.Success) return; - using var sftp = new SftpClient(_host, _username, _password); - sftp.ConnectionInfo.Timeout = new TimeSpan(0, 0, 0, TimeoutInSeconds); - sftp.Connect(); foreach (var retrievedFiles in _filesRetrieved) try { - sftp.DeleteFile(retrievedFiles); + _connection.Value.DeleteFile(retrievedFiles); postLoadEventListener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, $"Deleted SFTP file {retrievedFiles} from SFTP server")); } @@ -90,15 +82,8 @@ public override void LoadCompletedSoDispose(ExitCodeType 
exitCode, IDataLoadEven protected override string[] GetFileList() { - using var sftp = new SftpClient(_host, _username, _password); - sftp.ConnectionInfo.Timeout = new TimeSpan(0, 0, 0, TimeoutInSeconds); - sftp.Connect(); - - var directory = RemoteDirectory; - - if (string.IsNullOrWhiteSpace(directory)) - directory = "."; + var directory = string.IsNullOrWhiteSpace(RemoteDirectory) ? "." : RemoteDirectory; - return sftp.ListDirectory(directory).Select(d => d.Name).ToArray(); + return _connection.Value.ListDirectory(directory).Select(static d => d.Name).ToArray(); } } \ No newline at end of file diff --git a/Rdmp.Core/Rdmp.Core.csproj b/Rdmp.Core/Rdmp.Core.csproj index 8f1d467ea4..3cf410c2fd 100644 --- a/Rdmp.Core/Rdmp.Core.csproj +++ b/Rdmp.Core/Rdmp.Core.csproj @@ -295,6 +295,7 @@ + @@ -305,8 +306,8 @@ + -