Skip to content

Commit

Permalink
Merge branch 'develop' into feature/fewerfiles
Browse files Browse the repository at this point in the history
  • Loading branch information
jas88 authored Oct 16, 2023
2 parents 4d98675 + 76bc8c7 commit befe577
Show file tree
Hide file tree
Showing 5 changed files with 98 additions and 217 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

...

### Changed

- Improved file transfer (FTP/SFTP/FTPS) support

## [8.1.0] - 2023-09-19

### Changed
Expand Down
1 change: 1 addition & 0 deletions Documentation/CodeTutorials/Packages.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
| Package | Source Code | License | Purpose | Additional Risk Assessment |
| ------- | ------------| ------- | ------- | -------------------------- |
| Equ | [GitHub](https://github.com/thedmi/Equ) | [MIT](https://opensource.org/licenses/MIT) | Simplifies object equality implementation | |
| FluentFTP | [GitHub](https://github.com/robinrodricks/FluentFTP/) | [MIT](https://opensource.org/licenses/MIT) | FTP(S) client | |
| MongoDB.Driver | [GitHub](https://github.com/mongodb/mongo-csharp-driver) | [Apache 2.0](https://opensource.org/licenses/Apache-2.0) | Database driver for MongoDB | |
| Microsoft.SourceLink.GitHub | [GitHub](https://github.com/dotnet/sourcelink) | [MIT](https://opensource.org/licenses/MIT) | Enable source linkage from nupkg | Official MS project |
| Microsoft.XmlSerializer.Generator | [Microsoft](https://learn.microsoft.com/en-us/dotnet/core/additional-tools/xml-serializer-generator) | [MIT](https://opensource.org/licenses/MIT) | XML handling improvements | |
Expand Down
234 changes: 62 additions & 172 deletions Rdmp.Core/DataLoad/Modules/FTP/FTPDownloader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
// RDMP is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
// You should have received a copy of the GNU General Public License along with RDMP. If not, see <https://www.gnu.org/licenses/>.

#nullable enable

using System;
using System.Collections.Generic;
using System.Diagnostics;
Expand All @@ -14,7 +16,9 @@
using System.Security.Cryptography.X509Certificates;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading;
using FAnsi.Discovery;
using FluentFTP;
using Rdmp.Core.Curation;
using Rdmp.Core.Curation.Data;
using Rdmp.Core.DataFlowPipeline;
Expand All @@ -29,26 +33,26 @@ namespace Rdmp.Core.DataLoad.Modules.FTP;
/// load component which downloads files from a remote FTP server to the ForLoading directory
///
/// <para>Attempts to connect to the FTP server and download all files in the landing folder of the FTP (make sure you really want everything in the
/// root folder - if not then configure redirection on the FTP so you land in the correct directory). Files are downloaded into the ForLoading folder</para>
/// root folder - if not then configure redirection on the FTP, so you land in the correct directory). Files are downloaded into the ForLoading folder</para>
/// </summary>
public class FTPDownloader : IPluginDataProvider
{
protected string _host;
protected string _username;
protected string _password;

private bool _useSSL = false;
private readonly Lazy<FtpClient> _connection;
protected readonly List<string> _filesRetrieved = new();
private ILoadDirectory? _directory;

protected List<string> _filesRetrieved = new();
private ILoadDirectory _directory;
public FTPDownloader()
{
_connection = new Lazy<FtpClient>(SetupFtp, LazyThreadSafetyMode.ExecutionAndPublication);
}

[DemandsInitialization(
"Determines the behaviour of the system when no files are found on the server. If true the entire data load process immediately stops with exit code LoadNotRequired, if false then the load proceeds as normal (useful if for example if you have multiple Attachers and some files are optional)")]
public bool SendLoadNotRequiredIfFileNotFound { get; set; }

[DemandsInitialization(
"The Regex expression to validate files on the FTP server against, only files matching the expression will be downloaded")]
public Regex FilePattern { get; set; }
public Regex? FilePattern { get; set; }

[DemandsInitialization("The timeout to use when connecting to the FTP server in SECONDS")]
public int TimeoutInSeconds { get; set; }
Expand All @@ -60,10 +64,10 @@ public class FTPDownloader : IPluginDataProvider
[DemandsInitialization(
"The FTP server to connect to. Server should be specified with only IP:Port e.g. 127.0.0.1:20. You do not have to specify ftp:// at the start",
Mandatory = true)]
public ExternalDatabaseServer FTPServer { get; set; }
public ExternalDatabaseServer? FTPServer { get; set; }

[DemandsInitialization("The directory on the FTP server that you want to download files from")]
public string RemoteDirectory { get; set; }
public string? RemoteDirectory { get; set; }

[DemandsInitialization("True to set keep alive", DefaultValue = true)]
public bool KeepAlive { get; set; }
Expand All @@ -76,38 +80,25 @@ public void Initialize(ILoadDirectory directory, DiscoveredDatabase dbInfo)

public ExitCodeType Fetch(IDataLoadJob job, GracefulCancellationToken cancellationToken)
{
SetupFTP();
return DownloadFilesOnFTP(_directory, job);
}

public static string GetDescription() => "See Description attribute of class";

public static IDataProvider Clone() => new FTPDownloader();

public bool Validate(ILoadDirectory destination)
{
SetupFTP();
return GetFileList().Any();
return DownloadFilesOnFTP(_directory ?? throw new InvalidOperationException("No output directory set"), job);
}

private void SetupFTP()
private FtpClient SetupFtp()
{
_host = FTPServer.Server;
_username = FTPServer.Username ?? "anonymous";
_password = string.IsNullOrWhiteSpace(FTPServer.Password) ? "guest" : FTPServer.GetDecryptedPassword();

if (string.IsNullOrWhiteSpace(_host))
throw new NullReferenceException(
$"FTPServer is not set up correctly it must have Server property filled in{FTPServer}");
var host = FTPServer?.Server ?? throw new NullReferenceException("FTP server not set");
var username = FTPServer.Username ?? "anonymous";
var password = string.IsNullOrWhiteSpace(FTPServer.Password) ? "guest" : FTPServer.GetDecryptedPassword();
var c = new FtpClient(host, username, password);
c.AutoConnect();
return c;
}

private ExitCodeType DownloadFilesOnFTP(ILoadDirectory destination, IDataLoadEventListener listener)
{
var files = GetFileList();
var files = GetFileList().ToArray();

listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, files.Aggregate(
"Identified the following files on the FTP server:", (s, f) =>
$"{f},").TrimEnd(',')));
listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information,
$"Identified the following files on the FTP server:{string.Join(',',files)}"));

var forLoadingContainedCachedFiles = false;

Expand All @@ -117,28 +108,29 @@ private ExitCodeType DownloadFilesOnFTP(ILoadDirectory destination, IDataLoadEve

listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information,
$"File {file} was evaluated as {action}"));
if (action == SkipReason.DoNotSkip)

switch (action)
{
listener.OnNotify(this,
new NotifyEventArgs(ProgressEventType.Information, $"About to download {file}"));
Download(file, destination, listener);
case SkipReason.DoNotSkip:
listener.OnNotify(this,
new NotifyEventArgs(ProgressEventType.Information, $"About to download {file}"));
Download(file, destination);
break;
case SkipReason.InForLoading:
forLoadingContainedCachedFiles = true;
break;
}

if (action == SkipReason.InForLoading)
forLoadingContainedCachedFiles = true;
}

//if no files were downloaded (and there were none skiped because they were in forLoading) and in that eventuality we have our flag set to return LoadNotRequired then do so
if (!forLoadingContainedCachedFiles && !_filesRetrieved.Any() && SendLoadNotRequiredIfFileNotFound)
{
listener.OnNotify(this,
new NotifyEventArgs(ProgressEventType.Information,
"Could not find any files on the remote server worth downloading, so returning LoadNotRequired"));
return ExitCodeType.OperationNotRequired;
}
// it was a success - even if no files were actually retrieved... hey that's what the user said, otherwise he would have set SendLoadNotRequiredIfFileNotFound
if (forLoadingContainedCachedFiles || _filesRetrieved.Any() || !SendLoadNotRequiredIfFileNotFound)
return ExitCodeType.Success;

//otherwise it was a success - even if no files were actually retrieved... hey that's what the user said, otherwise he would have set SendLoadNotRequiredIfFileNotFound
return ExitCodeType.Success;
// if no files were downloaded (and there were none skipped because they were in forLoading) and in that eventuality we have our flag set to return LoadNotRequired then do so
listener.OnNotify(this,
new NotifyEventArgs(ProgressEventType.Information,
"Could not find any files on the remote server worth downloading, so returning LoadNotRequired"));
return ExitCodeType.OperationNotRequired;
}

protected enum SkipReason
Expand All @@ -151,153 +143,51 @@ protected enum SkipReason

protected SkipReason GetSkipActionForFile(string file, ILoadDirectory destination)
{
if (file.StartsWith("."))
if (file.StartsWith(".",StringComparison.Ordinal))
return SkipReason.IsImaginaryFile;

//if there is a regex pattern
if (FilePattern != null)
if (!FilePattern.IsMatch(file)) //and it does not match
return SkipReason.DidNotMatchPattern; //skip because it did not match pattern
if (FilePattern?.IsMatch(file) == false) //and it does not match
return SkipReason.DidNotMatchPattern; //skip because it did not match pattern

//if the file on the FTP already exists in the forLoading directory, skip it
return destination.ForLoading.GetFiles(file).Any() ? SkipReason.InForLoading : SkipReason.DoNotSkip;
}


private bool ValidateServerCertificate(object sender, X509Certificate certificate, X509Chain chain,
SslPolicyErrors sslpolicyerrors) => true; //any cert will do! yay
private static bool ValidateServerCertificate(object _1, X509Certificate _2, X509Chain _3,
SslPolicyErrors _4) => true; //any cert will do! yay


protected virtual string[] GetFileList()
protected virtual IEnumerable<string> GetFileList()
{
var result = new StringBuilder();
WebResponse response = null;
StreamReader reader = null;
try
{
var uri = !string.IsNullOrWhiteSpace(RemoteDirectory)
? $"ftp://{_host}/{RemoteDirectory}"
: $"ftp://{_host}";

#pragma warning disable SYSLIB0014 // Type or member is obsolete
var reqFTP = (FtpWebRequest)WebRequest.Create(new Uri(uri));
#pragma warning restore SYSLIB0014 // Type or member is obsolete
reqFTP.UseBinary = true;
reqFTP.Credentials = new NetworkCredential(_username, _password);
reqFTP.Method = WebRequestMethods.Ftp.ListDirectory;
reqFTP.Timeout = TimeoutInSeconds * 1000;
reqFTP.KeepAlive = KeepAlive;

reqFTP.Proxy = null;
reqFTP.KeepAlive = false;
reqFTP.UsePassive = true;
reqFTP.EnableSsl = _useSSL;

//accept any certificates
ServicePointManager.ServerCertificateValidationCallback = ValidateServerCertificate;
response = reqFTP.GetResponse();

reader = new StreamReader(response.GetResponseStream());
var line = reader.ReadLine();
while (line != null)
{
result.Append(line);
result.Append('\n');
line = reader.ReadLine();
}

// to remove the trailing '\n'
result.Remove(result.ToString().LastIndexOf('\n'), 1);
return result.ToString().Split('\n');
}
finally
{
reader?.Close();

response?.Close();
}
return _connection.Value.GetNameListing().ToList().Where(_connection.Value.FileExists);
}

protected virtual void Download(string file, ILoadDirectory destination, IDataLoadEventListener job)
protected virtual void Download(string file, ILoadDirectory destination)
{
var s = new Stopwatch();
s.Start();

var uri = !string.IsNullOrWhiteSpace(RemoteDirectory)
? $"ftp://{_host}/{RemoteDirectory}/{file}"
: $"ftp://{_host}/{file}";

if (_useSSL)
uri = $"s{uri}";

var serverUri = new Uri(uri);
if (serverUri.Scheme != Uri.UriSchemeFtp) return;

#pragma warning disable SYSLIB0014 // Type or member is obsolete
var reqFTP = (FtpWebRequest)WebRequest.Create(new Uri(uri));
#pragma warning restore SYSLIB0014 // Type or member is obsolete
reqFTP.Credentials = new NetworkCredential(_username, _password);
reqFTP.KeepAlive = false;
reqFTP.Method = WebRequestMethods.Ftp.DownloadFile;
reqFTP.UseBinary = true;
reqFTP.Proxy = null;
reqFTP.UsePassive = true;
reqFTP.EnableSsl = _useSSL;
reqFTP.Timeout = TimeoutInSeconds * 1000;

var response = (FtpWebResponse)reqFTP.GetResponse();
var responseStream = response.GetResponseStream();
var destinationFileName = Path.Combine(destination.ForLoading.FullName, file);
var remotePath = !string.IsNullOrWhiteSpace(RemoteDirectory)
? $"{RemoteDirectory}/{file}"
: file;

using (var writeStream = new FileStream(destinationFileName, FileMode.Create))
{
responseStream.CopyTo(writeStream);
writeStream.Close();
}

response.Close();

_filesRetrieved.Add(serverUri.ToString());
s.Stop();
var destinationFileName = Path.Combine(destination.ForLoading.FullName, file);
_connection.Value.DownloadFile(destinationFileName, remotePath);
_filesRetrieved.Add(remotePath);
}

public virtual void LoadCompletedSoDispose(ExitCodeType exitCode, IDataLoadEventListener postLoadEventListener)
{
if (exitCode == ExitCodeType.Success && DeleteFilesOffFTPServerAfterSuccesfulDataLoad)
foreach (var file in _filesRetrieved)
{
#pragma warning disable SYSLIB0014
// Type or member is obsolete
var reqFTP = (FtpWebRequest)WebRequest.Create(new Uri(file));
#pragma warning restore SYSLIB0014
// Type or member is obsolete
reqFTP.Credentials = new NetworkCredential(_username, _password);
reqFTP.KeepAlive = false;
reqFTP.Method = WebRequestMethods.Ftp.DeleteFile;
reqFTP.UseBinary = true;
reqFTP.Proxy = null;
reqFTP.UsePassive = true;
reqFTP.EnableSsl = _useSSL;

var response = (FtpWebResponse)reqFTP.GetResponse();

if (response.StatusCode != FtpStatusCode.FileActionOK)
postLoadEventListener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Warning,
$"Attempt to delete file at URI {file} resulted in response with StatusCode = {response.StatusCode}"));
else
postLoadEventListener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information,
$"Deleted FTP file at URI {file} status code was {response.StatusCode}"));

response.Close();
}
if (exitCode != ExitCodeType.Success || !DeleteFilesOffFTPServerAfterSuccesfulDataLoad) return;

foreach (var file in _filesRetrieved) _connection.Value.DeleteFile(file);
}


public void Check(ICheckNotifier notifier)
{
try
{
SetupFTP();
SetupFtp();
}
catch (Exception e)
{
Expand Down
Loading

0 comments on commit befe577

Please sign in to comment.