Skip to content

Commit

Permalink
Feature/downloader manifest 1014 (#655)
Browse files Browse the repository at this point in the history
* adding appsettings to cloud

* using config file for clouds

* using the s3 bucket as default for lambdas

* using default s3 base url from config file

* using Cloud in Downloader

* fixing downloader manifest path

* adding index for gsa files

* adding check for index file

* fixing texts

* ensuring consistency
  • Loading branch information
rajatshuvro authored and GitHub Enterprise committed Jun 13, 2022
1 parent b8a5fcb commit a0e6e36
Show file tree
Hide file tree
Showing 15 changed files with 158 additions and 51 deletions.
8 changes: 8 additions & 0 deletions Cloud/Cloud.appsettings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"DataSource": {
"BaseUrl": "http://nirvana-annotations.s3.us-west-2.amazonaws.com/",
"CacheDirectory": "ab0cf104f39708eabd07b8cb67e149ba-Cache",
"ReferencesDirectory": "d95867deadfe690e40f42068d6b59df8-References",
"ManifestDirectory": "a9f54ea6ac0548696c97a3ee64bc39ec2e71b84b-SaManifest"
}
}
7 changes: 7 additions & 0 deletions Cloud/Cloud.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,19 @@
<PackageReference Include="Amazon.Lambda.Serialization.Json" Version="2.0.0" />
<PackageReference Include="AWSSDK.S3" Version="3.7.8.3" />
<PackageReference Include="AWSSDK.SimpleNotificationService" Version="3.7.3.31" />
<PackageReference Include="Microsoft.Extensions.Configuration" Version="6.0.0" />
<PackageReference Include="Microsoft.Extensions.Configuration.Json" Version="6.0.0" />
<PackageReference Include="Newtonsoft.Json" Version="13.0.1" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\Genome\Genome.csproj" />
<ProjectReference Include="..\IO\IO.csproj" />
<ProjectReference Include="..\ReferenceSequence\ReferenceSequence.csproj" />
</ItemGroup>
<ItemGroup>
<None Update="Cloud.appsettings.json">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
</ItemGroup>
<Import Project="..\CommonAssemblyInfo.props" />
</Project>
27 changes: 27 additions & 0 deletions Cloud/Configuration.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
using Cloud.Utilities;

namespace Cloud;
using Microsoft.Extensions.Configuration;

/// <summary>
/// Exposes the settings stored in the "DataSource" section of Cloud.appsettings.json.
/// </summary>
public sealed class Configuration
{
    // name of the JSON settings file that is loaded by the constructor
    private const string AppSettingsFilename = "Cloud.appsettings.json";

    // name of the section that holds the base URL and the directory names
    private const string DataSourceSectionName = "DataSource";

    /// <summary>The configuration root built from the JSON settings file.</summary>
    public readonly IConfigurationRoot Config;

    /// <summary>The "DataSource" section of <see cref="Config"/>.</summary>
    public readonly IConfigurationSection DataSources;

    /// <summary>
    /// Builds the configuration from Cloud.appsettings.json and caches its "DataSource" section.
    /// </summary>
    public Configuration()
    {
        IConfigurationBuilder builder = new ConfigurationBuilder().AddJsonFile(AppSettingsFilename);

        Config      = builder.Build();
        DataSources = Config.GetSection(DataSourceSectionName);
    }

    /// <summary>Value of the "BaseUrl" entry in the data source section.</summary>
    public string NirvanaBaseUrl
    {
        get { return DataSources["BaseUrl"]; }
    }

    /// <summary>Value of the "CacheDirectory" entry in the data source section.</summary>
    public string CacheDirectory
    {
        get { return DataSources["CacheDirectory"]; }
    }

    /// <summary>Value of the "ReferencesDirectory" entry in the data source section.</summary>
    public string ReferencesDirectory
    {
        get { return DataSources["ReferencesDirectory"]; }
    }

    /// <summary>Value of the "ManifestDirectory" entry in the data source section.</summary>
    public string ManifestDirectory
    {
        get { return DataSources["ManifestDirectory"]; }
    }
}
36 changes: 22 additions & 14 deletions Cloud/LambdaUrlHelper.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using Cloud.Utilities;
using System;
using Cloud.Utilities;
using Genome;
using IO;
using ReferenceSequence;
Expand All @@ -7,33 +8,40 @@ namespace Cloud
{
public static class LambdaUrlHelper
{
public const string UrlBaseEnvironmentVariableName = "NirvanaDataUrlBase";
public const ushort SaSchemaVersion = 22;
public const string UrlBaseEnvironmentVariableName = "NirvanaDataUrlBase";
private static readonly Configuration Config = new ();

public const string S3CacheFolderBase = "ab0cf104f39708eabd07b8cb67e149ba-Cache";
public const string S3ManifestFolderBase = "a9f54ea6ac0548696c97a3ee64bc39ec2e71b84b-SaManifest";
public static string S3CacheFolderBase = Config.CacheDirectory;
// public const string S3ManifestFolderBase = "a9f54ea6ac0548696c97a3ee64bc39ec2e71b84b-SaManifest";
public static readonly string S3CacheFolder =
$"{S3CacheFolderBase}/{CacheConstants.DataVersion}/";
$"{Config.CacheDirectory}/{CacheConstants.DataVersion}/";

private static readonly string S3RefPrefix =
$"d95867deadfe690e40f42068d6b59df8-References/{ReferenceSequenceCommon.HeaderVersion}/Homo_sapiens.";
$"{Config.ReferencesDirectory}/{ReferenceSequenceCommon.HeaderVersion}/Homo_sapiens.";


private const string UgaFileName = "UGA.tsv.gz";
public const string DefaultCacheSource = "Both";
public const string RefSuffix = ".Nirvana.dat";
public const string JsonSuffix = ".json.gz";
public const string JsonIndexSuffix = ".jsi";
public const string SuccessMessage = "Success";

public static string GetBaseUrl(string baseUrl = null) =>
baseUrl ?? LambdaUtilities.GetEnvironmentVariable(UrlBaseEnvironmentVariableName);
/// <summary>
/// Returns the base URL for Nirvana data. The environment variable named by
/// <see cref="UrlBaseEnvironmentVariableName"/> wins when it is set to a non-empty value;
/// otherwise the base URL from the Cloud configuration is used.
/// </summary>
public static string GetBaseUrl()
{
    string overrideUrl = Environment.GetEnvironmentVariable(UrlBaseEnvironmentVariableName);
    if (!string.IsNullOrEmpty(overrideUrl)) return overrideUrl;

    // no override supplied: fall back to the configured default
    return Config.NirvanaBaseUrl;
}

public static string GetManifestBaseUrl(string baseUrl = null) => GetBaseUrl(baseUrl) + S3ManifestFolderBase;
public static string GetManifestBaseUrl() => GetBaseUrl() + Config.ManifestDirectory;

public static string GetCacheFolder(string baseUrl = null) => GetBaseUrl(baseUrl) + S3CacheFolder;
public static string GetUgaUrl(string baseUrl = null) => GetCacheFolder(baseUrl) + UgaFileName;
public static string GetRefPrefix(string baseUrl = null) => GetBaseUrl(baseUrl) + S3RefPrefix;
public static string GetCacheFolder() => GetBaseUrl() + S3CacheFolder;
public static string GetUgaUrl() => GetCacheFolder() + UgaFileName;
public static string GetRefPrefix() => GetBaseUrl() + S3RefPrefix;

public static string GetRefUrl(GenomeAssembly genomeAssembly, string baseUrl = null) =>
GetRefPrefix(baseUrl) + genomeAssembly + RefSuffix;
public static string GetRefUrl(GenomeAssembly genomeAssembly) =>
GetRefPrefix() + genomeAssembly + RefSuffix;
}
}
8 changes: 4 additions & 4 deletions Cloud/Utilities/LambdaUtilities.cs
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,10 @@ public static void DeleteTempOutput()
foreach (string tempFile in files) File.Delete(tempFile);
}

public static string GetManifestUrl(string version, GenomeAssembly genomeAssembly, int saSchemaVersion, string baseUrl = null)
public static string GetManifestUrl(string version, GenomeAssembly genomeAssembly, int saSchemaVersion = LambdaUrlHelper.SaSchemaVersion)
{
if (string.IsNullOrEmpty(version)) version = "latest";
string s3BaseUrl = LambdaUrlHelper.GetManifestBaseUrl(baseUrl) +$"/{saSchemaVersion}/";
string s3BaseUrl = LambdaUrlHelper.GetManifestBaseUrl() +$"/{saSchemaVersion}/";
switch (version)
{
case "latest":
Expand All @@ -47,9 +47,9 @@ public static string GetManifestUrl(string version, GenomeAssembly genomeAssembl
}
}

public static string GetCachePathPrefix(GenomeAssembly genomeAssembly, string baseUrl=null)
public static string GetCachePathPrefix(GenomeAssembly genomeAssembly)
{
return LambdaUrlHelper.GetCacheFolder(baseUrl).UrlCombine(genomeAssembly.ToString())
return LambdaUrlHelper.GetCacheFolder().UrlCombine(genomeAssembly.ToString())
.UrlCombine(LambdaUrlHelper.DefaultCacheSource);
}
}
Expand Down
29 changes: 18 additions & 11 deletions Downloader/Configuration.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
using System.IO;
using System;
using System.IO;
using Cloud;
using Cloud.Utilities;
using Genome;
using Microsoft.Extensions.Configuration;

namespace Downloader
Expand All @@ -21,33 +25,36 @@ public static (string HostName, string CacheDir, string ReferencesDir, string Ma
hostName = dataSource["HostName"];
if (string.IsNullOrEmpty(hostName))
throw new InvalidDataException($"Could not find the HostName entry in the {appSettingsFilename} file.");
// this env variable will over-ride the configuration in cloud
Environment.SetEnvironmentVariable(LambdaUrlHelper.UrlBaseEnvironmentVariableName, $"http://{hostName}/");
}

string cacheDir = dataSource["CacheDirectory"];
var cloudConfiguration = new Cloud.Configuration();
string cacheDir = cloudConfiguration.CacheDirectory;
if (string.IsNullOrEmpty(cacheDir))
throw new InvalidDataException($"Could not find the CacheDirectory entry in the {appSettingsFilename} file.");
throw new InvalidDataException($"Could not find the CacheDirectory entry in the Cloud.appsettings.json file.");

string referencesDir = dataSource["ReferencesDirectory"];
string referencesDir = cloudConfiguration.ReferencesDirectory;
if (string.IsNullOrEmpty(referencesDir))
throw new InvalidDataException($"Could not find the ReferencesDirectory entry in the {appSettingsFilename} file.");
throw new InvalidDataException($"Could not find the ReferencesDirectory entry in the Cloud.appsettings.json file.");

string manifestGRCh37;
string manifestGRCh38;
string manifestGRCh37 ;
string manifestGRCh38 ;

if (string.IsNullOrEmpty(manifestPrefix))
{
manifestGRCh37 = dataSource["ManifestGRCh37"];
manifestGRCh37 = LambdaUtilities.GetManifestUrl(dataSource["ManifestGRCh37"], GenomeAssembly.GRCh37);
if (string.IsNullOrEmpty(manifestGRCh37))
throw new InvalidDataException($"Could not find the ManifestGRCh37 entry in the {appSettingsFilename} file.");

manifestGRCh38 = dataSource["ManifestGRCh38"];
manifestGRCh38 = LambdaUtilities.GetManifestUrl(dataSource["ManifestGRCh38"], GenomeAssembly.GRCh38);
if (string.IsNullOrEmpty(manifestGRCh38))
throw new InvalidDataException($"Could not find the ManifestGRCh38 entry in the {appSettingsFilename} file.");
}
else
{
manifestGRCh37 = $"{manifestPrefix}_GRCh37.txt";
manifestGRCh38 = $"{manifestPrefix}_GRCh38.txt";
manifestGRCh37 = LambdaUtilities.GetManifestUrl($"{manifestPrefix}", GenomeAssembly.GRCh37);
manifestGRCh38 = LambdaUtilities.GetManifestUrl($"{manifestPrefix}", GenomeAssembly.GRCh38);
}

return (hostName, '/' + cacheDir, '/' + referencesDir, manifestGRCh37, manifestGRCh38);
Expand Down
6 changes: 2 additions & 4 deletions Downloader/Downloader.appsettings.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
{
"DataSource": {
"HostName": "annotations.nirvana.illumina.com",
"CacheDirectory": "ab0cf104f39708eabd07b8cb67e149ba-Cache",
"ReferencesDirectory": "d95867deadfe690e40f42068d6b59df8-References",
"ManifestGRCh37": "latest_SA_GRCh37.txt",
"ManifestGRCh38": "latest_SA_GRCh38.txt"
"ManifestGRCh37": "latest",
"ManifestGRCh38": "latest"
}
}
1 change: 1 addition & 0 deletions Downloader/Downloader.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
<PackageReference Include="Microsoft.Extensions.Configuration.Json" Version="6.0.0" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\Cloud\Cloud.csproj" />
<ProjectReference Include="..\CommandLine\CommandLine.csproj" />
<ProjectReference Include="..\ErrorHandling\ErrorHandling.csproj" />
<ProjectReference Include="..\ReferenceSequence\ReferenceSequence.csproj" />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ static SupplementaryAnnotationFileExtensions()
NeedsIndexSet.Add(".nsa");
NeedsIndexSet.Add(".npd");
NeedsIndexSet.Add(".rma");
NeedsIndexSet.Add(".gsa");
}

public static void AddSupplementaryAnnotationFiles(this List<RemoteFile> files,
Expand Down
3 changes: 2 additions & 1 deletion Downloader/Manifest.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using System.Collections.Generic;
using System;
using System.Collections.Generic;
using Genome;

namespace Downloader
Expand Down
41 changes: 33 additions & 8 deletions Nirvana/ProviderUtilities.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using ErrorHandling.Exceptions;
using IO;
using VariantAnnotation.GeneAnnotation;
using VariantAnnotation.GeneFusions.IO;
Expand Down Expand Up @@ -28,21 +30,40 @@ public static ProteinConservationProvider GetProteinConservationProvider(Annotat
public static IAnnotationProvider GetConservationProvider(AnnotationFiles files)
{
if (files == null || files.PhylopFile == default) return null;
(Stream phylopStream, Stream indexStream) = GetDataAndIndexStreams(files.PhylopFile.Npd, files.PhylopFile.Idx);
return new ConservationScoreProvider()
.AddPhylopReader(PersistentStreamUtils.GetReadStream(files.PhylopFile.Npd),
PersistentStreamUtils.GetReadStream(files.PhylopFile.Idx));
.AddPhylopReader(phylopStream, indexStream);
}

/// <summary>
/// Opens a data file together with its index file.
/// </summary>
/// <param name="dataFilePath">path passed to PersistentStreamUtils.GetReadStream for the data file</param>
/// <param name="indexPath">path passed to PersistentStreamUtils.GetReadStream for the index file</param>
/// <returns>the data stream followed by the index stream; neither is null</returns>
/// <exception cref="UserErrorException">thrown when either stream comes back as null</exception>
private static (Stream, Stream) GetDataAndIndexStreams(string dataFilePath, string indexPath)
{
    var dataStream = PersistentStreamUtils.GetReadStream(dataFilePath);
    if (dataStream == null)
    {
        // checked before touching the index so that no index stream is opened (and orphaned) on this path
        throw new UserErrorException($"Unable to open data file {dataFilePath}");
    }

    try
    {
        var indexStream = PersistentStreamUtils.GetReadStream(indexPath);
        if (indexStream == null)
        {
            throw new UserErrorException($"Unable to open index file {indexPath}");
        }

        return (dataStream, indexStream);
    }
    catch
    {
        // the caller never receives the data stream on failure, so it has to be released here
        dataStream.Dispose();
        throw;
    }
}

public static IAnnotationProvider GetLcrProvider(AnnotationFiles files) =>
files?.LowComplexityRegionFile == null
? null
: new LcrProvider(PersistentStreamUtils.GetReadStream(files.LowComplexityRegionFile));

public static IRefMinorProvider GetRefMinorProvider(AnnotationFiles files) =>
files == null || files.RefMinorFile == default
? null
: new RefMinorProvider(PersistentStreamUtils.GetReadStream(files.RefMinorFile.Rma),
public static IRefMinorProvider GetRefMinorProvider(AnnotationFiles files)
{
if( files == null || files.RefMinorFile == default) return null;

return new RefMinorProvider(PersistentStreamUtils.GetReadStream(files.RefMinorFile.Rma),
PersistentStreamUtils.GetReadStream(files.RefMinorFile.Idx));
}

public static IGeneAnnotationProvider GetGeneAnnotationProvider(AnnotationFiles files) => files?.NsiFiles == null
? null
Expand All @@ -64,7 +85,10 @@ private static INsaReader[] GetNsaReaders(IReadOnlyCollection<(string Nsa, strin
{
var readers = new List<INsaReader>(filePaths.Count);
foreach ((string nsaPath, string idxPath) in filePaths)
readers.Add(new NsaReader(PersistentStreamUtils.GetReadStream(nsaPath), PersistentStreamUtils.GetReadStream(idxPath)));
{
var (nsaStream, idxStream) = GetDataAndIndexStreams(nsaPath, idxPath);
readers.Add(new NsaReader(nsaStream, idxStream));
}
return readers.SortByJsonKey();
}

Expand All @@ -79,7 +103,8 @@ public static IAnnotationProvider GetGsaProvider(AnnotationFiles files)
var i = 0;
foreach ((string gsaPath, string idxPath) in filePaths)
{
readers[i] = ScoreReader.Read(PersistentStreamUtils.GetReadStream(gsaPath), PersistentStreamUtils.GetReadStream(idxPath));
var (gsaStream, idxStream) = GetDataAndIndexStreams(gsaPath, idxPath);
readers[i] = ScoreReader.Read(gsaStream, idxStream);
i++;
}

Expand Down
7 changes: 3 additions & 4 deletions SingleAnnotationLambda/CacheUtilities.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,13 @@ public static bool IsVepVersionSupported(int vepVersion) =>
public static string GetCachePathPrefix(int vepVersion, GenomeAssembly genomeAssembly)
{
string suffix = $"{genomeAssembly}/{LambdaUrlHelper.DefaultCacheSource}";

//LambdaUrlHelper.GetBaseUrl() +

switch (vepVersion)
{
case 84:
return UrlCombine($"{LambdaUrlHelper.GetBaseUrl()+LambdaUrlHelper.S3CacheFolderBase}/26/VEP84/", suffix);
return UrlCombine($"{LambdaUrlHelper.GetBaseUrl() +LambdaUrlHelper.S3CacheFolderBase}/26/VEP84/", suffix);
default:
return UrlCombine($"{LambdaUrlHelper.GetBaseUrl()+LambdaUrlHelper.S3CacheFolder}", suffix);
return UrlCombine($"{LambdaUrlHelper.GetCacheFolder()}", suffix);
}

}
Expand Down
14 changes: 14 additions & 0 deletions UnitTests/Cloud/ConsistencyTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
using Cloud;
using VariantAnnotation.SA;
using Xunit;

namespace UnitTests.Cloud;

/// <summary>
/// Checks that constants declared in the Cloud project agree with their counterparts elsewhere.
/// </summary>
public sealed class ConsistencyTests
{
    /// <summary>
    /// The SA schema version declared in LambdaUrlHelper must equal SaCommon.SchemaVersion.
    /// </summary>
    [Fact]
    public void Consistency_with_SAUtils()
    {
        var cloudSchemaVersion = LambdaUrlHelper.SaSchemaVersion;
        var saSchemaVersion    = SaCommon.SchemaVersion;

        Assert.Equal(cloudSchemaVersion, saSchemaVersion);
    }
}
15 changes: 12 additions & 3 deletions UnitTests/Cloud/LambdaUrlHelperTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,16 @@ public sealed class LambdaUrlHelperTests
[Fact]
public void GetDataUrlBase_AsExpected()
{
Assert.Equal($"http://somewhere.on.the.earth/ab0cf104f39708eabd07b8cb67e149ba-Cache/{CacheConstants.DataVersion}/", LambdaUrlHelper.GetCacheFolder("http://somewhere.on.the.earth/"));
Assert.Equal($"http://somewhere.on.the.earth/d95867deadfe690e40f42068d6b59df8-References/{ReferenceSequenceCommon.HeaderVersion}/Homo_sapiens.", LambdaUrlHelper.GetRefPrefix("http://somewhere.on.the.earth/"));
Environment.SetEnvironmentVariable("NirvanaDataUrlBase", "http://somewhere.on.the.earth/");
Assert.Equal($"http://somewhere.on.the.earth/ab0cf104f39708eabd07b8cb67e149ba-Cache/{CacheConstants.DataVersion}/", LambdaUrlHelper.GetCacheFolder());
Assert.Equal($"http://somewhere.on.the.earth/d95867deadfe690e40f42068d6b59df8-References/{ReferenceSequenceCommon.HeaderVersion}/Homo_sapiens.", LambdaUrlHelper.GetRefPrefix());
}

[Fact]
public void GetS3RefLocation_AsExpected()
{
Assert.Equal(LambdaUrlHelper.GetRefPrefix("whatever") + "GRCh37" + LambdaUrlHelper.RefSuffix, LambdaUrlHelper.GetRefUrl(GenomeAssembly.GRCh37, "whatever"));
Environment.SetEnvironmentVariable("NirvanaDataUrlBase", "whatever");
Assert.Equal(LambdaUrlHelper.GetRefPrefix() + "GRCh37" + LambdaUrlHelper.RefSuffix, LambdaUrlHelper.GetRefUrl(GenomeAssembly.GRCh37));
}

[Fact]
Expand All @@ -31,5 +33,12 @@ public void GetS3_SaManifest_Location_AsExpected()
var saManifestUrl = LambdaUtilities.GetManifestUrl("latest", GenomeAssembly.GRCh38, SaCommon.SchemaVersion);
HttpUtilities.ValidateUrl(saManifestUrl);
}

/// <summary>
/// Validates the manifest URL that is produced when no base-URL override is present,
/// i.e. when the base URL comes from the Cloud configuration file.
/// </summary>
[Fact]
public void GetS3_SaManifest_Location_from_config()
{
    // Other tests in this class set the override variable process-wide and never reset it.
    // Clear it here so the configured default is really what gets exercised, then restore it.
    string previousBaseUrl = Environment.GetEnvironmentVariable(LambdaUrlHelper.UrlBaseEnvironmentVariableName);
    Environment.SetEnvironmentVariable(LambdaUrlHelper.UrlBaseEnvironmentVariableName, null);

    try
    {
        var saManifestUrl = LambdaUtilities.GetManifestUrl("latest", GenomeAssembly.GRCh38, SaCommon.SchemaVersion);
        HttpUtilities.ValidateUrl(saManifestUrl);
    }
    finally
    {
        Environment.SetEnvironmentVariable(LambdaUrlHelper.UrlBaseEnvironmentVariableName, previousBaseUrl);
    }
}
}
}
Loading

0 comments on commit a0e6e36

Please sign in to comment.