From 1567d40ad8b0a46f00076b47d25761668c67a8d4 Mon Sep 17 00:00:00 2001 From: Gregoire Pailler Date: Wed, 24 May 2023 07:58:45 +0800 Subject: [PATCH 1/9] Massive refactoring to use IAsyncEnumerable and to support multiple providers (GitLab...) --- .../Interceptor/InterceptionExtensions.cs | 22 --- .../Interceptor/TimingInterceptor.cs | 24 --- .../LoggingExtensions.cs | 15 +- src/ScoopSearch.Indexer.Console/Program.cs | 21 +-- .../ScoopSearch.Indexer.Console.csproj | 2 +- src/ScoopSearch.Indexer/Buckets/Bucket.cs | 14 ++ .../Providers/GitHubBucketsProvider.cs | 28 ++++ .../Buckets/Providers/IBucketsProvider.cs | 8 + .../Buckets/Sources/GitHubBucketsSource.cs | 42 +++++ .../Buckets/Sources/IBucketsSource.cs | 6 + .../Buckets/Sources/IOfficialBucketsSource.cs | 5 + .../Sources/ManualBucketsListSource.cs | 64 ++++++++ .../Buckets/Sources/ManualBucketsSource.cs | 42 +++++ .../Buckets/Sources/OfficialBucketsSource.cs | 65 ++++++++ .../Configuration/BucketsOptions.cs | 10 +- .../Configuration/GitHubOptions.cs | 4 +- src/ScoopSearch.Indexer/Constants.cs | 6 - src/ScoopSearch.Indexer/Data/BucketInfo.cs | 22 --- .../Extensions/HttpClientExtensions.cs | 77 +++++++++ .../Extensions/ServiceCollectionExtensions.cs | 57 ------- src/ScoopSearch.Indexer/Git/GitRepository.cs | 13 +- src/ScoopSearch.Indexer/Git/IGitRepository.cs | 4 +- .../GitHub/GitHubClient.cs | 85 ++++++---- .../GitHub/IGitHubClient.cs | 8 +- .../IScoopSearchIndexer.cs | 2 +- .../Indexer/AzureSearchClient.cs | 7 +- .../Processor/FetchBucketsProcessor.cs | 154 ------------------ .../Processor/FetchManifestsProcessor.cs | 33 ++-- .../Processor/IFetchBucketsProcessor.cs | 8 - .../Processor/IFetchManifestsProcessor.cs | 3 +- .../ScoopSearch.Indexer.csproj | 5 +- src/ScoopSearch.Indexer/ScoopSearchIndexer.cs | 82 ++++++++-- src/ScoopSearch.Indexer/ServicesExtensions.cs | 16 +- src/ScoopSearch.Indexer/appsettings.json | 28 ++-- 34 files changed, 549 insertions(+), 433 deletions(-) delete mode 100644 
src/ScoopSearch.Indexer.Console/Interceptor/InterceptionExtensions.cs delete mode 100644 src/ScoopSearch.Indexer.Console/Interceptor/TimingInterceptor.cs create mode 100644 src/ScoopSearch.Indexer/Buckets/Bucket.cs create mode 100644 src/ScoopSearch.Indexer/Buckets/Providers/GitHubBucketsProvider.cs create mode 100644 src/ScoopSearch.Indexer/Buckets/Providers/IBucketsProvider.cs create mode 100644 src/ScoopSearch.Indexer/Buckets/Sources/GitHubBucketsSource.cs create mode 100644 src/ScoopSearch.Indexer/Buckets/Sources/IBucketsSource.cs create mode 100644 src/ScoopSearch.Indexer/Buckets/Sources/IOfficialBucketsSource.cs create mode 100644 src/ScoopSearch.Indexer/Buckets/Sources/ManualBucketsListSource.cs create mode 100644 src/ScoopSearch.Indexer/Buckets/Sources/ManualBucketsSource.cs create mode 100644 src/ScoopSearch.Indexer/Buckets/Sources/OfficialBucketsSource.cs delete mode 100644 src/ScoopSearch.Indexer/Constants.cs delete mode 100644 src/ScoopSearch.Indexer/Data/BucketInfo.cs create mode 100644 src/ScoopSearch.Indexer/Extensions/HttpClientExtensions.cs delete mode 100644 src/ScoopSearch.Indexer/Extensions/ServiceCollectionExtensions.cs delete mode 100644 src/ScoopSearch.Indexer/Processor/FetchBucketsProcessor.cs delete mode 100644 src/ScoopSearch.Indexer/Processor/IFetchBucketsProcessor.cs diff --git a/src/ScoopSearch.Indexer.Console/Interceptor/InterceptionExtensions.cs b/src/ScoopSearch.Indexer.Console/Interceptor/InterceptionExtensions.cs deleted file mode 100644 index 7b0b512..0000000 --- a/src/ScoopSearch.Indexer.Console/Interceptor/InterceptionExtensions.cs +++ /dev/null @@ -1,22 +0,0 @@ -using Castle.DynamicProxy; -using Microsoft.Extensions.DependencyInjection; - -namespace ScoopSearch.Indexer.Console.Interceptor; - -internal static class InterceptionExtensions -{ - private static readonly ProxyGenerator _proxyGenerator = new ProxyGenerator(); - - public static void DecorateWithInterceptors(this IServiceCollection serviceCollection) - where TService : 
class - where TInterceptor : IAsyncInterceptor - { - serviceCollection.Decorate((instance, serviceProvider) => - _proxyGenerator.CreateInterfaceProxyWithTargetInterface( - instance, - serviceProvider - .GetServices() - .OfType() - .ToArray())); - } -} diff --git a/src/ScoopSearch.Indexer.Console/Interceptor/TimingInterceptor.cs b/src/ScoopSearch.Indexer.Console/Interceptor/TimingInterceptor.cs deleted file mode 100644 index 2e0217a..0000000 --- a/src/ScoopSearch.Indexer.Console/Interceptor/TimingInterceptor.cs +++ /dev/null @@ -1,24 +0,0 @@ -using System.Diagnostics; -using Castle.DynamicProxy; -using Microsoft.Extensions.Logging; - -namespace ScoopSearch.Indexer.Console.Interceptor; - -internal class TimingInterceptor : AsyncTimingInterceptor -{ - private readonly ILogger _logger; - - public TimingInterceptor(ILogger logger) - { - _logger = logger; - } - - protected override void StartingTiming(IInvocation invocation) - { - } - - protected override void CompletedTiming(IInvocation invocation, Stopwatch stopwatch) - { - _logger.LogDebug("Executed '{Method}({Arguments})' in {Duration:g}", invocation.Method.Name, string.Join(", ", invocation.Arguments), stopwatch.Elapsed); - } -} diff --git a/src/ScoopSearch.Indexer.Console/LoggingExtensions.cs b/src/ScoopSearch.Indexer.Console/LoggingExtensions.cs index 5da713a..7e57122 100644 --- a/src/ScoopSearch.Indexer.Console/LoggingExtensions.cs +++ b/src/ScoopSearch.Indexer.Console/LoggingExtensions.cs @@ -28,16 +28,21 @@ public static IHostBuilder ConfigureSerilog(this IHostBuilder @this, string logF .Enrich.WithSensitiveDataMasking(options => { options.MaskingOperators.Clear(); - options.MaskingOperators.Add(new TokensMaskingOperator( - provider.GetRequiredService>().Value.Token, - provider.GetRequiredService>().Value.AdminApiKey - )); + var tokens = new[] + { + provider.GetRequiredService>().Value.Token, + provider.GetRequiredService>().Value.AdminApiKey + } + .Where(token => token != null) + .Cast() + .ToArray(); + 
options.MaskingOperators.Add(new TokensMaskingOperator(tokens)); }) .WriteTo.File(new CompactJsonFormatter(), logFile) .WriteTo.Logger(options => options .MinimumLevel.Information() // Exclude verbose HttpClient logs from the console - .Filter.ByExcluding(_ => Matching.FromSource(typeof(HttpClient).FullName)(_) && _.Level < LogEventLevel.Warning) + .Filter.ByExcluding(_ => Matching.FromSource(typeof(HttpClient).FullName!)(_) && _.Level < LogEventLevel.Warning) .WriteTo.Console(outputTemplate: "[{Timestamp:HH:mm:ss} {Level:u3} {ThreadId}] {Message:lj}{NewLine}")); }); } diff --git a/src/ScoopSearch.Indexer.Console/Program.cs b/src/ScoopSearch.Indexer.Console/Program.cs index 8a35fa6..752e091 100644 --- a/src/ScoopSearch.Indexer.Console/Program.cs +++ b/src/ScoopSearch.Indexer.Console/Program.cs @@ -1,33 +1,18 @@ -using Castle.DynamicProxy; using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.Hosting; using ScoopSearch.Indexer; using ScoopSearch.Indexer.Console; -using ScoopSearch.Indexer.Console.Interceptor; -using ScoopSearch.Indexer.Git; -using ScoopSearch.Indexer.GitHub; -using ScoopSearch.Indexer.Indexer; -using ScoopSearch.Indexer.Processor; const string LogFile = "output.txt"; +TimeSpan Timeout = TimeSpan.FromMinutes(30); using IHost host = Host.CreateDefaultBuilder(args) .ConfigureServices(services => { services.RegisterScoopSearchIndexer(); - - // Decorate some classes with interceptors for logging purpose - services.AddSingleton(); - services.DecorateWithInterceptors(); - services.DecorateWithInterceptors(); - services.DecorateWithInterceptors(); - services.DecorateWithInterceptors(); - services.DecorateWithInterceptors(); - services.DecorateWithInterceptors(); - services.DecorateWithInterceptors(); - services.DecorateWithInterceptors(); }) .ConfigureSerilog(LogFile) .Build(); -await host.Services.GetRequiredService().ExecuteAsync(); +var cancellationToken = new CancellationTokenSource(Timeout).Token; +await 
host.Services.GetRequiredService().ExecuteAsync(cancellationToken); diff --git a/src/ScoopSearch.Indexer.Console/ScoopSearch.Indexer.Console.csproj b/src/ScoopSearch.Indexer.Console/ScoopSearch.Indexer.Console.csproj index f0f6093..65a0c61 100644 --- a/src/ScoopSearch.Indexer.Console/ScoopSearch.Indexer.Console.csproj +++ b/src/ScoopSearch.Indexer.Console/ScoopSearch.Indexer.Console.csproj @@ -9,7 +9,7 @@ - + diff --git a/src/ScoopSearch.Indexer/Buckets/Bucket.cs b/src/ScoopSearch.Indexer/Buckets/Bucket.cs new file mode 100644 index 0000000..cb27869 --- /dev/null +++ b/src/ScoopSearch.Indexer/Buckets/Bucket.cs @@ -0,0 +1,14 @@ +namespace ScoopSearch.Indexer.Buckets; + +public class Bucket +{ + public Bucket(Uri uri, int stars) + { + Uri = new Uri(uri.AbsoluteUri.ToLowerInvariant()); + Stars = stars; + } + + public Uri Uri { get; private set; } + + public int Stars { get; private set; } +} diff --git a/src/ScoopSearch.Indexer/Buckets/Providers/GitHubBucketsProvider.cs b/src/ScoopSearch.Indexer/Buckets/Providers/GitHubBucketsProvider.cs new file mode 100644 index 0000000..e14efb4 --- /dev/null +++ b/src/ScoopSearch.Indexer/Buckets/Providers/GitHubBucketsProvider.cs @@ -0,0 +1,28 @@ +using ScoopSearch.Indexer.GitHub; + +namespace ScoopSearch.Indexer.Buckets.Providers; + +internal class GitHubBucketsProvider : IBucketsProvider +{ + private const string GitHubDomain = "github.com"; + + private readonly IGitHubClient _gitHubClient; + + public GitHubBucketsProvider(IGitHubClient gitHubClient) + { + _gitHubClient = gitHubClient; + } + + public async Task GetBucketAsync(Uri uri, CancellationToken cancellationToken) + { + var result = await _gitHubClient.GetRepositoryAsync(uri, cancellationToken); + if (result is not null) + { + return new Bucket(result.HtmlUri, result.Stars); + } + + return null; + } + + public bool IsCompatible(Uri uri) => uri.Host.EndsWith(GitHubDomain, StringComparison.Ordinal); +} diff --git 
a/src/ScoopSearch.Indexer/Buckets/Providers/IBucketsProvider.cs b/src/ScoopSearch.Indexer/Buckets/Providers/IBucketsProvider.cs new file mode 100644 index 0000000..4697ac8 --- /dev/null +++ b/src/ScoopSearch.Indexer/Buckets/Providers/IBucketsProvider.cs @@ -0,0 +1,8 @@ +namespace ScoopSearch.Indexer.Buckets.Providers; + +public interface IBucketsProvider +{ + Task GetBucketAsync(Uri uri, CancellationToken cancellationToken); + + bool IsCompatible(Uri uri); +} diff --git a/src/ScoopSearch.Indexer/Buckets/Sources/GitHubBucketsSource.cs b/src/ScoopSearch.Indexer/Buckets/Sources/GitHubBucketsSource.cs new file mode 100644 index 0000000..a4dbf7d --- /dev/null +++ b/src/ScoopSearch.Indexer/Buckets/Sources/GitHubBucketsSource.cs @@ -0,0 +1,42 @@ +using System.Runtime.CompilerServices; +using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Options; +using ScoopSearch.Indexer.Buckets.Providers; +using ScoopSearch.Indexer.Configuration; +using ScoopSearch.Indexer.GitHub; + +namespace ScoopSearch.Indexer.Buckets.Sources; + +internal class GitHubBucketsSource : IBucketsSource +{ + private readonly IGitHubClient _gitHubClient; + private readonly GitHubOptions _gitHubOptions; + private readonly ILogger _logger; + + public GitHubBucketsSource( + IGitHubClient gitHubClient, + IOptions gitHubOptions, + ILogger logger) + { + _gitHubClient = gitHubClient; + _logger = logger; + _gitHubOptions = gitHubOptions.Value; + } + + public async IAsyncEnumerable GetBucketsAsync([EnumeratorCancellation] CancellationToken cancellationToken) + { + if (_gitHubOptions.BucketsSearchQueries is null || _gitHubOptions.BucketsSearchQueries.Length == 0) + { + _logger.LogWarning("No buckets search queries found in configuration"); + yield break; + } + + foreach (var query in _gitHubOptions.BucketsSearchQueries) + { + await foreach(var repo in _gitHubClient.SearchRepositoriesAsync(query, cancellationToken)) + { + yield return new Bucket(repo.HtmlUri, repo.Stars); + } + } + } +} diff --git 
a/src/ScoopSearch.Indexer/Buckets/Sources/IBucketsSource.cs b/src/ScoopSearch.Indexer/Buckets/Sources/IBucketsSource.cs new file mode 100644 index 0000000..de50c77 --- /dev/null +++ b/src/ScoopSearch.Indexer/Buckets/Sources/IBucketsSource.cs @@ -0,0 +1,6 @@ +namespace ScoopSearch.Indexer.Buckets.Sources; + +public interface IBucketsSource +{ + IAsyncEnumerable GetBucketsAsync(CancellationToken cancellationToken); +} diff --git a/src/ScoopSearch.Indexer/Buckets/Sources/IOfficialBucketsSource.cs b/src/ScoopSearch.Indexer/Buckets/Sources/IOfficialBucketsSource.cs new file mode 100644 index 0000000..7e92ee7 --- /dev/null +++ b/src/ScoopSearch.Indexer/Buckets/Sources/IOfficialBucketsSource.cs @@ -0,0 +1,5 @@ +namespace ScoopSearch.Indexer.Buckets.Sources; + +public interface IOfficialBucketsSource : IBucketsSource +{ +} diff --git a/src/ScoopSearch.Indexer/Buckets/Sources/ManualBucketsListSource.cs b/src/ScoopSearch.Indexer/Buckets/Sources/ManualBucketsListSource.cs new file mode 100644 index 0000000..ae93e69 --- /dev/null +++ b/src/ScoopSearch.Indexer/Buckets/Sources/ManualBucketsListSource.cs @@ -0,0 +1,64 @@ +using System.Globalization; +using System.Runtime.CompilerServices; +using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Options; +using ScoopSearch.Indexer.Buckets.Providers; +using ScoopSearch.Indexer.Configuration; +using ScoopSearch.Indexer.Extensions; + +namespace ScoopSearch.Indexer.Buckets.Sources; + +internal class ManualBucketsListSource : IBucketsSource +{ + private readonly IHttpClientFactory _httpClientFactory; + private readonly IEnumerable _bucketsProviders; + private readonly BucketsOptions _bucketOptions; + private readonly ILogger _logger; + + public ManualBucketsListSource( + IHttpClientFactory httpClientFactory, + IEnumerable bucketsProviders, + IOptions bucketOptions, + ILogger logger) + { + _httpClientFactory = httpClientFactory; + _bucketsProviders = bucketsProviders; + _bucketOptions = bucketOptions.Value; + _logger = logger; + 
} + + public async IAsyncEnumerable GetBucketsAsync([EnumeratorCancellation] CancellationToken cancellationToken) + { + if (_bucketOptions.ManualBucketsListUrl is null) + { + _logger.LogWarning("No buckets list url found in configuration"); + yield break; + } + + var content = await _httpClientFactory.CreateDefaultClient().GetStringAsync(_bucketOptions.ManualBucketsListUrl, cancellationToken); + using var csv = new CsvHelper.CsvReader(new StringReader(content), CultureInfo.InvariantCulture); + await csv.ReadAsync(); + csv.ReadHeader(); + + while (await csv.ReadAsync()) + { + var uri = csv.GetField("url"); + if (uri == null) + { + continue; + } + + if (uri.EndsWith(".git")) + { + uri = uri[..^4]; + } + + var bucketUri = new Uri(uri); + var provider = _bucketsProviders.FirstOrDefault(provider => provider.IsCompatible(bucketUri)); + if (provider is not null && await provider.GetBucketAsync(bucketUri, cancellationToken) is { } bucket) + { + yield return bucket; + } + } + } +} diff --git a/src/ScoopSearch.Indexer/Buckets/Sources/ManualBucketsSource.cs b/src/ScoopSearch.Indexer/Buckets/Sources/ManualBucketsSource.cs new file mode 100644 index 0000000..e6e32d4 --- /dev/null +++ b/src/ScoopSearch.Indexer/Buckets/Sources/ManualBucketsSource.cs @@ -0,0 +1,42 @@ +using System.Runtime.CompilerServices; +using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Options; +using ScoopSearch.Indexer.Buckets.Providers; +using ScoopSearch.Indexer.Configuration; + +namespace ScoopSearch.Indexer.Buckets.Sources; + +internal class ManualBucketsSource : IBucketsSource +{ + private readonly IEnumerable _bucketsProviders; + private readonly BucketsOptions _bucketOptions; + private readonly ILogger _logger; + + public ManualBucketsSource( + IEnumerable bucketsProviders, + IOptions bucketOptions, + ILogger logger) + { + _bucketsProviders = bucketsProviders; + _bucketOptions = bucketOptions.Value; + _logger = logger; + } + + public async IAsyncEnumerable 
GetBucketsAsync([EnumeratorCancellation] CancellationToken cancellationToken) + { + if (_bucketOptions.ManualBuckets is null) + { + _logger.LogWarning("No manual buckets found in configuration"); + yield break; + } + + foreach (var uri in _bucketOptions.ManualBuckets) + { + var provider = _bucketsProviders.FirstOrDefault(provider => provider.IsCompatible(uri)); + if (provider is not null && await provider.GetBucketAsync(uri, cancellationToken) is { } bucket) + { + yield return bucket; + } + } + } +} diff --git a/src/ScoopSearch.Indexer/Buckets/Sources/OfficialBucketsSource.cs b/src/ScoopSearch.Indexer/Buckets/Sources/OfficialBucketsSource.cs new file mode 100644 index 0000000..b5b7f68 --- /dev/null +++ b/src/ScoopSearch.Indexer/Buckets/Sources/OfficialBucketsSource.cs @@ -0,0 +1,65 @@ +using System.Runtime.CompilerServices; +using System.Text.Json; +using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Options; +using ScoopSearch.Indexer.Buckets.Providers; +using ScoopSearch.Indexer.Configuration; +using ScoopSearch.Indexer.Extensions; + +namespace ScoopSearch.Indexer.Buckets.Sources; + +internal class OfficialBucketsSource : IOfficialBucketsSource +{ + private readonly IHttpClientFactory _httpClientFactory; + private readonly IEnumerable _bucketsProviders; + private readonly BucketsOptions _bucketOptions; + private readonly ILogger _logger; + + public OfficialBucketsSource( + IHttpClientFactory httpClientFactory, + IEnumerable bucketsProviders, + IOptions bucketOptions, + ILogger logger) + { + _httpClientFactory = httpClientFactory; + _bucketsProviders = bucketsProviders; + _logger = logger; + _bucketOptions = bucketOptions.Value; + } + + public async IAsyncEnumerable GetBucketsAsync([EnumeratorCancellation] CancellationToken cancellationToken) + { + if (_bucketOptions.OfficialBucketsListUrl is null) + { + _logger.LogWarning("No official buckets list url found in configuration"); + yield break; + } + + _logger.LogInformation("Retrieving official buckets 
from '{Uri}'", _bucketOptions.OfficialBucketsListUrl); + + await foreach (var uri in GetBucketsFromJsonAsync(_bucketOptions.OfficialBucketsListUrl, cancellationToken)) + { + var provider = _bucketsProviders.FirstOrDefault(provider => provider.IsCompatible(uri)); + if (provider is not null && await provider.GetBucketAsync(uri, cancellationToken) is { } bucket) + { + yield return bucket; + } + } + } + + private async IAsyncEnumerable GetBucketsFromJsonAsync(Uri uri, [EnumeratorCancellation] CancellationToken cancellationToken) + { + var contentJson = await _httpClientFactory.CreateDefaultClient().GetStreamAsync(uri, cancellationToken); + var officialBuckets = await JsonSerializer.DeserializeAsync>(contentJson, cancellationToken: cancellationToken); + if (officialBuckets is null) + { + _logger.LogWarning("Unable to parse buckets list from '{Uri}'", uri); + yield break; + } + + foreach (var officialBucket in officialBuckets) + { + yield return new Uri(officialBucket.Value); + } + } +} diff --git a/src/ScoopSearch.Indexer/Configuration/BucketsOptions.cs b/src/ScoopSearch.Indexer/Configuration/BucketsOptions.cs index 07bd693..1fe8f23 100644 --- a/src/ScoopSearch.Indexer/Configuration/BucketsOptions.cs +++ b/src/ScoopSearch.Indexer/Configuration/BucketsOptions.cs @@ -4,13 +4,11 @@ public class BucketsOptions { public const string Key = "Buckets"; - public Uri OfficialBucketsListUrl { get; set; } = null!; + public Uri? OfficialBucketsListUrl { get; set; } - public List GithubBucketsSearchQueries { get; set; } = new List(); + public Uri? ManualBucketsListUrl { get; set; } - public HashSet IgnoredBuckets { get; set; } = new HashSet(); + public HashSet? IgnoredBuckets { get; set; } - public HashSet ManualBuckets { get; set; } = new HashSet(); - - public Uri ManualBucketsListUrl { get; set; } = null!; + public Uri[]? 
ManualBuckets { get; set; } } diff --git a/src/ScoopSearch.Indexer/Configuration/GitHubOptions.cs b/src/ScoopSearch.Indexer/Configuration/GitHubOptions.cs index 08db1dd..4c4f597 100644 --- a/src/ScoopSearch.Indexer/Configuration/GitHubOptions.cs +++ b/src/ScoopSearch.Indexer/Configuration/GitHubOptions.cs @@ -4,5 +4,7 @@ public class GitHubOptions { public const string Key = "GitHub"; - public string Token { get; set; } = null!; + public string? Token { get; set; } + + public string[][]? BucketsSearchQueries { get; set; } } diff --git a/src/ScoopSearch.Indexer/Constants.cs b/src/ScoopSearch.Indexer/Constants.cs deleted file mode 100644 index fd3f107..0000000 --- a/src/ScoopSearch.Indexer/Constants.cs +++ /dev/null @@ -1,6 +0,0 @@ -namespace ScoopSearch.Indexer; - -internal static class Constants -{ - public const string GitHubHttpClientName = "GitHub"; -} diff --git a/src/ScoopSearch.Indexer/Data/BucketInfo.cs b/src/ScoopSearch.Indexer/Data/BucketInfo.cs deleted file mode 100644 index ba2b327..0000000 --- a/src/ScoopSearch.Indexer/Data/BucketInfo.cs +++ /dev/null @@ -1,22 +0,0 @@ -using System.Text.Json.Serialization; - -namespace ScoopSearch.Indexer.Data; - -public class BucketInfo -{ - public BucketInfo(Uri uri, int stars, bool official) - { - Uri = uri; - Stars = stars; - Official = official; - } - - [JsonInclude] - public Uri Uri { get; private set; } - - [JsonInclude] - public int Stars { get; private set; } - - [JsonInclude] - public bool Official { get; private set; } -} diff --git a/src/ScoopSearch.Indexer/Extensions/HttpClientExtensions.cs b/src/ScoopSearch.Indexer/Extensions/HttpClientExtensions.cs new file mode 100644 index 0000000..b27e69d --- /dev/null +++ b/src/ScoopSearch.Indexer/Extensions/HttpClientExtensions.cs @@ -0,0 +1,77 @@ +using System.Net; +using System.Net.Http.Headers; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Options; +using Polly; +using Polly.Extensions.Http; 
+using ScoopSearch.Indexer.Configuration; + +namespace ScoopSearch.Indexer.Extensions; + +internal static class HttpClientExtensions +{ + private const string DefaultHttpClient = "Default"; + private const string GitHubHttpClient = "GitHub"; + + public static void AddHttpClients(this IServiceCollection services) + { + services + .AddHttpClient(DefaultHttpClient) + .AddTransientHttpErrorPolicy(policyBuilder => policyBuilder.WaitAndRetryAsync(5, retryAttempt => TimeSpan.FromSeconds(Math.Pow(2, retryAttempt)))); + + services + .AddHttpClient(GitHubHttpClient, (serviceProvider, client) => + { + // Github requires a user-agent + var assemblyName = typeof(Extensions).Assembly.GetName(); + client.DefaultRequestHeaders.UserAgent.Add(new ProductInfoHeaderValue( + assemblyName.Name!, + assemblyName.Version!.ToString())); + + // Authentication to avoid API rate limitation + var gitHubOptions = serviceProvider.GetRequiredService>(); + if (gitHubOptions.Value.Token == null) + { + serviceProvider.GetRequiredService>().LogWarning("GitHub Token is not defined in configuration."); + } + client.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue("Token", gitHubOptions.Value.Token); + }) + .AddPolicyHandler((provider, _) => CreateGitHubRetryPolicy(provider)); + } + + public static HttpClient CreateDefaultClient(this IHttpClientFactory @this) + { + return @this.CreateClient(DefaultHttpClient); + } + + public static HttpClient CreateGitHubClient(this IHttpClientFactory @this) + { + return @this.CreateClient(GitHubHttpClient); + } + + private static IAsyncPolicy CreateGitHubRetryPolicy(IServiceProvider provider) + { + return Policy + .HandleResult(_ => _.StatusCode == HttpStatusCode.Forbidden) + .OrTransientHttpStatusCode() + .WaitAndRetryAsync(5, (retryAttempt, result, _) => + { + TimeSpan delay = TimeSpan.FromSeconds(Math.Pow(2, retryAttempt)); + if (result.Result?.StatusCode == HttpStatusCode.Forbidden && result.Result.Headers.TryGetValues("X-RateLimit-Reset", out var 
values)) + { + var rateLimitReset = DateTimeOffset.FromUnixTimeSeconds(Convert.ToInt64(values.Single())); + delay = rateLimitReset - DateTimeOffset.UtcNow + TimeSpan.FromSeconds(1); + } + + provider.GetRequiredService>().LogWarning( + "GitHub HttpClient failed with {StatusCode}. Waiting {TimeSpan} before next retry. Retry attempt {RetryCount}.", + result.Result?.StatusCode, + delay, + retryAttempt); + + return delay; + }, (_, _, _, _) => Task.CompletedTask); + } +} + diff --git a/src/ScoopSearch.Indexer/Extensions/ServiceCollectionExtensions.cs b/src/ScoopSearch.Indexer/Extensions/ServiceCollectionExtensions.cs deleted file mode 100644 index 678278d..0000000 --- a/src/ScoopSearch.Indexer/Extensions/ServiceCollectionExtensions.cs +++ /dev/null @@ -1,57 +0,0 @@ -using System.Net; -using System.Net.Http.Headers; -using Microsoft.Extensions.DependencyInjection; -using Microsoft.Extensions.Logging; -using Microsoft.Extensions.Options; -using Polly; -using Polly.Extensions.Http; -using ScoopSearch.Indexer.Configuration; - -namespace ScoopSearch.Indexer.Extensions; - -internal static class ServiceCollectionExtensions -{ - public static IHttpClientBuilder AddGitHubHttpClient(this IServiceCollection services, string name) - { - return services - .AddHttpClient(name, (serviceProvider, client) => - { - // Github requires a user-agent - var assemblyName = typeof(Extensions).Assembly.GetName(); - client.DefaultRequestHeaders.UserAgent.Add(new ProductInfoHeaderValue( - assemblyName.Name!, - assemblyName.Version!.ToString())); - - // Authentication to avoid API rate limitation - var gitHubOptions = serviceProvider.GetRequiredService>(); - client.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue("Token", gitHubOptions.Value.Token); - }) - .AddPolicyHandler((provider, _) => CreateRetryPolicy(provider, name)); - } - - private static IAsyncPolicy CreateRetryPolicy(IServiceProvider provider, string httpClientName) - { - return Policy - .HandleResult(_ => _.StatusCode 
== HttpStatusCode.Forbidden) - .OrTransientHttpStatusCode() - .WaitAndRetryAsync(5, (retryAttempt, result, _) => - { - TimeSpan delay = TimeSpan.FromSeconds(Math.Pow(2, retryAttempt)); - if (result.Result?.StatusCode == HttpStatusCode.Forbidden && result.Result.Headers.TryGetValues("X-RateLimit-Reset", out var values)) - { - var rateLimitReset = DateTimeOffset.FromUnixTimeSeconds(Convert.ToInt64(values.Single())); - delay = rateLimitReset - DateTimeOffset.UtcNow + TimeSpan.FromSeconds(1); - } - - provider.GetRequiredService>().LogWarning( - "HttpClient {Name} failed with {StatusCode}. Waiting {TimeSpan} before next retry. Retry attempt {RetryCount}.", - httpClientName, - result.Result?.StatusCode, - delay, - retryAttempt); - - return delay; - }, (_, _, _, _) => Task.CompletedTask); - } -} - diff --git a/src/ScoopSearch.Indexer/Git/GitRepository.cs b/src/ScoopSearch.Indexer/Git/GitRepository.cs index 5883d23..144dedd 100644 --- a/src/ScoopSearch.Indexer/Git/GitRepository.cs +++ b/src/ScoopSearch.Indexer/Git/GitRepository.cs @@ -41,7 +41,7 @@ public void Delete() } } - public IReadOnlyDictionary> GetCommitsCache(Predicate filter, CancellationToken cancellationToken) + public async Task>> GetCommitsCacheAsync(Predicate filter, CancellationToken cancellationToken) { _logger.LogDebug("Computing commits cache for repository '{WorkingDirectory}'", _repository.Info.WorkingDirectory); @@ -59,7 +59,6 @@ public IReadOnlyDictionary> GetCommitsCa } }; - string? currentLine; string? 
sha = default; DateTimeOffset commitDate = default; List files = new List(); @@ -70,7 +69,7 @@ void AddFilesToCache() { foreach (var file in files) { - if (!commitsCache!.TryGetValue(file, out var list)) + if (!commitsCache.TryGetValue(file, out var list)) { list = new List(); commitsCache.Add(file, list); @@ -82,7 +81,7 @@ void AddFilesToCache() files.Clear(); } - while ((currentLine = process.StandardOutput.ReadLine()) != null && !cancellationToken.IsCancellationRequested) + while (await process.StandardOutput.ReadLineAsync() is { } currentLine && !cancellationToken.IsCancellationRequested) { var parts = currentLine.Split(':'); switch (parts[0]) @@ -107,7 +106,7 @@ void AddFilesToCache() AddFilesToCache(); - process.WaitForExit(); + await process.WaitForExitAsync(cancellationToken); if (process.ExitCode != 0) { throw new InvalidOperationException($"git returned non-zero exit code ({process.ExitCode})"); @@ -130,8 +129,8 @@ public IEnumerable GetFilesFromIndex() .Select(_ => _.Path); } - public string ReadContent(string filePath) + public Task ReadContentAsync(string filePath, CancellationToken cancellationToken) { - return File.ReadAllText(Path.Combine(_repository.Info.WorkingDirectory, filePath)); + return File.ReadAllTextAsync(Path.Combine(_repository.Info.WorkingDirectory, filePath), cancellationToken); } } diff --git a/src/ScoopSearch.Indexer/Git/IGitRepository.cs b/src/ScoopSearch.Indexer/Git/IGitRepository.cs index e8ea909..f8ffb90 100644 --- a/src/ScoopSearch.Indexer/Git/IGitRepository.cs +++ b/src/ScoopSearch.Indexer/Git/IGitRepository.cs @@ -4,11 +4,11 @@ public interface IGitRepository { void Delete(); - IReadOnlyDictionary> GetCommitsCache(Predicate filter, CancellationToken cancellationToken); + Task>> GetCommitsCacheAsync(Predicate filter, CancellationToken cancellationToken); string GetBranchName(); IEnumerable GetFilesFromIndex(); - string ReadContent(string filePath); + Task ReadContentAsync(string filePath, CancellationToken cancellationToken); } 
diff --git a/src/ScoopSearch.Indexer/GitHub/GitHubClient.cs b/src/ScoopSearch.Indexer/GitHub/GitHubClient.cs index bb98dec..0e9d7d3 100644 --- a/src/ScoopSearch.Indexer/GitHub/GitHubClient.cs +++ b/src/ScoopSearch.Indexer/GitHub/GitHubClient.cs @@ -1,46 +1,40 @@ using System.Runtime.CompilerServices; using System.Text.Json; +using Microsoft.Extensions.Logging; +using ScoopSearch.Indexer.Extensions; namespace ScoopSearch.Indexer.GitHub; internal class GitHubClient : IGitHubClient { - private const string GitHubApiRepoBaseUri = "https://api.github.com/repos"; - private const string GitHubDomain = "github.com"; + private const string GitHubApiBaseUri = "https://api.github.com/"; private const int ResultsPerPage = 100; - private readonly HttpClient _githubHttpClient; + private readonly IHttpClientFactory _httpClientFactory; + private readonly ILogger _logger; - public GitHubClient(IHttpClientFactory httpClientFactory) + public GitHubClient(IHttpClientFactory httpClientFactory, ILogger logger) { - _githubHttpClient = httpClientFactory.CreateClient(Constants.GitHubHttpClientName); + _httpClientFactory = httpClientFactory; + _logger = logger; } - public async Task GetAsStringAsync(Uri uri, CancellationToken cancellationToken) + public async Task GetRepositoryAsync(Uri uri, CancellationToken cancellationToken) { - using (var request = new HttpRequestMessage(HttpMethod.Get, uri)) - using (var response = await _githubHttpClient.SendAsync(request, cancellationToken)) + var targetUri = await GetTargetRepositoryAsync(uri, cancellationToken); + if (targetUri == null) { - response.EnsureSuccessStatusCode(); - - return await response.Content.ReadAsStringAsync(cancellationToken); + _logger.LogWarning("{Uri} doesn't appear to be valid (non success status code)", uri); + return null; } - } - - public Task SendAsync(HttpRequestMessage request, CancellationToken cancellationToken) - { - return _githubHttpClient.SendAsync(request, cancellationToken); - } - public async Task 
GetRepositoryAsync(Uri uri, CancellationToken cancellationToken) - { - if (!IsValidRepositoryDomain(uri)) + if (targetUri != uri) { - throw new ArgumentException("The URI must be a GitHub repo URI", nameof(uri)); + _logger.LogInformation("{Uri} is redirected to {TargetUri}", uri, targetUri); } - var apiRepoUri = new Uri(GitHubApiRepoBaseUri + uri.PathAndQuery); - return await GetAsStringAsync(apiRepoUri, cancellationToken) + var getRepoUri = BuildUri("repos" + targetUri.PathAndQuery); + return await _httpClientFactory.CreateGitHubClient().GetStringAsync(getRepoUri, cancellationToken) .ContinueWith(task => { if (task.IsCompletedSuccessfully) @@ -54,24 +48,46 @@ public Task SendAsync(HttpRequestMessage request, Cancellat }, cancellationToken); } - public bool IsValidRepositoryDomain(Uri uri) + private async Task GetTargetRepositoryAsync(Uri uri, CancellationToken cancellationToken) { - return uri.Host.EndsWith(GitHubDomain, StringComparison.Ordinal); + // Validate uri (existing repository, follow redirections...) + using var request = new HttpRequestMessage(HttpMethod.Head, uri); + using var response = await _httpClientFactory.CreateGitHubClient().SendAsync(request, cancellationToken); + + if (request.RequestUri != null) + { + if (!response.IsSuccessStatusCode) + { + return null; + } + + return request.RequestUri; + } + + return null; } - public async IAsyncEnumerable SearchRepositoriesAsync(Uri query, [EnumeratorCancellation] CancellationToken cancellationToken) + public async IAsyncEnumerable SearchRepositoriesAsync(string[] query, [EnumeratorCancellation] CancellationToken cancellationToken) { int page = 1; int? 
totalPages = null; do { - var searchUri = new Uri($"{query.AbsoluteUri}&per_page={ResultsPerPage}&page={page}&sort=updated"); - var results = await GetSearchResultsAsync(searchUri, cancellationToken); + var queryString = new Dictionary() + { + { "q", string.Join('+', query) }, + { "per_page", ResultsPerPage }, + { "page", page }, + { "sort", "updated" } + }; + var searchReposUri = BuildUri("/search/repositories", queryString); + var results = await GetSearchResultsAsync(searchReposUri, cancellationToken); if (results == null) { break; } + _logger.LogDebug("Found {Count} repositories for query '{Query}'", results.Items.Length, searchReposUri); foreach (var gitHubRepo in results.Items) { yield return gitHubRepo; @@ -83,7 +99,18 @@ public async IAsyncEnumerable SearchRepositoriesAsync(Uri query, [En private async Task GetSearchResultsAsync(Uri searchUri, CancellationToken cancellationToken) { - return await GetAsStringAsync(searchUri, cancellationToken) + return await _httpClientFactory.CreateGitHubClient().GetStringAsync(searchUri, cancellationToken) .ContinueWith(task => JsonSerializer.Deserialize(task.Result), cancellationToken); } + + private static Uri BuildUri(string path, Dictionary? queryString = null) + { + var uriBuilder = new UriBuilder(GitHubApiBaseUri) + { + Path = path, + Query = queryString == null ? 
null : string.Join("&", queryString.Select(kv => $"{kv.Key}={kv.Value}")) + }; + + return uriBuilder.Uri; + } } diff --git a/src/ScoopSearch.Indexer/GitHub/IGitHubClient.cs b/src/ScoopSearch.Indexer/GitHub/IGitHubClient.cs index f8803e9..ef72ef9 100644 --- a/src/ScoopSearch.Indexer/GitHub/IGitHubClient.cs +++ b/src/ScoopSearch.Indexer/GitHub/IGitHubClient.cs @@ -2,13 +2,7 @@ namespace ScoopSearch.Indexer.GitHub; public interface IGitHubClient { - Task GetAsStringAsync(Uri uri, CancellationToken cancellationToken); - - Task SendAsync(HttpRequestMessage request, CancellationToken cancellationToken); - Task GetRepositoryAsync(Uri uri, CancellationToken cancellationToken); - bool IsValidRepositoryDomain(Uri uri); - - IAsyncEnumerable SearchRepositoriesAsync(Uri query, CancellationToken cancellationToken); + IAsyncEnumerable SearchRepositoriesAsync(string[] query, CancellationToken cancellationToken); } diff --git a/src/ScoopSearch.Indexer/IScoopSearchIndexer.cs b/src/ScoopSearch.Indexer/IScoopSearchIndexer.cs index c80e408..b741861 100644 --- a/src/ScoopSearch.Indexer/IScoopSearchIndexer.cs +++ b/src/ScoopSearch.Indexer/IScoopSearchIndexer.cs @@ -2,5 +2,5 @@ namespace ScoopSearch.Indexer; public interface IScoopSearchIndexer { - Task ExecuteAsync(); + Task ExecuteAsync(CancellationToken cancellationToken); } diff --git a/src/ScoopSearch.Indexer/Indexer/AzureSearchClient.cs b/src/ScoopSearch.Indexer/Indexer/AzureSearchClient.cs index bffd771..68d7c91 100644 --- a/src/ScoopSearch.Indexer/Indexer/AzureSearchClient.cs +++ b/src/ScoopSearch.Indexer/Indexer/AzureSearchClient.cs @@ -3,7 +3,6 @@ using Azure.Search.Documents; using Azure.Search.Documents.Models; using Microsoft.Extensions.Options; -using MoreLinq; using ScoopSearch.Indexer.Configuration; using ScoopSearch.Indexer.Data; @@ -52,7 +51,7 @@ public async IAsyncEnumerable GetAllManifestsAsync([EnumeratorCanc options.Select.Add(ManifestMetadata.DuplicateOfField); options.OrderBy.Add(ManifestInfo.IdField); 
options.IncludeTotalCount = true; - + // Batch retrieve manifests to overcome limitation of 100_000 documents per search string? lastId = null; bool hasResults; @@ -89,11 +88,11 @@ public async Task> GetBucketsAsync(CancellationToken token) public async Task DeleteManifestsAsync(IEnumerable manifests, CancellationToken token) { - await Parallel.ForEachAsync(manifests.Batch(BatchSize), token, async (batch, _) => { await _client.DeleteDocumentsAsync(batch, null, _); }); + await Parallel.ForEachAsync(manifests.Chunk(BatchSize), token, async (batch, _) => { await _client.DeleteDocumentsAsync(batch, null, _); }); } public async Task UpsertManifestsAsync(IEnumerable manifests, CancellationToken token) { - await Parallel.ForEachAsync(manifests.Batch(BatchSize), token, async (batch, _) => { await _client.UploadDocumentsAsync(batch, null, _); }); + await Parallel.ForEachAsync(manifests.Chunk(BatchSize), token, async (batch, _) => { await _client.UploadDocumentsAsync(batch, null, _); }); } } diff --git a/src/ScoopSearch.Indexer/Processor/FetchBucketsProcessor.cs b/src/ScoopSearch.Indexer/Processor/FetchBucketsProcessor.cs deleted file mode 100644 index 18d0cdb..0000000 --- a/src/ScoopSearch.Indexer/Processor/FetchBucketsProcessor.cs +++ /dev/null @@ -1,154 +0,0 @@ -using System.Collections.Concurrent; -using System.Globalization; -using System.Runtime.CompilerServices; -using System.Text.Json; -using Microsoft.Extensions.Logging; -using Microsoft.Extensions.Options; -using ScoopSearch.Indexer.Configuration; -using ScoopSearch.Indexer.Data; -using ScoopSearch.Indexer.GitHub; - -namespace ScoopSearch.Indexer.Processor; - -internal class FetchBucketsProcessor : IFetchBucketsProcessor -{ - private readonly IGitHubClient _gitHubClient; - private readonly ILogger _logger; - private readonly BucketsOptions _bucketOptions; - - public FetchBucketsProcessor( - IGitHubClient gitHubClient, - IOptions bucketOptions, - ILogger logger) - { - _gitHubClient = gitHubClient; - _logger = 
logger; - _bucketOptions = bucketOptions.Value; - } - - public async Task FetchBucketsAsync(CancellationToken cancellationToken) - { - _logger.LogInformation("Retrieving buckets from sources"); - var officialBucketsTask = RetrieveOfficialBucketsAsync(cancellationToken); - var githubBucketsTask = SearchForBucketsOnGitHubAsync(cancellationToken); - var manualBucketsTask = RetrieveBucketsFromListAsync(_bucketOptions.ManualBucketsListUrl, cancellationToken); - - await Task.WhenAll(officialBucketsTask, githubBucketsTask, manualBucketsTask); - - _logger.LogInformation("Found {Count} official buckets ({Url})", officialBucketsTask.Result.Count(), _bucketOptions.OfficialBucketsListUrl); - _logger.LogInformation("Found {Count} buckets on GitHub", githubBucketsTask.Result.Count); - _logger.LogInformation("Found {Count} buckets to ignore (appsettings.json)", _bucketOptions.IgnoredBuckets.Count); - _logger.LogInformation("Found {Count} buckets to add (appsettings.json)", _bucketOptions.ManualBuckets.Count); - _logger.LogInformation("Found {Count} buckets to add from external list ({Url})", manualBucketsTask.Result.Count(), _bucketOptions.ManualBucketsListUrl); - - var allBuckets = githubBucketsTask.Result.Keys - .Concat(officialBucketsTask.Result) - .Concat(_bucketOptions.ManualBuckets) - .Concat(manualBucketsTask.Result) - .Except(_bucketOptions.IgnoredBuckets) - .Where(_gitHubClient.IsValidRepositoryDomain) - .DistinctBy(_ => _.AbsoluteUri.ToLowerInvariant()) - .ToHashSet(); - - _logger.LogInformation("{Count} buckets found for indexing", allBuckets.Count); - var bucketsToIndexTasks = allBuckets.Select(async _ => - { - var stars = githubBucketsTask.Result.TryGetValue(_, out var value) ? value : (await _gitHubClient.GetRepositoryAsync(_, cancellationToken))?.Stars ?? 
-1; - var official = officialBucketsTask.Result.Contains(_); - _logger.LogDebug("Adding bucket '{Url}' (stars: {Stars}, official: {Official})", _, stars, official); - - return new BucketInfo(_, stars, official); - }).ToArray(); - - return await Task.WhenAll(bucketsToIndexTasks); - } - - private async Task> RetrieveOfficialBucketsAsync(CancellationToken cancellationToken) - { - var contentJson = await _gitHubClient.GetAsStringAsync(_bucketOptions.OfficialBucketsListUrl, cancellationToken); - var officialBuckets = JsonSerializer.Deserialize>(contentJson)?.Values; - - return officialBuckets?.Select(_ => new Uri(_)).ToHashSet() ?? Enumerable.Empty(); - } - - private async Task> RetrieveBucketsFromListAsync(Uri bucketsList, CancellationToken cancellationToken) - { - ConcurrentBag buckets = new(); - - var tasks = new List(); - await foreach (var uri in GetBucketsFromList(bucketsList, cancellationToken)) - { - tasks.Add(GetTargetRepository(uri, cancellationToken) - .ContinueWith(t => { if (t.Result != null) { buckets.Add(t.Result); } }, cancellationToken)); - } - - await Task.WhenAll(tasks); - - return buckets; - } - - private async IAsyncEnumerable GetBucketsFromList(Uri bucketsList, [EnumeratorCancellation] CancellationToken cancellationToken) - { - var content = await _gitHubClient.GetAsStringAsync(bucketsList, cancellationToken); - using var csv = new CsvHelper.CsvReader(new StringReader(content), CultureInfo.InvariantCulture); - await csv.ReadAsync(); - csv.ReadHeader(); - - while (await csv.ReadAsync()) - { - var uri = csv.GetField("url"); - if (uri == null) - { - continue; - } - - if (uri.EndsWith(".git")) - { - uri = uri.Substring(0, uri.Length - 4); - } - - yield return new Uri(uri); - } - } - - private async Task GetTargetRepository(Uri uri, CancellationToken cancellationToken) - { - // Validate uri (existing repository, follow redirections...) 
- using var request = new HttpRequestMessage(HttpMethod.Head, uri); - using var response = await _gitHubClient.SendAsync(request, cancellationToken); - - if (request.RequestUri != null) - { - if (!response.IsSuccessStatusCode) - { - _logger.LogWarning("Skipping '{Uri}' because it returns '{Status}' status", uri, response.StatusCode); - return null; - } - - if (request.RequestUri != uri) - { - _logger.LogDebug("'{Uri}' redirects to '{RedirectUri}'", uri, request.RequestUri); - } - - return request.RequestUri; - } - - return null; - } - - private async Task> SearchForBucketsOnGitHubAsync(CancellationToken cancellationToken) - { - ConcurrentDictionary buckets = new ConcurrentDictionary(); - await Parallel.ForEachAsync(_bucketOptions.GithubBucketsSearchQueries, - new ParallelOptions() { CancellationToken = cancellationToken }, - async (gitHubSearchQuery, token) => - { - await foreach (var repository in _gitHubClient.SearchRepositoriesAsync(gitHubSearchQuery, token)) - { - buckets[repository.HtmlUri] = repository.Stars; - } - }); - - return buckets; - } -} diff --git a/src/ScoopSearch.Indexer/Processor/FetchManifestsProcessor.cs b/src/ScoopSearch.Indexer/Processor/FetchManifestsProcessor.cs index dddd0ed..71f9bca 100644 --- a/src/ScoopSearch.Indexer/Processor/FetchManifestsProcessor.cs +++ b/src/ScoopSearch.Indexer/Processor/FetchManifestsProcessor.cs @@ -1,4 +1,6 @@ +using System.Runtime.CompilerServices; using Microsoft.Extensions.Logging; +using ScoopSearch.Indexer.Buckets; using ScoopSearch.Indexer.Data; using ScoopSearch.Indexer.Git; using ScoopSearch.Indexer.Manifest; @@ -18,33 +20,20 @@ public FetchManifestsProcessor(IGitRepositoryProvider gitRepositoryProvider, IKe _logger = logger; } - public async Task FetchManifestsAsync(BucketInfo bucketInfo, CancellationToken cancellationToken) + public IAsyncEnumerable FetchManifestsAsync(Bucket bucket, CancellationToken cancellationToken) { // Clone/Update bucket repository and retrieve manifests - 
_logger.LogInformation("Generating manifests list for '{Bucket}'", bucketInfo.Uri); + _logger.LogDebug("Generating manifests list for '{Bucket}'", bucket.Uri); - var manifestsFromBucket = this - .GetManifestsFromRepository(bucketInfo.Uri, cancellationToken) - .ToArray(); - - _logger.LogInformation("Found {Count} manifests for {Bucket}", manifestsFromBucket.Length, bucketInfo.Uri); - - foreach (var manifestInfo in manifestsFromBucket) - { - manifestInfo.Metadata.SetRepositoryMetadata(bucketInfo.Official, bucketInfo.Stars); - } - - return await Task.FromResult(manifestsFromBucket); + return GetManifestsFromRepositoryAsync(bucket.Uri, cancellationToken); } - private IEnumerable GetManifestsFromRepository(Uri bucketUri, CancellationToken cancellationToken) + private async IAsyncEnumerable GetManifestsFromRepositoryAsync(Uri bucketUri, [EnumeratorCancellation] CancellationToken cancellationToken) { - var results = new List(); - var repository = _gitRepositoryProvider.Download(bucketUri, cancellationToken); if (repository == null) { - return results; + yield break; } _logger.LogDebug("Generating manifest infos from repository '{Repository}'", bucketUri); @@ -52,7 +41,7 @@ private IEnumerable GetManifestsFromRepository(Uri bucketUri, Canc var files = repository.GetFilesFromIndex().ToArray(); var manifestsSubPath = files.Any(_ => _.StartsWith("bucket/")) ? 
"bucket" : null; - var commitCache = repository.GetCommitsCache(_ => IsManifestPredicate(manifestsSubPath, _), cancellationToken); + var commitCache = await repository.GetCommitsCacheAsync(_ => IsManifestPredicate(manifestsSubPath, _), cancellationToken); foreach (var filePath in files .Where(_ => IsManifestPredicate(manifestsSubPath, _)) @@ -60,7 +49,7 @@ private IEnumerable GetManifestsFromRepository(Uri bucketUri, Canc { if (commitCache.TryGetValue(filePath, out var commits) && commits.FirstOrDefault() is { } commit) { - var manifestData = repository.ReadContent(filePath); + var manifestData = await repository.ReadContentAsync(filePath, cancellationToken); var manifestMetadata = new ManifestMetadata( bucketUri.AbsoluteUri, repository.GetBranchName(), @@ -71,7 +60,7 @@ private IEnumerable GetManifestsFromRepository(Uri bucketUri, Canc var manifest = CreateManifest(manifestData, manifestMetadata); if (manifest != null) { - results.Add(manifest); + yield return manifest; } } else @@ -81,8 +70,6 @@ private IEnumerable GetManifestsFromRepository(Uri bucketUri, Canc } repository.Delete(); - - return results; } bool IsManifestPredicate(string? 
manifestsSubPath, string filePath) diff --git a/src/ScoopSearch.Indexer/Processor/IFetchBucketsProcessor.cs b/src/ScoopSearch.Indexer/Processor/IFetchBucketsProcessor.cs deleted file mode 100644 index 5bb308b..0000000 --- a/src/ScoopSearch.Indexer/Processor/IFetchBucketsProcessor.cs +++ /dev/null @@ -1,8 +0,0 @@ -using ScoopSearch.Indexer.Data; - -namespace ScoopSearch.Indexer.Processor; - -public interface IFetchBucketsProcessor -{ - Task FetchBucketsAsync(CancellationToken cancellationToken); -} diff --git a/src/ScoopSearch.Indexer/Processor/IFetchManifestsProcessor.cs b/src/ScoopSearch.Indexer/Processor/IFetchManifestsProcessor.cs index 958bc12..092a7fd 100644 --- a/src/ScoopSearch.Indexer/Processor/IFetchManifestsProcessor.cs +++ b/src/ScoopSearch.Indexer/Processor/IFetchManifestsProcessor.cs @@ -1,8 +1,9 @@ +using ScoopSearch.Indexer.Buckets; using ScoopSearch.Indexer.Data; namespace ScoopSearch.Indexer.Processor; public interface IFetchManifestsProcessor { - Task FetchManifestsAsync(BucketInfo bucketInfo, CancellationToken cancellationToken); + IAsyncEnumerable FetchManifestsAsync(Bucket bucket, CancellationToken cancellationToken); } diff --git a/src/ScoopSearch.Indexer/ScoopSearch.Indexer.csproj b/src/ScoopSearch.Indexer/ScoopSearch.Indexer.csproj index a922bf2..100f33b 100644 --- a/src/ScoopSearch.Indexer/ScoopSearch.Indexer.csproj +++ b/src/ScoopSearch.Indexer/ScoopSearch.Indexer.csproj @@ -1,16 +1,13 @@ - - - - + diff --git a/src/ScoopSearch.Indexer/ScoopSearchIndexer.cs b/src/ScoopSearch.Indexer/ScoopSearchIndexer.cs index 27e753d..d5979bf 100644 --- a/src/ScoopSearch.Indexer/ScoopSearchIndexer.cs +++ b/src/ScoopSearch.Indexer/ScoopSearchIndexer.cs @@ -1,4 +1,10 @@ using System.Collections.Concurrent; +using System.Diagnostics; +using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Options; +using ScoopSearch.Indexer.Buckets; +using ScoopSearch.Indexer.Buckets.Sources; +using ScoopSearch.Indexer.Configuration; using ScoopSearch.Indexer.Data; 
using ScoopSearch.Indexer.Processor; @@ -6,34 +12,80 @@ namespace ScoopSearch.Indexer; internal class ScoopSearchIndexer : IScoopSearchIndexer { - private readonly IFetchBucketsProcessor _fetchBucketsProcessor; + private readonly IEnumerable _bucketsProviders; + private readonly IOfficialBucketsSource _officialBucketsSource; private readonly IFetchManifestsProcessor _fetchManifestsProcessor; private readonly IIndexingProcessor _indexingProcessor; + private readonly BucketsOptions _bucketsOptions; + private readonly ILogger _logger; - public ScoopSearchIndexer(IFetchBucketsProcessor fetchBucketsProcessor, IFetchManifestsProcessor fetchManifestsProcessor, IIndexingProcessor indexingProcessor) + public ScoopSearchIndexer( + IEnumerable bucketsProviders, + IOfficialBucketsSource officialBucketsSource, + IFetchManifestsProcessor fetchManifestsProcessor, + IIndexingProcessor indexingProcessor, + IOptions bucketsOptions, + ILogger logger) { - _fetchBucketsProcessor = fetchBucketsProcessor; + _bucketsProviders = bucketsProviders; + _officialBucketsSource = officialBucketsSource; _fetchManifestsProcessor = fetchManifestsProcessor; _indexingProcessor = indexingProcessor; + _bucketsOptions = bucketsOptions.Value; + _logger = logger; } - public async Task ExecuteAsync() + public async Task ExecuteAsync(CancellationToken cancellationToken) { - var cancellationToken = CancellationToken.None; + var (allBuckets, allManifests) = await ProcessBucketsAsync(cancellationToken); + _logger.LogInformation("Found {Buckets} buckets for a total of {Manifests} manifests.", allBuckets.Count, allManifests.Count); - var buckets = await _fetchBucketsProcessor.FetchBucketsAsync(cancellationToken); - var bucketsUrl = buckets.Select(_ => _.Uri).ToArray(); + await _indexingProcessor.CreateIndexIfRequiredAsync(cancellationToken); + await _indexingProcessor.CleanIndexFromNonExistentBucketsAsync(allBuckets.Select(x => x.Uri).ToArray(), cancellationToken); + await 
_indexingProcessor.UpdateIndexWithManifestsAsync(allManifests.ToArray(), cancellationToken); + } + + private async Task<(ConcurrentBag allBuckets, ConcurrentBag allManifests)> ProcessBucketsAsync(CancellationToken cancellationToken) + { + var officialBuckets = await _officialBucketsSource + .GetBucketsAsync(cancellationToken) + .ToArrayAsync(cancellationToken); - ConcurrentBag tasksResult = new(); - await Parallel.ForEachAsync(buckets, cancellationToken, async (bucket, _) => + var buckets = _bucketsProviders + .Where(bucketSource => bucketSource is not IOfficialBucketsSource) + .Select(provider => provider.GetBucketsAsync(cancellationToken)) + .Prepend(officialBuckets.ToAsyncEnumerable()) + .Merge() + .Distinct(bucket => bucket.Uri) + .Where(bucket => _bucketsOptions.IgnoredBuckets is null || !_bucketsOptions.IgnoredBuckets.Contains(bucket.Uri)); + + var officialBucketsHashSet = officialBuckets.Select(bucket => bucket.Uri).ToHashSet(); + var allManifests = new ConcurrentBag(); + var allBuckets = new ConcurrentBag(); + await Parallel.ForEachAsync(buckets, cancellationToken, async (bucket, token) => { - var result = await _fetchManifestsProcessor.FetchManifestsAsync(bucket, cancellationToken); - tasksResult.Add(result); + int manifestsCount = 0; + var stopWatch = Stopwatch.StartNew(); + var isOfficialBuckets = officialBucketsHashSet.Contains(bucket.Uri); + await foreach (var manifest in _fetchManifestsProcessor.FetchManifestsAsync(bucket, token)) + { + manifest.Metadata.SetRepositoryMetadata(isOfficialBuckets, bucket.Stars); + allManifests.Add(manifest); + manifestsCount++; + } + + allBuckets.Add(bucket); + stopWatch.Stop(); + if (manifestsCount == 0) + { + _logger.LogInformation("Processed bucket {Uri} (No manifest found, Duration: {Duration:g})", bucket.Uri, stopWatch.Elapsed); + } + else + { + _logger.LogInformation("Processed bucket {Uri} (Manifests: {Manifests}, Stars: {Stars}, Official: {Official}, Duration: {Duration:g})", bucket.Uri, manifestsCount, 
bucket.Stars, isOfficialBuckets, stopWatch.Elapsed); + } }); - await _indexingProcessor.CreateIndexIfRequiredAsync(cancellationToken); - await _indexingProcessor.CleanIndexFromNonExistentBucketsAsync(bucketsUrl, cancellationToken); - var manifests = tasksResult.SelectMany(_ => _).ToArray(); - await _indexingProcessor.UpdateIndexWithManifestsAsync(manifests, cancellationToken); + return (allBuckets, allManifests); } } diff --git a/src/ScoopSearch.Indexer/ServicesExtensions.cs b/src/ScoopSearch.Indexer/ServicesExtensions.cs index 614ed8b..b7fa9f6 100644 --- a/src/ScoopSearch.Indexer/ServicesExtensions.cs +++ b/src/ScoopSearch.Indexer/ServicesExtensions.cs @@ -1,5 +1,7 @@ using Microsoft.Extensions.Configuration; using Microsoft.Extensions.DependencyInjection; +using ScoopSearch.Indexer.Buckets.Providers; +using ScoopSearch.Indexer.Buckets.Sources; using ScoopSearch.Indexer.Configuration; using ScoopSearch.Indexer.Extensions; using ScoopSearch.Indexer.Git; @@ -26,14 +28,24 @@ public static void RegisterScoopSearchIndexer(this IServiceCollection @this) .Configure((options, configuration) => configuration.GetRequiredSection(GitHubOptions.Key).Bind(options)); // Services - @this.AddGitHubHttpClient(Constants.GitHubHttpClientName); + @this.AddHttpClients(); @this.AddSingleton(); @this.AddSingleton(); @this.AddSingleton(); @this.AddSingleton(); @this.AddSingleton(); @this.AddSingleton(); - @this.AddSingleton(); + + @this.AddSingleton(); + // TODO Add other providers (GitLab...) + + @this.AddSingleton(); + @this.AddSingleton(); + @this.AddSingleton(); + // TODO Add other sources (GitLab...) 
+ @this.AddSingleton(); + @this.AddSingleton(); + @this.AddSingleton(); @this.AddSingleton(); } diff --git a/src/ScoopSearch.Indexer/appsettings.json b/src/ScoopSearch.Indexer/appsettings.json index 4562bcc..d517bc5 100644 --- a/src/ScoopSearch.Indexer/appsettings.json +++ b/src/ScoopSearch.Indexer/appsettings.json @@ -8,22 +8,24 @@ "GitHub": { // GitHub API token with public_repo scope - "Token": "" + "Token": "", + + "BucketsSearchQueries": [ + [ "topic:scoop-bucket" ], + // Split search queries as GitHub search API returns at most 1000 results + [ "scoop-bucket", "created:>2023-01-01" ], + [ "scoop+bucket", "created:>2023-01-01" ], + [ "scoop-bucket", "created:2020-01-01..2023-01-01" ], + [ "scoop+bucket", "created:2020-01-01..2023-01-01" ], + [ "scoop-bucket", "created:<2020-01-01" ], + [ "scoop+bucket", "created:<2020-01-01" ] + ] }, "Buckets": { "OfficialBucketsListUrl": "https://raw.githubusercontent.com/ScoopInstaller/Scoop/master/buckets.json", - "GithubBucketsSearchQueries": [ - "https://api.github.com/search/repositories?q=topic:scoop-bucket", - // Split search queries as GitHub search API returns at most 1000 results - "https://api.github.com/search/repositories?q=scoop-bucket+created:>2023-01-01", - "https://api.github.com/search/repositories?q=scoop+bucket+created:>2023-01-01", - "https://api.github.com/search/repositories?q=scoop-bucket+created:2020-01-01..2023-01-01", - "https://api.github.com/search/repositories?q=scoop+bucket+created:2020-01-01..2023-01-01", - "https://api.github.com/search/repositories?q=scoop-bucket+created:<2020-01-01", - "https://api.github.com/search/repositories?q=scoop+bucket+created:<2020-01-01" - ], + "ManualBucketsListUrl": "https://raw.githubusercontent.com/rasa/scoop-directory/master/include.txt", "IgnoredBuckets": [ // No manifests inside @@ -33,8 +35,6 @@ ], "ManualBuckets": [ - ], - - "ManualBucketsListUrl": "https://raw.githubusercontent.com/rasa/scoop-directory/master/include.txt" + ] } } From 
704783ee3e4801b78828b09dc14afa28e2658b24 Mon Sep 17 00:00:00 2001 From: Gregoire Pailler Date: Wed, 24 May 2023 10:04:52 +0800 Subject: [PATCH 2/9] Fix a bunch of tests and ignore remaining files --- .../Git/GitRepositoryTests.cs | 32 ++--- .../GitHub/GitHubClientTests.cs | 120 ++++-------------- .../ScoopSearch.Indexer.Tests.csproj | 12 +- 3 files changed, 49 insertions(+), 115 deletions(-) diff --git a/src/ScoopSearch.Indexer.Tests/Git/GitRepositoryTests.cs b/src/ScoopSearch.Indexer.Tests/Git/GitRepositoryTests.cs index db87a46..eae5a28 100644 --- a/src/ScoopSearch.Indexer.Tests/Git/GitRepositoryTests.cs +++ b/src/ScoopSearch.Indexer.Tests/Git/GitRepositoryTests.cs @@ -89,45 +89,46 @@ public void GetItemsFromIndex_ReturnsEntries() } [Fact] - public void ReadContent_NonExistentEntry_Throws() + public async void ReadContentAsync_NonExistentEntry_Throws() { // Arrange var repository = _provider.Download(new Uri(Constants.TestRepositoryUri), CancellationToken.None)!; // Act - Action act = () => repository.ReadContent("foo"); + var result = async () => await repository.ReadContentAsync("foo", CancellationToken.None); // Assert - act.Should().Throw(); + await result.Should().ThrowAsync(); } [Fact] - public void ReadContent_ExistentEntry_ReturnsContent() + public async void ReadContentAsync_ExistentEntry_ReturnsContent() { // Arrange var repository = _provider.Download(new Uri(Constants.TestRepositoryUri), CancellationToken.None)!; // Act - var result = repository.ReadContent("kaxaml.json"); + var result = async () => await repository.ReadContentAsync("kaxaml.json", CancellationToken.None); // Assert - result.Should().NotBeNull(); - JsonSerializer.Deserialize(result).Should().NotBeNull(); + var taskResult = await result.Should().NotThrowAsync(); + JsonSerializer.Deserialize(taskResult.Subject).Should().NotBeNull(); } [Theory] [MemberData(nameof(GetCommitsCacheTestCases))] - public void GetCommitsCache_ReturnsExpectedFilesAndCommits(string repositoryUri, Predicate 
filter, int expectedFiles, int expectedCommits) + public async void GetCommitsCacheAsync_ReturnsExpectedFilesAndCommits(string repositoryUri, Predicate filter, int expectedFiles, int expectedCommits) { // Arrange var repository = _provider.Download(new Uri(repositoryUri), CancellationToken.None)!; // Act - var result = repository.GetCommitsCache(filter, CancellationToken.None); + var result = async () => await repository.GetCommitsCacheAsync(filter, CancellationToken.None); // Assert - result.Should().HaveCount(expectedFiles); - result.SelectMany(_ => _.Value).DistinctBy(_ => _.Sha).Should().HaveCount(expectedCommits); + var taskResult = await result.Should().NotThrowAsync(); + taskResult.Subject.Should().HaveCount(expectedFiles) + .And.Subject.SelectMany(_ => _.Value).DistinctBy(_ => _.Sha).Should().HaveCount(expectedCommits); } public static IEnumerable GetCommitsCacheTestCases() @@ -142,18 +143,17 @@ public static IEnumerable GetCommitsCacheTestCases() [InlineData(Constants.TestRepositoryUri, 1, 5)] [InlineData("https://github.com/niheaven/scoop-sysinternals", 1, 70)] [InlineData("https://github.com/ScoopInstaller/Extras", 10, 1_900)] - public void GetCommitsCache_BuildCache_Succeeds(string repositoryUri, double maxSeconds, int minimalManifestsCount) + public async void GetCommitsCacheAsync_BuildCache_Succeeds(string repositoryUri, double maxSeconds, int minimalManifestsCount) { // Arrange var repository = _provider.Download(new Uri(repositoryUri), CancellationToken.None)!; bool IsManifestFile(string filePath) => Path.GetExtension(filePath).Equals(".json", StringComparison.OrdinalIgnoreCase); // Act - IReadOnlyDictionary>? 
result = null; - Action act = () => result = repository.GetCommitsCache(IsManifestFile, CancellationToken.None); + var result = async () => await repository.GetCommitsCacheAsync(IsManifestFile, CancellationToken.None); // Assert - act.ExecutionTime().Should().BeLessThan(maxSeconds.Seconds()); - result.Should().HaveCountGreaterThan(minimalManifestsCount); + var taskResult = await result.Should().CompleteWithinAsync(maxSeconds.Seconds()); + taskResult.Subject.Should().HaveCountGreaterThan(minimalManifestsCount); } } diff --git a/src/ScoopSearch.Indexer.Tests/GitHub/GitHubClientTests.cs b/src/ScoopSearch.Indexer.Tests/GitHub/GitHubClientTests.cs index 2127a2d..83932a6 100644 --- a/src/ScoopSearch.Indexer.Tests/GitHub/GitHubClientTests.cs +++ b/src/ScoopSearch.Indexer.Tests/GitHub/GitHubClientTests.cs @@ -1,7 +1,7 @@ -using System.Text.Json; using FluentAssertions; using Microsoft.Extensions.DependencyInjection; using ScoopSearch.Indexer.GitHub; +using ScoopSearch.Indexer.Tests.Helpers; using Xunit.Abstractions; namespace ScoopSearch.Indexer.Tests.GitHub; @@ -13,75 +13,38 @@ public class GitHubClientTests : IClassFixture public GitHubClientTests(HostFixture hostFixture, ITestOutputHelper testOutputHelper) { hostFixture.Configure(testOutputHelper); + var logger = new XUnitLogger(testOutputHelper); - _sut = new GitHubClient(hostFixture.Instance.Services.GetRequiredService()); - } - - [Fact] - public async void GetAsStringAsync_NonExistentUrl_Throws() - { - // Arrange - var uri = new Uri("http://example.invalid/foo/bar"); - - // Act - Func act = () => _sut.GetAsStringAsync(uri, CancellationToken.None); - - // Assert - await act.Should().ThrowAsync(); - } - - [Fact] - public async void GetAsStringAsync_OfficialBuckets_ReturnsDictionaryOfBuckets() - { - // Arrange - var bucketsListUri = new Uri("https://raw.githubusercontent.com/ScoopInstaller/Scoop/master/buckets.json"); - - // Act - var result = await _sut.GetAsStringAsync(bucketsListUri, CancellationToken.None); - - // 
Assert - result.Should().NotBeNull(); - - // Act - var dictionary = JsonSerializer.Deserialize>(result); - - // Assert - dictionary.Should().NotBeNull() - .And.HaveCountGreaterOrEqualTo(10, "because it contains a bunch of Official buckets") - .And.ContainKey("main", "because it contains the Official main bucket") - .And.ContainKey("extras", "because it contains the Official extras bucket"); + _sut = new GitHubClient(hostFixture.Instance.Services.GetRequiredService(), logger); } [Theory] - [InlineData("https://raw.githubusercontent.com/rasa/scoop-directory/master/exclude.txt")] - [InlineData("https://raw.githubusercontent.com/rasa/scoop-directory/master/include.txt")] - public async void GetAsStringAsync_BucketsLists_ReturnsListOfBuckets(string input) + [InlineData("http://example.com/foo/bar")] + public async void GetRepositoryAsync_InvalidRepo_ReturnsNull(string input) { // Arrange var uri = new Uri(input); // Act - var result = await _sut.GetAsStringAsync(uri, CancellationToken.None); + var result = () => _sut.GetRepositoryAsync(uri, CancellationToken.None); // Assert - result.Should().StartWith("url"); - result.Split(Environment.NewLine).Should().HaveCountGreaterThan(10, "because it contains at least 10 buckets"); + var taskResult = await result.Should().NotThrowAsync(); + taskResult.Subject.Should().BeNull(); } [Theory] [InlineData("http://example.invalid/foo/bar")] - [InlineData("http://example.com/foo/bar")] - public async void GetRepositoryAsync_InvalidRepo_Throws(string input) + public async void GetRepositoryAsync_InvalidDomain_Throws(string input) { // Arrange var uri = new Uri(input); // Act - Func act = () => _sut.GetRepositoryAsync(uri, CancellationToken.None); + var result = () => _sut.GetRepositoryAsync(uri, CancellationToken.None); // Assert - (await act.Should().ThrowAsync()) - .And.Message.Should().Be("The URI must be a GitHub repo URI (Parameter 'uri')"); + await result.Should().ThrowAsync(); } [Fact] @@ -111,23 +74,19 @@ public async void 
GetRepositoryAsync_ValidRepo_ReturnsGitHubRepo(string input, i // Assert result.Should().NotBeNull(); result!.HtmlUri.Should().Be(uri); - result!.Stars.Should().BeGreaterThan(expectedMinimumStars, "because official repo should have a large amount of stars"); + result.Stars.Should().BeGreaterThan(expectedMinimumStars, "because official repo should have a large amount of stars"); } [Theory] - [InlineData("http://example.invalid/foo/bar")] - [InlineData("http://example.com/foo/bar")] - [InlineData("https://github.com/foo/bar")] - [InlineData("https://api.github.com/search/repositories?q")] - public async void SearchRepositoriesAsync_InvalidQueryUrl_Throws(string input) + [InlineData(new object[] { new string[0] })] + [InlineData(new object[] { new[] { "" } })] + [InlineData(new object[] { new[] { "&&==" } })] + public async void SearchRepositoriesAsync_InvalidQueryUrl_Throws(string[] input) { - // Arrange - var uri = new Uri(input); - - // Act + // Arrange + Act try { - await _sut.SearchRepositoriesAsync(uri, CancellationToken.None).ToArrayAsync(); + await _sut.SearchRepositoriesAsync(input, CancellationToken.None).ToArrayAsync(); Assert.Fail("Should have thrown"); } catch (AggregateException ex) @@ -141,15 +100,12 @@ public async void SearchRepositoriesAsync_InvalidQueryUrl_Throws(string input) } [Theory] - [InlineData("https://api.github.com/search/repositories?q=scoop-bucket+created:>2023-01-01")] - [InlineData("https://api.github.com/search/repositories?q=scoop+bucket+created:>2023-01-01")] - public async void SearchRepositoriesAsync_ValidQuery_ReturnsSearchResults(string input) + [InlineData(new object[] { new[] { "scoop-bucket", "created:>2023-01-01" } })] + [InlineData(new object[] { new[] { "scoop+bucket", "created:>2023-01-01" } })] + public async void SearchRepositoriesAsync_ValidQuery_ReturnsSearchResults(string[] input) { - // Arrange - var uri = new Uri(input); - - // Act - var result = await _sut.SearchRepositoriesAsync(uri, 
CancellationToken.None).ToArrayAsync(); + // Arrange + Act + var result = await _sut.SearchRepositoriesAsync(input, CancellationToken.None).ToArrayAsync(); // Assert result.Should().NotBeNull(); @@ -157,34 +113,4 @@ public async void SearchRepositoriesAsync_ValidQuery_ReturnsSearchResults(string .BeGreaterThan(0, "because there should be at least 1 result") .And.BeLessThan(900, "because there should be less than 900 results. If it returns more than 900, the date condition should be updated"); } - - [Fact] - public async void SendAsync_NonExistentUrl_Throws() - { - // Arrange - var uri = new Uri("http://example.invalid/foo/bar"); - var httpRequestMessage = new HttpRequestMessage(HttpMethod.Head, uri); - - // Act - Func act = () => _sut.SendAsync(httpRequestMessage, CancellationToken.None); - - // Assert - await act.Should().ThrowAsync(); - } - - [Theory] - [InlineData("https://github.com/okibcn/Bucket.git")] - [InlineData("https://github.com/01walid/it-scoop.git")] - public async void SendAsync_FollowRedirection_Succeeds(string input) - { - // Arrange - var uri = new Uri(input); - var httpRequestMessage = new HttpRequestMessage(HttpMethod.Head, uri); - - // Act - var result = await _sut.SendAsync(httpRequestMessage, CancellationToken.None); - - // Assert - result.Should().BeSuccessful(); - } } diff --git a/src/ScoopSearch.Indexer.Tests/ScoopSearch.Indexer.Tests.csproj b/src/ScoopSearch.Indexer.Tests/ScoopSearch.Indexer.Tests.csproj index 7294f01..14b951c 100644 --- a/src/ScoopSearch.Indexer.Tests/ScoopSearch.Indexer.Tests.csproj +++ b/src/ScoopSearch.Indexer.Tests/ScoopSearch.Indexer.Tests.csproj @@ -2,8 +2,8 @@ - - + + @@ -19,4 +19,12 @@ + + + + + + + + From b2b4f9f940e58617655bdcb48cfe8d1c4efd60ad Mon Sep 17 00:00:00 2001 From: Gregoire Pailler Date: Wed, 24 May 2023 10:44:54 +0800 Subject: [PATCH 3/9] Deduplicate buckets in ScoopSearchIndexer by comparing their Url and ignoring the casing --- src/ScoopSearch.Indexer/Buckets/Bucket.cs | 2 +- 
src/ScoopSearch.Indexer/ScoopSearchIndexer.cs | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/ScoopSearch.Indexer/Buckets/Bucket.cs b/src/ScoopSearch.Indexer/Buckets/Bucket.cs index cb27869..a6cf06e 100644 --- a/src/ScoopSearch.Indexer/Buckets/Bucket.cs +++ b/src/ScoopSearch.Indexer/Buckets/Bucket.cs @@ -4,7 +4,7 @@ public class Bucket { public Bucket(Uri uri, int stars) { - Uri = new Uri(uri.AbsoluteUri.ToLowerInvariant()); + Uri = uri; Stars = stars; } diff --git a/src/ScoopSearch.Indexer/ScoopSearchIndexer.cs b/src/ScoopSearch.Indexer/ScoopSearchIndexer.cs index d5979bf..e71e141 100644 --- a/src/ScoopSearch.Indexer/ScoopSearchIndexer.cs +++ b/src/ScoopSearch.Indexer/ScoopSearchIndexer.cs @@ -51,13 +51,14 @@ public async Task ExecuteAsync(CancellationToken cancellationToken) .GetBucketsAsync(cancellationToken) .ToArrayAsync(cancellationToken); + var ignoredBuckets = _bucketsOptions.IgnoredBuckets?.Select(uri => uri.AbsoluteUri.ToLowerInvariant()).ToHashSet() ?? 
new HashSet(); var buckets = _bucketsProviders .Where(bucketSource => bucketSource is not IOfficialBucketsSource) .Select(provider => provider.GetBucketsAsync(cancellationToken)) .Prepend(officialBuckets.ToAsyncEnumerable()) .Merge() - .Distinct(bucket => bucket.Uri) - .Where(bucket => _bucketsOptions.IgnoredBuckets is null || !_bucketsOptions.IgnoredBuckets.Contains(bucket.Uri)); + .Distinct(bucket => bucket.Uri.AbsoluteUri.ToLowerInvariant()) + .Where(bucket => ignoredBuckets.Contains(bucket.Uri.AbsoluteUri.ToLowerInvariant()) == false); var officialBucketsHashSet = officialBuckets.Select(bucket => bucket.Uri).ToHashSet(); var allManifests = new ConcurrentBag(); From 9edd943a613fb369aa722ca2aea7540c5e8ab484 Mon Sep 17 00:00:00 2001 From: Gregoire Pailler Date: Wed, 24 May 2023 11:10:30 +0800 Subject: [PATCH 4/9] Update unit tests + remove single quotes in logging --- .../Git/GitRepositoryFactoryTests.cs | 16 +-- .../Git/GitRepositoryTests.cs | 4 +- .../Processor/FetchManifestsProcessorTests.cs | 107 ++++++++++-------- .../Buckets/Sources/OfficialBucketsSource.cs | 4 +- src/ScoopSearch.Indexer/Git/GitRepository.cs | 6 +- .../Git/GitRepositoryProvider.cs | 10 +- .../GitHub/GitHubClient.cs | 2 +- .../Processor/FetchManifestsProcessor.cs | 8 +- .../Processor/IndexingProcessor.cs | 2 +- 9 files changed, 84 insertions(+), 75 deletions(-) diff --git a/src/ScoopSearch.Indexer.Tests/Git/GitRepositoryFactoryTests.cs b/src/ScoopSearch.Indexer.Tests/Git/GitRepositoryFactoryTests.cs index d254d02..991bb21 100644 --- a/src/ScoopSearch.Indexer.Tests/Git/GitRepositoryFactoryTests.cs +++ b/src/ScoopSearch.Indexer.Tests/Git/GitRepositoryFactoryTests.cs @@ -44,7 +44,7 @@ public void Download_NonExistentRepository_ReturnsNull() // Assert result.Should().BeNull(); _logger.Should() - .Log(LogLevel.Error, _ => _.StartsWith($"Unable to clone repository '{repositoryUri}' to")); + .Log(LogLevel.Error, message => message.StartsWith($"Unable to clone repository {repositoryUri} to")); } 
[Fact] @@ -61,7 +61,7 @@ public void Download_ValidRepository_ReturnsRepositoryDirectory() result.Should().NotBeNull(); Directory.Exists(expectedRepositoryDirectory).Should().BeTrue(); _logger.Should() - .Log(LogLevel.Debug, $"Cloning repository '{repositoryUri}' in '{expectedRepositoryDirectory}'") + .Log(LogLevel.Debug, $"Cloning repository {repositoryUri} in {expectedRepositoryDirectory}") .And.NoLog(LogLevel.Warning); } @@ -79,8 +79,8 @@ public void Download_ValidExistingDirectoryRepository_ReturnsRepositoryDirectory // Assert result.Should().NotBeNull(); _logger.Should() - .Log(LogLevel.Debug, $"Pulling repository '{expectedRepositoryDirectory}'") - .And.NoLog(LogLevel.Warning);; + .Log(LogLevel.Debug, $"Pulling repository {expectedRepositoryDirectory}") + .And.NoLog(LogLevel.Warning); } [Fact] @@ -98,7 +98,7 @@ public void Download_Cancellation_ReturnsNull() result.Should().BeNull(); _logger.Should().Log( LogLevel.Error, - $"Unable to clone repository '{repositoryUri}' to '{expectedRepositoryDirectory}'"); + $"Unable to clone repository {repositoryUri} to {expectedRepositoryDirectory}"); } [Fact] @@ -117,8 +117,8 @@ public void Download_CorruptedExistingDirectoryRepository_ReturnsDirectoryReposi _logger.Should() .Log( LogLevel.Warning, - $"Unable to pull repository '{Constants.TestRepositoryUri}' to '{expectedRepositoryDirectory}'") - .And.Log(LogLevel.Debug, $"Cloning repository '{Constants.TestRepositoryUri}' in '{expectedRepositoryDirectory}'"); + $"Unable to pull repository {Constants.TestRepositoryUri} to {expectedRepositoryDirectory}") + .And.Log(LogLevel.Debug, $"Cloning repository {Constants.TestRepositoryUri} in {expectedRepositoryDirectory}"); } [Fact] @@ -134,6 +134,6 @@ public void Download_EmptyRepository_ReturnsNull() // Assert result.Should().BeNull(); _logger.Should() - .Log(LogLevel.Error, $"No valid branch found in '{expectedRepositoryDirectory}'"); + .Log(LogLevel.Error, $"No valid branch found in {expectedRepositoryDirectory}"); } } diff 
--git a/src/ScoopSearch.Indexer.Tests/Git/GitRepositoryTests.cs b/src/ScoopSearch.Indexer.Tests/Git/GitRepositoryTests.cs index eae5a28..2f3cc08 100644 --- a/src/ScoopSearch.Indexer.Tests/Git/GitRepositoryTests.cs +++ b/src/ScoopSearch.Indexer.Tests/Git/GitRepositoryTests.cs @@ -128,7 +128,7 @@ public async void GetCommitsCacheAsync_ReturnsExpectedFilesAndCommits(string rep // Assert var taskResult = await result.Should().NotThrowAsync(); taskResult.Subject.Should().HaveCount(expectedFiles) - .And.Subject.SelectMany(_ => _.Value).DistinctBy(_ => _.Sha).Should().HaveCount(expectedCommits); + .And.Subject.SelectMany(kv => kv.Value).DistinctBy(commitInfo => commitInfo.Sha).Should().HaveCount(expectedCommits); } public static IEnumerable GetCommitsCacheTestCases() @@ -136,7 +136,7 @@ public static IEnumerable GetCommitsCacheTestCases() // repository, filter, expected files, expected commits yield return new object[] { Constants.TestRepositoryUri, new Predicate(_ => true), 14, 39 }; yield return new object[] { Constants.TestRepositoryUri, new Predicate(_ => false), 0, 0 }; - yield return new object[] { Constants.TestRepositoryUri, new Predicate(_ => _.EndsWith(".json")), 11, 30 }; + yield return new object[] { Constants.TestRepositoryUri, new Predicate(filePath => filePath.EndsWith(".json")), 11, 30 }; } [Theory] diff --git a/src/ScoopSearch.Indexer.Tests/Processor/FetchManifestsProcessorTests.cs b/src/ScoopSearch.Indexer.Tests/Processor/FetchManifestsProcessorTests.cs index af5bda6..34483ff 100644 --- a/src/ScoopSearch.Indexer.Tests/Processor/FetchManifestsProcessorTests.cs +++ b/src/ScoopSearch.Indexer.Tests/Processor/FetchManifestsProcessorTests.cs @@ -2,15 +2,17 @@ using FluentAssertions; using FluentAssertions.Execution; using FluentAssertions.Extensions; +using LibGit2Sharp; using Microsoft.Extensions.DependencyInjection; -using Microsoft.Extensions.Logging; using Moq; +using ScoopSearch.Indexer.Buckets; using ScoopSearch.Indexer.Data; using 
ScoopSearch.Indexer.Git; using ScoopSearch.Indexer.Manifest; using ScoopSearch.Indexer.Processor; using ScoopSearch.Indexer.Tests.Helpers; using Xunit.Abstractions; +using LogLevel = Microsoft.Extensions.Logging.LogLevel; namespace ScoopSearch.Indexer.Tests.Processor; @@ -18,6 +20,7 @@ public class FetchManifestsProcessorTests : IClassFixture { private readonly HostFixture _hostFixture; private readonly XUnitLogger _logger; + private readonly XUnitLogger _gitRepositoryLogger; public FetchManifestsProcessorTests(HostFixture hostFixture, ITestOutputHelper testOutputHelper) { @@ -25,48 +28,57 @@ public FetchManifestsProcessorTests(HostFixture hostFixture, ITestOutputHelper t _hostFixture.Configure(testOutputHelper); _logger = new XUnitLogger(testOutputHelper); + _gitRepositoryLogger = new XUnitLogger(testOutputHelper); } - [Theory] - [CombinatorialData] - public async void FetchManifestsAsync_ValidRepository_ReturnsManifestsWithStarsAndKind([CombinatorialValues(123, 321)] int stars, bool officialRepository) + [Fact] + public async void FetchManifestsAsync_ValidRepository_ReturnsManifests() { // Arrange var uri = new Uri(Constants.TestRepositoryUri); - var bucketInfo = new BucketInfo(uri, stars, officialRepository); + var bucket = new Bucket(uri, 0); + var cancellationToken = CancellationToken.None; + var sut = CreateSut(); + + // Act + var result = await sut.FetchManifestsAsync(bucket, cancellationToken).ToArrayAsync(cancellationToken); + + // Assert + _logger.Should().Log(LogLevel.Debug, $"Generating manifests list for {uri}"); + result.Should().HaveCount(5); + } + + [Fact] + public async void FetchManifestsAsync_EmptyRepository_ReturnsEmptyResults() + { + // Arrange + var uri = new Uri(Constants.EmptyTestRepositoryUri); + var bucket = new Bucket(uri, 0); var cancellationToken = CancellationToken.None; var sut = CreateSut(); // Act - var result = await sut.FetchManifestsAsync(bucketInfo, cancellationToken); + var result = await sut.FetchManifestsAsync(bucket, 
cancellationToken).ToArrayAsync(cancellationToken); // Assert - _logger.Should().Log(LogLevel.Information, $"Found 5 manifests for {uri}"); - result - .Should().HaveCount(5) - .And.AllSatisfy(_ => - { - _.Metadata.RepositoryStars.Should().Be(stars); - _.Metadata.OfficialRepository.Should().Be(officialRepository); - }); + _gitRepositoryLogger.Should().Log(LogLevel.Error, message => message.StartsWith("No valid branch found in")); + result.Should().BeEmpty(); } - [Theory] - [InlineData(Constants.NonExistentTestRepositoryUri)] - [InlineData(Constants.EmptyTestRepositoryUri)] - public async void FetchManifestsAsync_InvalidRepository_ReturnsEmptyResults(string repository) + [Fact] + public async void FetchManifestsAsync_NonExistentRepository_ReturnsEmptyResults() { // Arrange - var uri = new Uri(repository); - var bucketInfo = new BucketInfo(uri, 0, false); + var uri = new Uri(Constants.NonExistentTestRepositoryUri); + var bucket = new Bucket(uri, 0); var cancellationToken = CancellationToken.None; var sut = CreateSut(); // Act - var result = await sut.FetchManifestsAsync(bucketInfo, cancellationToken); + var result = await sut.FetchManifestsAsync(bucket, cancellationToken).ToArrayAsync(cancellationToken); // Assert - _logger.Should().Log(LogLevel.Information, $"Found 0 manifests for {uri}"); + _gitRepositoryLogger.Should().Log(LogLevel.Error, message => message.StartsWith("Unable to clone repository")); result.Should().BeEmpty(); } @@ -75,14 +87,14 @@ public async void FetchManifestsAsync_NullRepository_ReturnsEmptyResults() { // Arrange var uri = new Uri(Constants.TestRepositoryUri); - var bucketInfo = new BucketInfo(uri, 0, false); + var bucket = new Bucket(uri, 0); var cancellationToken = new CancellationToken(); - var sut = CreateSut(_ => _ + var sut = CreateSut(mockConfig => mockConfig .Setup(_ => _.Download(uri, cancellationToken)) .Returns((IGitRepository?)null)); // Act - var result = await sut.FetchManifestsAsync(bucketInfo, cancellationToken); + var result = 
await sut.FetchManifestsAsync(bucket, cancellationToken).ToArrayAsync(cancellationToken); // Assert result.Should().BeEmpty(); @@ -93,7 +105,7 @@ public async void FetchManifestsAsync_ManifestNotInCommitsCache_ManifestSkipped( { // Arrange var uri = new Uri(Constants.TestRepositoryUri); - var bucketInfo = new BucketInfo(uri, 0, false); + var bucket = new Bucket(uri, 0); var cancellationToken = new CancellationToken(); var gitRepositoryMock = CreateGitRepositoryMock(new[] { @@ -101,12 +113,12 @@ public async void FetchManifestsAsync_ManifestNotInCommitsCache_ManifestSkipped( new GitRepositoryMockEntry("manifest2.json", "{}"), }, cancellationToken); - var sut = CreateSut(_ => _ + var sut = CreateSut(mockConfig => mockConfig .Setup(_ => _.Download(uri, cancellationToken)) .Returns(gitRepositoryMock.Object)); // Act - var result = await sut.FetchManifestsAsync(bucketInfo, cancellationToken); + var result = await sut.FetchManifestsAsync(bucket, cancellationToken).ToArrayAsync(cancellationToken); // Assert result.Should().HaveCount(1); @@ -117,7 +129,7 @@ public async void FetchManifestsAsync_ManifestNotInCommitsCache_ManifestSkipped( manifestInfo.Metadata.Sha.Should().Be("sha_manifest2.json"); manifestInfo.Metadata.Committed.Should().BeCloseTo(DateTimeOffset.Now, 1.Seconds()); } - _logger.Should().Log(LogLevel.Warning, $"Unable to find a commit for manifest 'manifest1.json' from '{Constants.TestRepositoryUri}'"); + _logger.Should().Log(LogLevel.Warning, $"Unable to find a commit for manifest manifest1.json from {Constants.TestRepositoryUri}"); } [Fact] @@ -125,7 +137,7 @@ public async void FetchManifestsAsync_InvalidManifest_ManifestSkipped() { // Arrange var uri = new Uri(Constants.TestRepositoryUri); - var bucketInfo = new BucketInfo(uri, 0, false); + var bucket = new Bucket(uri, 0); var cancellationToken = new CancellationToken(); var gitRepositoryMock = CreateGitRepositoryMock(new[] { @@ -133,12 +145,12 @@ public async void 
FetchManifestsAsync_InvalidManifest_ManifestSkipped() new GitRepositoryMockEntry("manifest2.json", "{}"), }, cancellationToken); - var sut = CreateSut(_ => _ + var sut = CreateSut(mockConfig => mockConfig .Setup(_ => _.Download(uri, cancellationToken)) .Returns(gitRepositoryMock.Object)); // Act - var result = await sut.FetchManifestsAsync(bucketInfo, cancellationToken); + var result = await sut.FetchManifestsAsync(bucket, cancellationToken).ToArrayAsync(cancellationToken); // Assert result.Should().HaveCount(1); @@ -149,7 +161,7 @@ public async void FetchManifestsAsync_InvalidManifest_ManifestSkipped() manifestInfo.Metadata.Sha.Should().Be("sha_manifest2.json"); manifestInfo.Metadata.Committed.Should().BeCloseTo(DateTimeOffset.Now, 1.Seconds()); } - _logger.Should().Log(LogLevel.Error, $"Unable to parse manifest 'manifest1.json' from '{Constants.TestRepositoryUri}'"); + _logger.Should().Log(LogLevel.Error, $"Unable to parse manifest manifest1.json from {Constants.TestRepositoryUri}"); } [Fact] @@ -157,19 +169,19 @@ public async void FetchManifestsAsync_SelectsBucketSubDirectoryIfExists_ReturnsM { // Arrange var uri = new Uri(Constants.TestRepositoryUri); - var bucketInfo = new BucketInfo(uri, 0, false); + var bucket = new Bucket(uri, 0); var cancellationToken = new CancellationToken(); var gitRepositoryMock = CreateGitRepositoryMock(new[] { new GitRepositoryMockEntry("bucket/manifest1.json", "{}"), new GitRepositoryMockEntry("manifest2.json", "{}"), }, cancellationToken); - var sut = CreateSut(_ => _ + var sut = CreateSut(mockConfig => mockConfig .Setup(_ => _.Download(uri, cancellationToken)) .Returns(gitRepositoryMock.Object)); // Act - var result = await sut.FetchManifestsAsync(bucketInfo, cancellationToken); + var result = await sut.FetchManifestsAsync(bucket, cancellationToken).ToArrayAsync(cancellationToken); // Assert result.Should().HaveCount(1); @@ -189,20 +201,17 @@ private Mock CreateGitRepositoryMock(GitRepositoryMockEntry[] en var gitRepositoryMock = 
new Mock(); gitRepositoryMock .Setup(_ => _.GetFilesFromIndex()) - .Returns(entries.Select(_ => _.Path)); + .Returns(entries.Select(entry => entry.Path)); gitRepositoryMock - .Setup(_ => _.GetCommitsCache(It.IsAny>(), cancellationToken)) - .Returns(entries.Where(_ => _.Content != null).ToDictionary( - k => k.Path, - v => (IReadOnlyCollection)new[] - { - new CommitInfo(DateTimeOffset.Now, $"sha_{v.Path}") - })); - foreach (var entry in entries.Where(_ => _.Content != null)) + .Setup(_ => _.GetCommitsCacheAsync(It.IsAny>(), cancellationToken)) + .ReturnsAsync(entries.Where(entry => entry.Content != null).ToDictionary( + kv => kv.Path, + kv => (IReadOnlyCollection)new[] { new CommitInfo(DateTimeOffset.Now, $"sha_{kv.Path}") })); + foreach (var entry in entries.Where(entry => entry.Content != null)) { gitRepositoryMock - .Setup(_ => _.ReadContent(It.Is(_ => _ == entry.Path))) - .Returns(entry.Content!); + .Setup(_ => _.ReadContentAsync(It.Is(filePath => filePath == entry.Path), cancellationToken)) + .ReturnsAsync(entry.Content!); } return gitRepositoryMock; @@ -211,7 +220,7 @@ private Mock CreateGitRepositoryMock(GitRepositoryMockEntry[] en private FetchManifestsProcessor CreateSut() { return new FetchManifestsProcessor( - _hostFixture.Instance.Services.GetRequiredService(), + new GitRepositoryProvider(_gitRepositoryLogger), _hostFixture.Instance.Services.GetRequiredService(), _logger); } @@ -224,7 +233,7 @@ private FetchManifestsProcessor CreateSut(Action> c var keyGeneratorMock = new Mock(); keyGeneratorMock .Setup(_ => _.Generate(It.IsAny())) - .Returns(_ => $"KEY_{_.FilePath}"); + .Returns(manifestMetadata => $"KEY_{manifestMetadata.FilePath}"); return new FetchManifestsProcessor( gitRepositoryProviderMock.Object, diff --git a/src/ScoopSearch.Indexer/Buckets/Sources/OfficialBucketsSource.cs b/src/ScoopSearch.Indexer/Buckets/Sources/OfficialBucketsSource.cs index b5b7f68..fb70ef7 100644 --- a/src/ScoopSearch.Indexer/Buckets/Sources/OfficialBucketsSource.cs +++ 
b/src/ScoopSearch.Indexer/Buckets/Sources/OfficialBucketsSource.cs @@ -35,7 +35,7 @@ public async IAsyncEnumerable GetBucketsAsync([EnumeratorCancellation] C yield break; } - _logger.LogInformation("Retrieving official buckets from '{Uri}'", _bucketOptions.OfficialBucketsListUrl); + _logger.LogInformation("Retrieving official buckets from {Uri}", _bucketOptions.OfficialBucketsListUrl); await foreach (var uri in GetBucketsFromJsonAsync(_bucketOptions.OfficialBucketsListUrl, cancellationToken)) { @@ -53,7 +53,7 @@ private async IAsyncEnumerable GetBucketsFromJsonAsync(Uri uri, [Enumerator var officialBuckets = await JsonSerializer.DeserializeAsync>(contentJson, cancellationToken: cancellationToken); if (officialBuckets is null) { - _logger.LogWarning("Unable to parse buckets list from '{Uri}'", uri); + _logger.LogWarning("Unable to parse buckets list from {Uri}", uri); yield break; } diff --git a/src/ScoopSearch.Indexer/Git/GitRepository.cs b/src/ScoopSearch.Indexer/Git/GitRepository.cs index 144dedd..79630a9 100644 --- a/src/ScoopSearch.Indexer/Git/GitRepository.cs +++ b/src/ScoopSearch.Indexer/Git/GitRepository.cs @@ -22,7 +22,7 @@ public GitRepository(string repositoryDirectory, string gitExecutable, ILogger l public void Delete() { - _logger.LogDebug("Deleting repository '{WorkingDirectory}'", _repository.Info.WorkingDirectory); + _logger.LogDebug("Deleting repository {WorkingDirectory}", _repository.Info.WorkingDirectory); var workingDirectory = _repository.Info.WorkingDirectory; _repository.Dispose(); @@ -43,7 +43,7 @@ public void Delete() public async Task>> GetCommitsCacheAsync(Predicate filter, CancellationToken cancellationToken) { - _logger.LogDebug("Computing commits cache for repository '{WorkingDirectory}'", _repository.Info.WorkingDirectory); + _logger.LogDebug("Computing commits cache for repository {WorkingDirectory}", _repository.Info.WorkingDirectory); var commitsCache = new Dictionary>(); @@ -112,7 +112,7 @@ void AddFilesToCache() throw new 
InvalidOperationException($"git returned non-zero exit code ({process.ExitCode})"); } - _logger.LogDebug("Cache computed for repository '{WorkingDirectory}': {Count} files", _repository.Info.WorkingDirectory, commitsCache.Count); + _logger.LogDebug("Cache computed for repository {WorkingDirectory}: {Count} files", _repository.Info.WorkingDirectory, commitsCache.Count); return new ReadOnlyDictionary>(commitsCache.ToDictionary(_ => _.Key, _ => (IReadOnlyCollection)_.Value)); } diff --git a/src/ScoopSearch.Indexer/Git/GitRepositoryProvider.cs b/src/ScoopSearch.Indexer/Git/GitRepositoryProvider.cs index e706ab8..f4e9d67 100644 --- a/src/ScoopSearch.Indexer/Git/GitRepositoryProvider.cs +++ b/src/ScoopSearch.Indexer/Git/GitRepositoryProvider.cs @@ -37,7 +37,7 @@ internal GitRepositoryProvider(ILogger logger, string repositorie } catch (Exception ex) { - _logger.LogWarning(ex, "Unable to pull repository '{Uri}' to '{RepositoryDirectory}'", uri, repositoryDirectory); + _logger.LogWarning(ex, "Unable to pull repository {Uri} to {RepositoryDirectory}", uri, repositoryDirectory); DeleteRepository(repositoryDirectory); CloneRepository(uri, repositoryDirectory, cancellationToken); } @@ -51,7 +51,7 @@ internal GitRepositoryProvider(ILogger logger, string repositorie { if (repository.Head.Tip == null) { - _logger.LogError("No valid branch found in '{RepositoryDirectory}'", repositoryDirectory); + _logger.LogError("No valid branch found in {RepositoryDirectory}", repositoryDirectory); return null; } } @@ -60,7 +60,7 @@ internal GitRepositoryProvider(ILogger logger, string repositorie } catch (Exception ex) { - _logger.LogError(ex, "Unable to clone repository '{Uri}' to '{RepositoryDirectory}'", uri, repositoryDirectory); + _logger.LogError(ex, "Unable to clone repository {Uri} to {RepositoryDirectory}", uri, repositoryDirectory); DeleteRepository(repositoryDirectory); return null; } @@ -77,7 +77,7 @@ private void DeleteRepository(string repositoryDirectory) private void 
PullRepository(string repositoryDirectory, CancellationToken cancellationToken) { - _logger.LogDebug("Pulling repository '{RepositoryDirectory}'", repositoryDirectory); + _logger.LogDebug("Pulling repository {RepositoryDirectory}", repositoryDirectory); using var repository = new Repository(repositoryDirectory); @@ -102,7 +102,7 @@ private void PullRepository(string repositoryDirectory, CancellationToken cancel private void CloneRepository(Uri uri, string repositoryDirectory, CancellationToken cancellationToken) { - _logger.LogDebug("Cloning repository '{Uri}' in '{RepositoryDirectory}'", uri, repositoryDirectory); + _logger.LogDebug("Cloning repository {Uri} in {RepositoryDirectory}", uri, repositoryDirectory); var cloneOptions = CreateOptions(cancellationToken); cloneOptions.RecurseSubmodules = false; diff --git a/src/ScoopSearch.Indexer/GitHub/GitHubClient.cs b/src/ScoopSearch.Indexer/GitHub/GitHubClient.cs index 0e9d7d3..0c44632 100644 --- a/src/ScoopSearch.Indexer/GitHub/GitHubClient.cs +++ b/src/ScoopSearch.Indexer/GitHub/GitHubClient.cs @@ -87,7 +87,7 @@ public async IAsyncEnumerable SearchRepositoriesAsync(string[] query break; } - _logger.LogDebug("Found {Count} repositories for query '{Query}'", results.Items.Length, searchReposUri); + _logger.LogDebug("Found {Count} repositories for query {Query}", results.Items.Length, searchReposUri); foreach (var gitHubRepo in results.Items) { yield return gitHubRepo; diff --git a/src/ScoopSearch.Indexer/Processor/FetchManifestsProcessor.cs b/src/ScoopSearch.Indexer/Processor/FetchManifestsProcessor.cs index 71f9bca..f4bb5cc 100644 --- a/src/ScoopSearch.Indexer/Processor/FetchManifestsProcessor.cs +++ b/src/ScoopSearch.Indexer/Processor/FetchManifestsProcessor.cs @@ -23,7 +23,7 @@ public FetchManifestsProcessor(IGitRepositoryProvider gitRepositoryProvider, IKe public IAsyncEnumerable FetchManifestsAsync(Bucket bucket, CancellationToken cancellationToken) { // Clone/Update bucket repository and retrieve manifests - 
_logger.LogDebug("Generating manifests list for '{Bucket}'", bucket.Uri); + _logger.LogDebug("Generating manifests list for {Bucket}", bucket.Uri); return GetManifestsFromRepositoryAsync(bucket.Uri, cancellationToken); } @@ -36,7 +36,7 @@ private async IAsyncEnumerable GetManifestsFromRepositoryAsync(Uri yield break; } - _logger.LogDebug("Generating manifest infos from repository '{Repository}'", bucketUri); + _logger.LogDebug("Generating manifest infos from repository {Repository}", bucketUri); var files = repository.GetFilesFromIndex().ToArray(); var manifestsSubPath = files.Any(_ => _.StartsWith("bucket/")) ? "bucket" : null; @@ -65,7 +65,7 @@ private async IAsyncEnumerable GetManifestsFromRepositoryAsync(Uri } else { - _logger.LogWarning("Unable to find a commit for manifest '{Manifest}' from '{Repository}'", filePath, bucketUri); + _logger.LogWarning("Unable to find a commit for manifest {Manifest} from {Repository}", filePath, bucketUri); } } @@ -89,7 +89,7 @@ bool IsManifestPredicate(string? manifestsSubPath, string filePath) } catch (Exception ex) { - _logger.LogError(ex, "Unable to parse manifest '{Manifest}' from '{Repository}'", metadata.FilePath, metadata.Repository); + _logger.LogError(ex, "Unable to parse manifest {Manifest} from {Repository}", metadata.FilePath, metadata.Repository); } return null; diff --git a/src/ScoopSearch.Indexer/Processor/IndexingProcessor.cs b/src/ScoopSearch.Indexer/Processor/IndexingProcessor.cs index 1e770a8..d4a49c5 100644 --- a/src/ScoopSearch.Indexer/Processor/IndexingProcessor.cs +++ b/src/ScoopSearch.Indexer/Processor/IndexingProcessor.cs @@ -85,7 +85,7 @@ private void UpdateManifestsMetadataWithDuplicateInfo(ref ManifestInfo[] manifes .ToArray(); var originalManifest = prioritizedManifests.First(); - _logger.LogDebug("Duplicated manifests with hash '{Hash}' found in {Manifests}. Choosing {Manifest} as the original one", + _logger.LogDebug("Duplicated manifests with hash {Hash} found in {Manifests}. 
Choosing {Manifest} as the original one", duplicatedManifestsGroup.Key, string.Join(", ", duplicatedManifestsGroup.Select(_ => _.manifest.Metadata.Repository + "/" + _.manifest.Metadata.FilePath)), originalManifest.Metadata.Repository + "/" + originalManifest.Metadata.FilePath); From 4f5ef7bf3d4e29e5cf029fc905878df811ef6daa Mon Sep 17 00:00:00 2001 From: Gregoire Pailler Date: Sat, 27 May 2023 21:07:38 +0800 Subject: [PATCH 5/9] Replace CancellationToken.None with new instances --- .../Git/GitRepositoryFactoryTests.cs | 15 ++++++--- .../Git/GitRepositoryTests.cs | 32 ++++++++++++------- .../GitHub/GitHubClientTests.cs | 18 +++++++---- .../Processor/FetchBucketsProcessorTests.cs | 2 +- .../Processor/FetchManifestsProcessorTests.cs | 6 ++-- .../Processor/IndexingProcessorTests.cs | 2 +- 6 files changed, 47 insertions(+), 28 deletions(-) diff --git a/src/ScoopSearch.Indexer.Tests/Git/GitRepositoryFactoryTests.cs b/src/ScoopSearch.Indexer.Tests/Git/GitRepositoryFactoryTests.cs index 991bb21..dd77cff 100644 --- a/src/ScoopSearch.Indexer.Tests/Git/GitRepositoryFactoryTests.cs +++ b/src/ScoopSearch.Indexer.Tests/Git/GitRepositoryFactoryTests.cs @@ -37,9 +37,10 @@ public void Download_NonExistentRepository_ReturnsNull() { // Arrange var repositoryUri = new Uri(Constants.NonExistentTestRepositoryUri); + var cancellationToken = new CancellationToken(); // Act - var result = _sut.Download(repositoryUri, CancellationToken.None); + var result = _sut.Download(repositoryUri, cancellationToken); // Assert result.Should().BeNull(); @@ -53,9 +54,10 @@ public void Download_ValidRepository_ReturnsRepositoryDirectory() // Arrange var repositoryUri = new Uri(Constants.TestRepositoryUri); var expectedRepositoryDirectory = Path.Combine(_repositoriesDirectory, repositoryUri.AbsolutePath[1..]); + var cancellationToken = new CancellationToken(); // Act - var result = _sut.Download(repositoryUri, CancellationToken.None); + var result = _sut.Download(repositoryUri, cancellationToken); // 
Assert result.Should().NotBeNull(); @@ -71,10 +73,11 @@ public void Download_ValidExistingDirectoryRepository_ReturnsRepositoryDirectory // Arrange var repositoryUri = new Uri(Constants.TestRepositoryUri); var expectedRepositoryDirectory = Path.Combine(_repositoriesDirectory, repositoryUri.AbsolutePath[1..]); + var cancellationToken = new CancellationToken(); Repository.Clone(Constants.TestRepositoryUri, expectedRepositoryDirectory); // Act - var result = _sut.Download(repositoryUri, CancellationToken.None); + var result = _sut.Download(repositoryUri, cancellationToken); // Assert result.Should().NotBeNull(); @@ -107,10 +110,11 @@ public void Download_CorruptedExistingDirectoryRepository_ReturnsDirectoryReposi // Arrange var repositoryUri = new Uri(Constants.TestRepositoryUri); var expectedRepositoryDirectory = Path.Combine(_repositoriesDirectory, repositoryUri.AbsolutePath[1..]); + var cancellationToken = new CancellationToken(); Directory.CreateDirectory(expectedRepositoryDirectory); // Act - var result = _sut.Download(repositoryUri, CancellationToken.None); + var result = _sut.Download(repositoryUri, cancellationToken); // Assert result.Should().NotBeNull(); @@ -127,9 +131,10 @@ public void Download_EmptyRepository_ReturnsNull() // Arrange var repositoryUri = new Uri(Constants.EmptyTestRepositoryUri); var expectedRepositoryDirectory = Path.Combine(_repositoriesDirectory, repositoryUri.AbsolutePath[1..]); + var cancellationToken = new CancellationToken(); // Act - var result = _sut.Download(repositoryUri, CancellationToken.None); + var result = _sut.Download(repositoryUri, cancellationToken); // Assert result.Should().BeNull(); diff --git a/src/ScoopSearch.Indexer.Tests/Git/GitRepositoryTests.cs b/src/ScoopSearch.Indexer.Tests/Git/GitRepositoryTests.cs index 2f3cc08..568c4d3 100644 --- a/src/ScoopSearch.Indexer.Tests/Git/GitRepositoryTests.cs +++ b/src/ScoopSearch.Indexer.Tests/Git/GitRepositoryTests.cs @@ -38,7 +38,8 @@ public void 
Delete_ExistingRepository_Deleted() // Arrange var repositoryUri = new Uri(Constants.TestRepositoryUri); var expectedRepositoryDirectory = Path.Combine(_repositoriesDirectory, repositoryUri.AbsolutePath[1..]); - var repository = _provider.Download(repositoryUri, CancellationToken.None)!; + var cancellationToken = new CancellationToken(); + var repository = _provider.Download(repositoryUri, cancellationToken)!; // Assert Directory.Exists(expectedRepositoryDirectory).Should().BeTrue(); @@ -55,7 +56,8 @@ public void GetBranchName_Succeeds() { // Arrange var repositoryUri = new Uri(Constants.TestRepositoryUri); - var repository = _provider.Download(repositoryUri, CancellationToken.None)!; + var cancellationToken = new CancellationToken(); + var repository = _provider.Download(repositoryUri, cancellationToken)!; // Act var result = repository.GetBranchName(); @@ -69,7 +71,8 @@ public void Dispose_Succeeds() { // Arrange var repositoryUri = new Uri(Constants.TestRepositoryUri); - var repository = _provider.Download(repositoryUri, CancellationToken.None)!; + var cancellationToken = new CancellationToken(); + var repository = _provider.Download(repositoryUri, cancellationToken)!; // Act + Assert repository.Should().BeAssignableTo().Subject.Dispose(); @@ -79,7 +82,8 @@ public void Dispose_Succeeds() public void GetItemsFromIndex_ReturnsEntries() { // Arrange - var repository = _provider.Download(new Uri(Constants.TestRepositoryUri), CancellationToken.None)!; + var cancellationToken = new CancellationToken(); + var repository = _provider.Download(new Uri(Constants.TestRepositoryUri), cancellationToken)!; // Act var result = repository.GetFilesFromIndex(); @@ -92,10 +96,11 @@ public void GetItemsFromIndex_ReturnsEntries() public async void ReadContentAsync_NonExistentEntry_Throws() { // Arrange - var repository = _provider.Download(new Uri(Constants.TestRepositoryUri), CancellationToken.None)!; + var cancellationToken = new CancellationToken(); + var repository = 
_provider.Download(new Uri(Constants.TestRepositoryUri), cancellationToken)!; // Act - var result = async () => await repository.ReadContentAsync("foo", CancellationToken.None); + var result = async () => await repository.ReadContentAsync("foo", cancellationToken); // Assert await result.Should().ThrowAsync(); @@ -105,10 +110,11 @@ public async void ReadContentAsync_NonExistentEntry_Throws() public async void ReadContentAsync_ExistentEntry_ReturnsContent() { // Arrange - var repository = _provider.Download(new Uri(Constants.TestRepositoryUri), CancellationToken.None)!; + var cancellationToken = new CancellationToken(); + var repository = _provider.Download(new Uri(Constants.TestRepositoryUri), cancellationToken)!; // Act - var result = async () => await repository.ReadContentAsync("kaxaml.json", CancellationToken.None); + var result = async () => await repository.ReadContentAsync("kaxaml.json", cancellationToken); // Assert var taskResult = await result.Should().NotThrowAsync(); @@ -120,10 +126,11 @@ public async void ReadContentAsync_ExistentEntry_ReturnsContent() public async void GetCommitsCacheAsync_ReturnsExpectedFilesAndCommits(string repositoryUri, Predicate filter, int expectedFiles, int expectedCommits) { // Arrange - var repository = _provider.Download(new Uri(repositoryUri), CancellationToken.None)!; + var cancellationToken = new CancellationToken(); + var repository = _provider.Download(new Uri(repositoryUri), cancellationToken)!; // Act - var result = async () => await repository.GetCommitsCacheAsync(filter, CancellationToken.None); + var result = async () => await repository.GetCommitsCacheAsync(filter, cancellationToken); // Assert var taskResult = await result.Should().NotThrowAsync(); @@ -146,11 +153,12 @@ public static IEnumerable GetCommitsCacheTestCases() public async void GetCommitsCacheAsync_BuildCache_Succeeds(string repositoryUri, double maxSeconds, int minimalManifestsCount) { // Arrange - var repository = _provider.Download(new 
Uri(repositoryUri), CancellationToken.None)!; + var cancellationToken = new CancellationToken(); + var repository = _provider.Download(new Uri(repositoryUri), cancellationToken)!; bool IsManifestFile(string filePath) => Path.GetExtension(filePath).Equals(".json", StringComparison.OrdinalIgnoreCase); // Act - var result = async () => await repository.GetCommitsCacheAsync(IsManifestFile, CancellationToken.None); + var result = async () => await repository.GetCommitsCacheAsync(IsManifestFile, cancellationToken); // Assert var taskResult = await result.Should().CompleteWithinAsync(maxSeconds.Seconds()); diff --git a/src/ScoopSearch.Indexer.Tests/GitHub/GitHubClientTests.cs b/src/ScoopSearch.Indexer.Tests/GitHub/GitHubClientTests.cs index 83932a6..1c46413 100644 --- a/src/ScoopSearch.Indexer.Tests/GitHub/GitHubClientTests.cs +++ b/src/ScoopSearch.Indexer.Tests/GitHub/GitHubClientTests.cs @@ -24,9 +24,10 @@ public async void GetRepositoryAsync_InvalidRepo_ReturnsNull(string input) { // Arrange var uri = new Uri(input); + var cancellationToken = new CancellationToken(); // Act - var result = () => _sut.GetRepositoryAsync(uri, CancellationToken.None); + var result = () => _sut.GetRepositoryAsync(uri, cancellationToken); // Assert var taskResult = await result.Should().NotThrowAsync(); @@ -39,9 +40,10 @@ public async void GetRepositoryAsync_InvalidDomain_Throws(string input) { // Arrange var uri = new Uri(input); + var cancellationToken = new CancellationToken(); // Act - var result = () => _sut.GetRepositoryAsync(uri, CancellationToken.None); + var result = () => _sut.GetRepositoryAsync(uri, cancellationToken); // Assert await result.Should().ThrowAsync(); @@ -52,9 +54,10 @@ public async void GetRepositoryAsync_NonExistentRepo_ReturnsNull() { // Arrange var uri = new Uri(Constants.NonExistentTestRepositoryUri); + var cancellationToken = new CancellationToken(); // Act - var result = await _sut.GetRepositoryAsync(uri, CancellationToken.None); + var result = await 
_sut.GetRepositoryAsync(uri, cancellationToken); // Assert result.Should().BeNull(); @@ -67,9 +70,10 @@ public async void GetRepositoryAsync_ValidRepo_ReturnsGitHubRepo(string input, i { // Arrange var uri = new Uri(input); + var cancellationToken = new CancellationToken(); // Act - var result = await _sut.GetRepositoryAsync(uri, CancellationToken.None); + var result = await _sut.GetRepositoryAsync(uri, cancellationToken); // Assert result.Should().NotBeNull(); @@ -84,9 +88,10 @@ public async void GetRepositoryAsync_ValidRepo_ReturnsGitHubRepo(string input, i public async void SearchRepositoriesAsync_InvalidQueryUrl_Throws(string[] input) { // Arrange + Act + var cancellationToken = new CancellationToken(); try { - await _sut.SearchRepositoriesAsync(input, CancellationToken.None).ToArrayAsync(); + await _sut.SearchRepositoriesAsync(input, cancellationToken).ToArrayAsync(cancellationToken); Assert.Fail("Should have thrown"); } catch (AggregateException ex) @@ -105,7 +110,8 @@ public async void SearchRepositoriesAsync_InvalidQueryUrl_Throws(string[] input) public async void SearchRepositoriesAsync_ValidQuery_ReturnsSearchResults(string[] input) { // Arrange + Act - var result = await _sut.SearchRepositoriesAsync(input, CancellationToken.None).ToArrayAsync(); + var cancellationToken = new CancellationToken(); + var result = await _sut.SearchRepositoriesAsync(input, cancellationToken).ToArrayAsync(cancellationToken); // Assert result.Should().NotBeNull(); diff --git a/src/ScoopSearch.Indexer.Tests/Processor/FetchBucketsProcessorTests.cs b/src/ScoopSearch.Indexer.Tests/Processor/FetchBucketsProcessorTests.cs index 1839058..e2a4313 100644 --- a/src/ScoopSearch.Indexer.Tests/Processor/FetchBucketsProcessorTests.cs +++ b/src/ScoopSearch.Indexer.Tests/Processor/FetchBucketsProcessorTests.cs @@ -32,7 +32,7 @@ public FetchBucketsProcessorTests(HostFixture hostFixture, ITestOutputHelper tes public async void FetchBucketsAsync_ReturnsBuckets_Succeeds() { // Arrange - var 
cancellationToken = CancellationToken.None; + var cancellationToken = new CancellationToken(); var expectedOfficialBucketsCount = 10; var expectedAtLeastBucketsCount = 1400; diff --git a/src/ScoopSearch.Indexer.Tests/Processor/FetchManifestsProcessorTests.cs b/src/ScoopSearch.Indexer.Tests/Processor/FetchManifestsProcessorTests.cs index 34483ff..a869178 100644 --- a/src/ScoopSearch.Indexer.Tests/Processor/FetchManifestsProcessorTests.cs +++ b/src/ScoopSearch.Indexer.Tests/Processor/FetchManifestsProcessorTests.cs @@ -37,7 +37,7 @@ public async void FetchManifestsAsync_ValidRepository_ReturnsManifests() // Arrange var uri = new Uri(Constants.TestRepositoryUri); var bucket = new Bucket(uri, 0); - var cancellationToken = CancellationToken.None; + var cancellationToken = new CancellationToken(); var sut = CreateSut(); // Act @@ -54,7 +54,7 @@ public async void FetchManifestsAsync_EmptyRepository_ReturnsEmptyResults() // Arrange var uri = new Uri(Constants.EmptyTestRepositoryUri); var bucket = new Bucket(uri, 0); - var cancellationToken = CancellationToken.None; + var cancellationToken = new CancellationToken(); var sut = CreateSut(); // Act @@ -71,7 +71,7 @@ public async void FetchManifestsAsync_NonExistentRepository_ReturnsEmptyResults( // Arrange var uri = new Uri(Constants.NonExistentTestRepositoryUri); var bucket = new Bucket(uri, 0); - var cancellationToken = CancellationToken.None; + var cancellationToken = new CancellationToken(); var sut = CreateSut(); // Act diff --git a/src/ScoopSearch.Indexer.Tests/Processor/IndexingProcessorTests.cs b/src/ScoopSearch.Indexer.Tests/Processor/IndexingProcessorTests.cs index 225f528..3032f9e 100644 --- a/src/ScoopSearch.Indexer.Tests/Processor/IndexingProcessorTests.cs +++ b/src/ScoopSearch.Indexer.Tests/Processor/IndexingProcessorTests.cs @@ -56,7 +56,7 @@ public async void UpdateIndexWithManifestsAsync_IndexUpdated( ManifestInfo[] expectedManifestsToRemove) { // Arrange - var cancellationToken = CancellationToken.None; + var 
cancellationToken = new CancellationToken(); _searchClientMock .Setup(_ => _.GetAllManifestsAsync(cancellationToken)) .Returns(manifestsInIndex.ToAsyncEnumerable()) From ed3441d3e9017bf2c166bdbf4f175a26f6845a91 Mon Sep 17 00:00:00 2001 From: Gregoire Pailler Date: Sat, 27 May 2023 21:08:13 +0800 Subject: [PATCH 6/9] Add a test to check repository redirection --- .../GitHub/GitHubClientTests.cs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/ScoopSearch.Indexer.Tests/GitHub/GitHubClientTests.cs b/src/ScoopSearch.Indexer.Tests/GitHub/GitHubClientTests.cs index 1c46413..c8faa0e 100644 --- a/src/ScoopSearch.Indexer.Tests/GitHub/GitHubClientTests.cs +++ b/src/ScoopSearch.Indexer.Tests/GitHub/GitHubClientTests.cs @@ -63,6 +63,21 @@ public async void GetRepositoryAsync_NonExistentRepo_ReturnsNull() result.Should().BeNull(); } + [Fact] + public async void GetRepositoryAsync_RedirectedRepo_ReturnsNull() + { + // Arrange + var uri = new Uri("https://github.com/MCOfficer/scoop-nirsoft"); + var cancellationToken = new CancellationToken(); + + // Act + var result = await _sut.GetRepositoryAsync(uri, cancellationToken); + + // Assert + result.Should().NotBeNull(); + result!.HtmlUri.Should().Be("https://github.com/ScoopInstaller/Nirsoft"); + } + [Theory] [InlineData("https://github.com/ScoopInstaller/Main", 1000)] [InlineData("https://github.com/ScoopInstaller/Extras", 1500)] From 083278a513b1ac4ff79317a5750dc18d14dc7732 Mon Sep 17 00:00:00 2001 From: Gregoire Pailler Date: Sat, 27 May 2023 21:08:39 +0800 Subject: [PATCH 7/9] Improve coverage on Manifests --- .../Helpers/ManifestInfoExtensions.cs | 12 -- .../Manifest/ManifestComparerTests.cs | 31 ++++- .../ManifestInfoDeserializationTests.cs | 120 +++++++++++++++--- 3 files changed, 127 insertions(+), 36 deletions(-) delete mode 100644 src/ScoopSearch.Indexer.Tests/Helpers/ManifestInfoExtensions.cs diff --git a/src/ScoopSearch.Indexer.Tests/Helpers/ManifestInfoExtensions.cs 
b/src/ScoopSearch.Indexer.Tests/Helpers/ManifestInfoExtensions.cs deleted file mode 100644 index 7999788..0000000 --- a/src/ScoopSearch.Indexer.Tests/Helpers/ManifestInfoExtensions.cs +++ /dev/null @@ -1,12 +0,0 @@ -using System.Text.Json; -using ScoopSearch.Indexer.Data; - -namespace ScoopSearch.Indexer.Tests.Helpers; - -public static class ManifestInfoExtensions -{ - public static ManifestInfo ToManifestInfo(this (string Id, string Sha, int RepositoryStars) @this) - { - return JsonSerializer.Deserialize(@$"{{ ""Id"": ""{@this.Id}"", ""Metadata"": {{ ""Sha"": ""{@this.Sha}"", ""RepositoryStars"": {@this.RepositoryStars}, ""OfficialRepositoryNumber"": 0 }} }}")!; - } -} diff --git a/src/ScoopSearch.Indexer.Tests/Manifest/ManifestComparerTests.cs b/src/ScoopSearch.Indexer.Tests/Manifest/ManifestComparerTests.cs index 3ab325c..4de5c8d 100644 --- a/src/ScoopSearch.Indexer.Tests/Manifest/ManifestComparerTests.cs +++ b/src/ScoopSearch.Indexer.Tests/Manifest/ManifestComparerTests.cs @@ -36,6 +36,27 @@ public void Equals_Succeeds( && duplicateOf == manifestInfo2.Metadata.DuplicateOf); } + [Theory] + [InlineData(true, true, true)] + [InlineData(true, false, false)] + [InlineData(false, true, false)] + public void Equals_Nulls_ReturnsFalse(bool xNull, bool yNull, bool expectedEquals) + { + // Arrange + var manifestIdComparer = ManifestComparer.ManifestIdComparer; + var manifestExactComparer = ManifestComparer.ManifestExactComparer; + var x = xNull ? null : (ManifestInfo)Faker.CreateManifestInfo(); + var y = yNull ? 
null : (ManifestInfo)Faker.CreateManifestInfo(); + + // Act + var resultIdEquals = manifestIdComparer.Equals(x, y); + var resultExactEquals = manifestExactComparer.Equals(x, y); + + // Assert + resultIdEquals.Should().Be(expectedEquals); + resultExactEquals.Should().Be(expectedEquals); + } + [Theory] [CombinatorialData] public void GetHashCode_Succeeds( @@ -62,10 +83,10 @@ public void GetHashCode_Succeeds( private static ManifestInfo CreateManifestInfo(string id, string? commitHash, int stars, int? officialRepository, string duplicateOf) { return Faker.CreateManifestInfo(_ => _ - .RuleFor(f => f.Sha, commitHash) - .RuleFor(f => f.RepositoryStars, stars) - .RuleFor(f => f.OfficialRepositoryNumber, officialRepository) - .RuleFor(f => f.DuplicateOf, duplicateOf)) - .RuleFor(f => f.Id, id); + .RuleFor(manifestMetadata => manifestMetadata.Sha, commitHash) + .RuleFor(manifestMetadata => manifestMetadata.RepositoryStars, stars) + .RuleFor(manifestMetadata => manifestMetadata.OfficialRepositoryNumber, officialRepository) + .RuleFor(manifestMetadata => manifestMetadata.DuplicateOf, duplicateOf)) + .RuleFor(manifestInfo => manifestInfo.Id, id); } } diff --git a/src/ScoopSearch.Indexer.Tests/Manifest/ManifestInfoDeserializationTests.cs b/src/ScoopSearch.Indexer.Tests/Manifest/ManifestInfoDeserializationTests.cs index 783080c..34c53fb 100644 --- a/src/ScoopSearch.Indexer.Tests/Manifest/ManifestInfoDeserializationTests.cs +++ b/src/ScoopSearch.Indexer.Tests/Manifest/ManifestInfoDeserializationTests.cs @@ -1,13 +1,72 @@ using FluentAssertions; using ScoopSearch.Indexer.Data; +using ScoopSearch.Indexer.Tests.Helpers; namespace ScoopSearch.Indexer.Tests.Manifest; public class ManifestInfoDeserializationTests { + [Fact] + public void Deserialize_Id_Succeeds() + { + // Arrange + var key = "foo"; + + // Act + var result = ManifestInfo.Deserialize("{}", key, new ManifestMetadata()); + + // Assert + result.Should().NotBeNull(); + result!.Id.Should().Be("foo"); + } + + [Fact] + public 
void Deserialize_Metadata_Succeeds() + { + // Arrange + var manifestMetadata = new ManifestMetadata(); + + // Act + var result = ManifestInfo.Deserialize("{}", "foo", manifestMetadata); + + // Assert + result.Should().NotBeNull(); + result!.Metadata.Should().BeSameAs(manifestMetadata); + } + + [Theory] + [InlineData("foo", "foo")] + [InlineData("FOO", "FOO")] + [InlineData("bucket/foo", "foo")] + public void Deserialize_Name_Succeeds(string filePath, string expectedResult) + { + // Arrange + var manifestMetadata = Faker.CreateManifestMetadata() + .RuleFor(manifestMetadata => manifestMetadata.FilePath, filePath); + + // Act + var result = ManifestInfo.Deserialize("{}", "foo", manifestMetadata); + + // Assert + result.Should().BeOfType(); + result!.Name.Should().Be(expectedResult); + result.NamePartial.Should().Be(expectedResult); + result.NameSortable.Should().Be(expectedResult.ToLowerInvariant()); + result.NameSuffix.Should().Be(expectedResult); + } + [Theory] - [MemberData(nameof(DeserializeLicenseTestCases))] - public void Deserialize_License_ReturnsSucceeds(string jsonContent, string expectedResult) + [InlineData("", null)] + [InlineData(@"""license"": """"", "")] + [InlineData(@"""license"": ""foo""", "foo")] + [InlineData(@"""license"": [ ""foo"", ""bar"" ]", "foo, bar")] + [InlineData(@"""license"": { ""identifier"": ""foo"" }", "foo")] + [InlineData(@"""license"": { ""url"": ""bar"" }", "bar")] + [InlineData(@"""license"": { ""identifier"": ""foo"", ""url"": ""bar"" }", "foo")] + [InlineData(@"""license"": [ { ""identifier"": ""foo"", ""url"": ""bar"" } ]", "foo")] + [InlineData(@"""license"": [ { ""identifier"": ""foo"" }, { ""identifier"": ""bar"" } ]", "foo, bar")] + [InlineData(@"""license"": [ { ""identifier"": ""foo"" }, { ""url"": ""bar"" } ]", "foo, bar")] + public void Deserialize_License_ReturnsSucceeds(string jsonContent, string? 
expectedResult) { // Arrange jsonContent = $"{{ {jsonContent} }}"; @@ -20,27 +79,50 @@ public void Deserialize_License_ReturnsSucceeds(string jsonContent, string expec result!.License.Should().Be(expectedResult); } - public static IEnumerable DeserializeLicenseTestCases - { - get - { - yield return new object[] { @"""license"": ""foo""", "foo" }; - yield return new object[] { @"""license"": [ ""foo"", ""bar"" ]", "foo, bar" }; - yield return new object[] { @"""license"": { ""identifier"": ""foo"" }", "foo" }; - yield return new object[] { @"""license"": { ""url"": ""bar"" }", "bar" }; - yield return new object[] { @"""license"": { ""identifier"": ""foo"", ""url"": ""bar"" }", "foo" }; - yield return new object[] { @"""license"": [ { ""identifier"": ""foo"", ""url"": ""bar"" } ]", "foo" }; - yield return new object[] { @"""license"": [ { ""identifier"": ""foo"" }, { ""identifier"": ""bar"" } ]", "foo, bar" }; - yield return new object[] { @"""license"": [ { ""identifier"": ""foo"" }, { ""url"": ""bar"" } ]", "foo, bar" }; - } - } - [Theory] + [InlineData("", null)] + [InlineData(@"""description"": """"", "")] [InlineData(@"""description"": ""foo""", "foo")] [InlineData(@"""description"": [ ""foo"" ]", "foo")] [InlineData(@"""description"": [ ""foo"", ""bar"" ]", "foo bar")] [InlineData(@"""description"": [ ""foo"", """", ""bar"" ]", "foo \n bar")] - public void Deserialize_Description_Succeeds(string jsonContent, string expectedDescription) + public void Deserialize_Description_Succeeds(string jsonContent, string? 
expectedResult) + { + // Arrange + jsonContent = $"{{ {jsonContent} }}"; + + // Act + var result = ManifestInfo.Deserialize(jsonContent, "foo", new ManifestMetadata()); + + // Assert + result.Should().NotBeNull(); + result!.Description.Should().Be(expectedResult?.Replace("\n", Environment.NewLine)); + } + + [Theory] + [InlineData("", null)] + [InlineData(@"""version"": """"", "")] + [InlineData(@"""version"": ""v1""", "v1")] + [InlineData(@"""version"": ""1.2.3""", "1.2.3")] + public void Deserialize_Version_Succeeds(string jsonContent, string? expectedResult) + { + // Arrange + jsonContent = $"{{ {jsonContent} }}"; + + // Act + var result = ManifestInfo.Deserialize(jsonContent, "foo", new ManifestMetadata()); + + // Assert + result.Should().NotBeNull(); + result!.Version.Should().Be(expectedResult); + } + + [Theory] + [InlineData("", null)] + [InlineData(@"""homepage"": """"", "")] + [InlineData(@"""homepage"": ""foo""", "foo")] + [InlineData(@"""homepage"": ""https://www.example.COM""", "https://www.example.COM")] + public void Deserialize_HomePage_Succeeds(string jsonContent, string? 
expectedResult) { // Arrange jsonContent = $"{{ {jsonContent} }}"; @@ -50,7 +132,7 @@ public void Deserialize_Description_Succeeds(string jsonContent, string expected // Assert result.Should().NotBeNull(); - result!.Description.Should().Be(expectedDescription.Replace("\n", Environment.NewLine)); + result!.Homepage.Should().Be(expectedResult); } [Fact] From 3a794cd701c746e065f85e0c0dde36317c763297 Mon Sep 17 00:00:00 2001 From: Gregoire Pailler Date: Sat, 27 May 2023 21:32:58 +0800 Subject: [PATCH 8/9] Add coverage for BucketsProvider/BucketsSource --- .../Providers/GitHubBucketsProviderTests.cs | 75 ++++++++++ .../Sources/GitHubBucketsSourceTests.cs | 67 +++++++++ .../Sources/ManualBucketsListSourceTests.cs | 130 ++++++++++++++++++ .../Sources/ManualBucketsSourceTests.cs | 65 +++++++++ .../Sources/OfficialBucketsSourceTests.cs | 130 ++++++++++++++++++ .../Helpers/Faker.cs | 21 +++ .../ScoopSearch.Indexer.Tests.csproj | 4 - .../Buckets/Sources/GitHubBucketsSource.cs | 3 +- .../Buckets/Sources/OfficialBucketsSource.cs | 2 +- .../Configuration/AzureLogsMonitorOptions.cs | 12 -- 10 files changed, 490 insertions(+), 19 deletions(-) create mode 100644 src/ScoopSearch.Indexer.Tests/Buckets/Providers/GitHubBucketsProviderTests.cs create mode 100644 src/ScoopSearch.Indexer.Tests/Buckets/Sources/GitHubBucketsSourceTests.cs create mode 100644 src/ScoopSearch.Indexer.Tests/Buckets/Sources/ManualBucketsListSourceTests.cs create mode 100644 src/ScoopSearch.Indexer.Tests/Buckets/Sources/ManualBucketsSourceTests.cs create mode 100644 src/ScoopSearch.Indexer.Tests/Buckets/Sources/OfficialBucketsSourceTests.cs delete mode 100644 src/ScoopSearch.Indexer/Configuration/AzureLogsMonitorOptions.cs diff --git a/src/ScoopSearch.Indexer.Tests/Buckets/Providers/GitHubBucketsProviderTests.cs b/src/ScoopSearch.Indexer.Tests/Buckets/Providers/GitHubBucketsProviderTests.cs new file mode 100644 index 0000000..3e789d4 --- /dev/null +++ 
b/src/ScoopSearch.Indexer.Tests/Buckets/Providers/GitHubBucketsProviderTests.cs @@ -0,0 +1,75 @@ +using FluentAssertions; +using Moq; +using ScoopSearch.Indexer.Buckets.Providers; +using ScoopSearch.Indexer.GitHub; +using ScoopSearch.Indexer.Tests.Helpers; + +namespace ScoopSearch.Indexer.Tests.Buckets.Providers; + +public class GitHubBucketsProviderTests +{ + private readonly Mock _gitHubClientMock; + private readonly GitHubBucketsProvider _sut; + + public GitHubBucketsProviderTests() + { + _gitHubClientMock = new Mock(); + _sut = new GitHubBucketsProvider(_gitHubClientMock.Object); + } + + [Theory] + [InlineData("http://foo/bar", false)] + [InlineData("https://foo/bar", false)] + [InlineData("http://www.google.fr/foo", false)] + [InlineData("https://www.google.fr/foo", false)] + [InlineData("http://github.com", true)] + [InlineData("https://github.com", true)] + [InlineData("http://www.github.com", true)] + [InlineData("https://www.github.com", true)] + [InlineData("http://www.GitHub.com", true)] + [InlineData("https://www.GitHub.com", true)] + public void IsCompatible_Succeeds(string input, bool expectedResult) + { + // Arrange + var uri = new Uri(input); + + // Act + var result = _sut.IsCompatible(uri); + + // Assert + result.Should().Be(expectedResult); + } + + [Fact] + public async void GetBucketAsync_ValidRepo_ReturnsBucket() + { + // Arrange + var cancellationToken = new CancellationToken(); + var uri = Faker.CreateUri(); + var gitHubRepo = Faker.CreateGitHubRepo().Generate(); + _gitHubClientMock.Setup(x => x.GetRepositoryAsync(uri, cancellationToken)).ReturnsAsync(gitHubRepo); + + // Act + var result = await _sut.GetBucketAsync(uri, cancellationToken); + + // Assert + result.Should().NotBeNull(); + result!.Uri.Should().Be(gitHubRepo.HtmlUri); + result.Stars.Should().Be(gitHubRepo.Stars); + } + + [Fact] + public async void GetBucketAsync_InvalidRepo_ReturnsNull() + { + // Arrange + var cancellationToken = new CancellationToken(); + var uri = 
Faker.CreateUri(); + _gitHubClientMock.Setup(x => x.GetRepositoryAsync(uri, cancellationToken)).ReturnsAsync((GitHubRepo?)null); + + // Act + var result = await _sut.GetBucketAsync(uri, cancellationToken); + + // Assert + result.Should().BeNull(); + } +} diff --git a/src/ScoopSearch.Indexer.Tests/Buckets/Sources/GitHubBucketsSourceTests.cs b/src/ScoopSearch.Indexer.Tests/Buckets/Sources/GitHubBucketsSourceTests.cs new file mode 100644 index 0000000..91a1606 --- /dev/null +++ b/src/ScoopSearch.Indexer.Tests/Buckets/Sources/GitHubBucketsSourceTests.cs @@ -0,0 +1,67 @@ +using FluentAssertions; +using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Options; +using Moq; +using ScoopSearch.Indexer.Buckets; +using ScoopSearch.Indexer.Buckets.Sources; +using ScoopSearch.Indexer.Configuration; +using ScoopSearch.Indexer.GitHub; +using ScoopSearch.Indexer.Tests.Helpers; +using Xunit.Abstractions; + +namespace ScoopSearch.Indexer.Tests.Buckets.Sources; + +public class GitHubBucketsSourceTests +{ + private readonly Mock _gitHubClientMock; + private readonly GitHubOptions _gitHubOptions; + private readonly XUnitLogger _logger; + private readonly GitHubBucketsSource _sut; + + public GitHubBucketsSourceTests(ITestOutputHelper testOutputHelper) + { + _gitHubClientMock = new Mock(); + _gitHubOptions = new GitHubOptions(); + _logger = new XUnitLogger(testOutputHelper); + _sut = new GitHubBucketsSource(_gitHubClientMock.Object, new OptionsWrapper(_gitHubOptions), _logger); + } + + [Fact] + public async void GetBucketsAsync_InvalidQueries_ReturnsEmpty() + { + // Arrange + var cancellationToken = new CancellationToken(); + _gitHubOptions.BucketsSearchQueries = null; + + // Act + var result = await _sut.GetBucketsAsync(cancellationToken).ToArrayAsync(cancellationToken); + + // Assert + result.Should().BeEmpty(); + _logger.Should().Log(LogLevel.Warning, "No buckets search queries found in configuration"); + } + + [Fact] + public async void GetBucketsAsync_Succeeds() + { + // 
Arrange + var cancellationToken = new CancellationToken(); + var input = new (string[] queries, GitHubRepo[] repos)[] + { + (new[] { "foo", "bar" }, new[] { Faker.CreateGitHubRepo().Generate() }), + (new[] { "bar", "foo" }, new[] { Faker.CreateGitHubRepo().Generate() }), + }; + _gitHubOptions.BucketsSearchQueries = input.Select(x => x.queries).ToArray(); + _gitHubClientMock.Setup(x => x.SearchRepositoriesAsync(input[0].queries, cancellationToken)).Returns(input[0].repos.ToAsyncEnumerable()); + _gitHubClientMock.Setup(x => x.SearchRepositoriesAsync(input[1].queries, cancellationToken)).Returns(input[1].repos.ToAsyncEnumerable()); + + // Act + var result = await _sut.GetBucketsAsync(cancellationToken).ToArrayAsync(cancellationToken); + + // Assert + result.Should().BeEquivalentTo( + input.SelectMany(x => x.repos), + options => options + .WithMapping(x => x.HtmlUri, y => y.Uri)); + } +} diff --git a/src/ScoopSearch.Indexer.Tests/Buckets/Sources/ManualBucketsListSourceTests.cs b/src/ScoopSearch.Indexer.Tests/Buckets/Sources/ManualBucketsListSourceTests.cs new file mode 100644 index 0000000..759c24b --- /dev/null +++ b/src/ScoopSearch.Indexer.Tests/Buckets/Sources/ManualBucketsListSourceTests.cs @@ -0,0 +1,130 @@ +using System.Net; +using CsvHelper; +using FluentAssertions; +using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Options; +using Moq; +using Moq.Protected; +using ScoopSearch.Indexer.Buckets; +using ScoopSearch.Indexer.Buckets.Providers; +using ScoopSearch.Indexer.Buckets.Sources; +using ScoopSearch.Indexer.Configuration; +using ScoopSearch.Indexer.Tests.Helpers; +using Xunit.Abstractions; +using Faker = ScoopSearch.Indexer.Tests.Helpers.Faker; +using MissingFieldException = CsvHelper.MissingFieldException; + +namespace ScoopSearch.Indexer.Tests.Buckets.Sources; + +public class ManualBucketsListSourceTests +{ + private readonly Mock _httpClientFactoryMock; + private readonly Mock _bucketsProviderMock; + private readonly BucketsOptions 
_bucketsOptions; + private readonly XUnitLogger _logger; + private readonly ManualBucketsListSource _sut; + + public ManualBucketsListSourceTests(ITestOutputHelper testOutputHelper) + { + _httpClientFactoryMock = new Mock(); + _bucketsProviderMock = new Mock(); + _bucketsOptions = new BucketsOptions(); + _logger = new XUnitLogger(testOutputHelper); + + _sut = new ManualBucketsListSource( + _httpClientFactoryMock.Object, + new[] {_bucketsProviderMock.Object }, + new OptionsWrapper(_bucketsOptions), + _logger); + } + + [Fact] + public async void GetBucketsAsync_InvalidUri_ReturnsEmpty() + { + // Arrange + var cancellationToken = new CancellationToken(); + _bucketsOptions.ManualBucketsListUrl = null; + + // Act + var result = await _sut.GetBucketsAsync(cancellationToken).ToArrayAsync(cancellationToken); + + // Assert + result.Should().BeEmpty(); + _logger.Should().Log(LogLevel.Warning, "No buckets list url found in configuration"); + } + + [Theory] + [MemberData(nameof(GetBucketsAsyncErrorsTestCases))] +#pragma warning disable xUnit1026 + public async void GetBucketsAsync_InvalidStatusCodeSucceeds(HttpStatusCode statusCode, string content, TExpectedException _) +#pragma warning restore xUnit1026 + where TExpectedException : Exception + { + // Arrange + _bucketsOptions.ManualBucketsListUrl = Faker.CreateUri(); + var cancellationToken = new CancellationToken(); + var httpMessageHandlerMock = new Mock(); + httpMessageHandlerMock + .Protected() + .Setup>( + "SendAsync", + ItExpr.Is(x => x.RequestUri == _bucketsOptions.ManualBucketsListUrl), + ItExpr.IsAny()) + .ReturnsAsync(new HttpResponseMessage() { StatusCode = statusCode, Content = new StringContent(content) }); + _httpClientFactoryMock.Setup(x => x.CreateClient("Default")).Returns(new HttpClient(httpMessageHandlerMock.Object)); + + // Act + var result = async () => await _sut.GetBucketsAsync(cancellationToken).ToArrayAsync(cancellationToken); + + // Assert + await result.Should().ThrowAsync(); + } + + public static 
IEnumerable GetBucketsAsyncErrorsTestCases() + { + yield return new object[] { HttpStatusCode.NotFound, $"url", new HttpRequestException() }; + yield return new object[] { HttpStatusCode.OK, "", new ReaderException(null) }; + yield return new object[] { HttpStatusCode.OK, $"foo{Environment.NewLine}{Faker.CreateUrl()}", new MissingFieldException(null) }; + } + + [Theory] + [MemberData(nameof(GetBucketsAsyncTestCases))] + public async void GetBucketsAsync_Succeeds(string content, string repositoryUri, bool isCompatible, bool expectedBucket) + { + // Arrange + _bucketsOptions.ManualBucketsListUrl = Faker.CreateUri(); + var cancellationToken = new CancellationToken(); + var httpMessageHandlerMock = new Mock(); + _httpClientFactoryMock.Setup(x => x.CreateClient("Default")).Returns(new HttpClient(httpMessageHandlerMock.Object)); + + httpMessageHandlerMock + .Protected() + .Setup>( + "SendAsync", + ItExpr.Is(x => x.RequestUri == _bucketsOptions.ManualBucketsListUrl), + ItExpr.IsAny()) + .ReturnsAsync(new HttpResponseMessage() { StatusCode = HttpStatusCode.OK, Content = new StringContent(content) }); + Bucket bucket = new Bucket(new Uri(repositoryUri), 123); + _bucketsProviderMock.Setup(x => x.IsCompatible(new Uri(repositoryUri))).Returns(isCompatible); + _bucketsProviderMock.Setup(x => x.GetBucketAsync(new Uri(repositoryUri), cancellationToken)).ReturnsAsync(bucket); + + // Act + var result = await _sut.GetBucketsAsync(cancellationToken).ToArrayAsync(cancellationToken); + + // Assert + result.Should().HaveCount(expectedBucket ? 
1 : 0); + if (expectedBucket) + { + result.Should().BeEquivalentTo(new[] { bucket }); + } + } + + public static IEnumerable GetBucketsAsyncTestCases() + { + yield return new object[] { "url", Faker.CreateUrl(), true, false }; + var url = Faker.CreateUrl(); + yield return new object[] { $"url{Environment.NewLine}{url}", url, false, false }; + yield return new object[] { $"url{Environment.NewLine}{url}", url, true, true }; + yield return new object[] { $"url{Environment.NewLine}{url}.git", url, true, true }; + } +} diff --git a/src/ScoopSearch.Indexer.Tests/Buckets/Sources/ManualBucketsSourceTests.cs b/src/ScoopSearch.Indexer.Tests/Buckets/Sources/ManualBucketsSourceTests.cs new file mode 100644 index 0000000..86f2a57 --- /dev/null +++ b/src/ScoopSearch.Indexer.Tests/Buckets/Sources/ManualBucketsSourceTests.cs @@ -0,0 +1,65 @@ +using FluentAssertions; +using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Options; +using Moq; +using ScoopSearch.Indexer.Buckets; +using ScoopSearch.Indexer.Buckets.Providers; +using ScoopSearch.Indexer.Buckets.Sources; +using ScoopSearch.Indexer.Configuration; +using ScoopSearch.Indexer.Tests.Helpers; +using Xunit.Abstractions; + +namespace ScoopSearch.Indexer.Tests.Buckets.Sources; + +public class ManualBucketsSourceTests +{ + private readonly Mock _bucketsProviderMock; + private readonly BucketsOptions _bucketsOptions; + private readonly XUnitLogger _logger; + private readonly ManualBucketsSource _sut; + + public ManualBucketsSourceTests(ITestOutputHelper testOutputHelper) + { + _bucketsProviderMock = new Mock(); + _bucketsOptions = new BucketsOptions(); + _logger = new XUnitLogger(testOutputHelper); + + _sut = new ManualBucketsSource( + new[] {_bucketsProviderMock.Object }, + new OptionsWrapper(_bucketsOptions), + _logger); + } + + [Fact] + public async void GetBucketsAsync_EmptyManualBuckets_ReturnsEmpty() + { + // Arrange + var cancellationToken = new CancellationToken(); + _bucketsOptions.ManualBuckets = null; + + // Act 
+ var result = await _sut.GetBucketsAsync(cancellationToken).ToArrayAsync(cancellationToken); + + // Assert + result.Should().BeEmpty(); + _logger.Should().Log(LogLevel.Warning, "No manual buckets found in configuration"); + } + + [Fact] + public async void GetBucketsAsync_ReturnsBuckets() + { + // Arrange + _bucketsOptions.ManualBuckets = new[] { Faker.CreateUri(), Faker.CreateUri() }; + var cancellationToken = new CancellationToken(); + _bucketsProviderMock.Setup(x => x.IsCompatible(_bucketsOptions.ManualBuckets[0])).Returns(false); + _bucketsProviderMock.Setup(x => x.IsCompatible(_bucketsOptions.ManualBuckets[1])).Returns(true); + _bucketsProviderMock.Setup(x => x.GetBucketAsync(_bucketsOptions.ManualBuckets[0], cancellationToken)).ReturnsAsync(new Bucket(_bucketsOptions.ManualBuckets[0], 123)); + _bucketsProviderMock.Setup(x => x.GetBucketAsync(_bucketsOptions.ManualBuckets[1], cancellationToken)).ReturnsAsync(new Bucket(_bucketsOptions.ManualBuckets[1], 123)); + + // Act + var result = await _sut.GetBucketsAsync(cancellationToken).ToArrayAsync(cancellationToken); + + // Assert + result.Should().BeEquivalentTo(new[] { new Bucket(_bucketsOptions.ManualBuckets[1], 123) }); + } +} diff --git a/src/ScoopSearch.Indexer.Tests/Buckets/Sources/OfficialBucketsSourceTests.cs b/src/ScoopSearch.Indexer.Tests/Buckets/Sources/OfficialBucketsSourceTests.cs new file mode 100644 index 0000000..959955a --- /dev/null +++ b/src/ScoopSearch.Indexer.Tests/Buckets/Sources/OfficialBucketsSourceTests.cs @@ -0,0 +1,130 @@ +using System.Net; +using System.Text; +using System.Text.Json; +using FluentAssertions; +using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Options; +using Moq; +using Moq.Protected; +using ScoopSearch.Indexer.Buckets; +using ScoopSearch.Indexer.Buckets.Providers; +using ScoopSearch.Indexer.Buckets.Sources; +using ScoopSearch.Indexer.Configuration; +using ScoopSearch.Indexer.Tests.Helpers; +using Xunit.Abstractions; +using Faker = 
ScoopSearch.Indexer.Tests.Helpers.Faker; + +namespace ScoopSearch.Indexer.Tests.Buckets.Sources; + +public class OfficialBucketsSourceTests +{ + private readonly Mock _httpClientFactoryMock; + private readonly Mock _bucketsProviderMock; + private readonly BucketsOptions _bucketsOptions; + private readonly XUnitLogger _logger; + private readonly OfficialBucketsSource _sut; + + public OfficialBucketsSourceTests(ITestOutputHelper testOutputHelper) + { + _httpClientFactoryMock = new Mock(); + _bucketsProviderMock = new Mock(); + _bucketsOptions = new BucketsOptions(); + _logger = new XUnitLogger(testOutputHelper); + + _sut = new OfficialBucketsSource( + _httpClientFactoryMock.Object, + new[] {_bucketsProviderMock.Object }, + new OptionsWrapper(_bucketsOptions), + _logger); + } + + [Fact] + public async void GetBucketsAsync_InvalidUri_ReturnsEmpty() + { + // Arrange + var cancellationToken = new CancellationToken(); + _bucketsOptions.OfficialBucketsListUrl = null; + + // Act + var result = await _sut.GetBucketsAsync(cancellationToken).ToArrayAsync(cancellationToken); + + // Arrange + result.Should().BeEmpty(); + _logger.Should().Log(LogLevel.Warning, "No official buckets list url found in configuration"); + } + + [Theory] + [MemberData(nameof(GetBucketsAsyncErrorsTestCases))] +#pragma warning disable xUnit1026 + public async void GetBucketsAsync_InvalidStatusCodeSucceeds(HttpStatusCode statusCode, string content, TExpectedException _) +#pragma warning restore xUnit1026 + where TExpectedException : Exception + { + // Arrange + _bucketsOptions.OfficialBucketsListUrl = Faker.CreateUri(); + var cancellationToken = new CancellationToken(); + var httpMessageHandlerMock = new Mock(); + httpMessageHandlerMock + .Protected() + .Setup>( + "SendAsync", + ItExpr.Is(x => x.RequestUri == _bucketsOptions.OfficialBucketsListUrl), + ItExpr.IsAny()) + .ReturnsAsync(new HttpResponseMessage() { StatusCode = statusCode, Content = new StreamContent(new 
MemoryStream(Encoding.UTF8.GetBytes(content))) }); + _httpClientFactoryMock.Setup(x => x.CreateClient("Default")).Returns(new HttpClient(httpMessageHandlerMock.Object)); + + // Act + var result = async () => await _sut.GetBucketsAsync(cancellationToken).ToArrayAsync(cancellationToken); + + // Assert + await result.Should().ThrowAsync(); + } + + public static IEnumerable GetBucketsAsyncErrorsTestCases() + { + yield return new object[] { HttpStatusCode.NotFound, $"url", new HttpRequestException() }; + yield return new object[] { HttpStatusCode.OK, "", new JsonException() }; + yield return new object[] { HttpStatusCode.OK, $"foo", new JsonException() }; + } + + [Theory] + [MemberData(nameof(GetBucketsAsyncTestCases))] + public async void GetBucketsAsync_Succeeds(string content, string repositoryUri, bool isCompatible, bool expectedBucket) + { + // Arrange + _bucketsOptions.OfficialBucketsListUrl = Faker.CreateUri(); + var cancellationToken = new CancellationToken(); + var httpMessageHandlerMock = new Mock(); + _httpClientFactoryMock.Setup(x => x.CreateClient("Default")).Returns(new HttpClient(httpMessageHandlerMock.Object)); + + httpMessageHandlerMock + .Protected() + .Setup>( + "SendAsync", + ItExpr.Is(x => x.RequestUri == _bucketsOptions.OfficialBucketsListUrl), + ItExpr.IsAny()) + .ReturnsAsync(new HttpResponseMessage() { StatusCode = HttpStatusCode.OK, Content = new StreamContent(new MemoryStream(Encoding.UTF8.GetBytes(content))) }); + Bucket bucket = new Bucket(new Uri(repositoryUri), 123); + _bucketsProviderMock.Setup(x => x.IsCompatible(new Uri(repositoryUri))).Returns(isCompatible); + _bucketsProviderMock.Setup(x => x.GetBucketAsync(new Uri(repositoryUri), cancellationToken)).ReturnsAsync(bucket); + + // Act + var result = await _sut.GetBucketsAsync(cancellationToken).ToArrayAsync(cancellationToken); + + // Arrange + result.Should().HaveCount(expectedBucket ? 
1 : 0); + if (expectedBucket) + { + result.Should().BeEquivalentTo(new[] { bucket }); + } + } + + public static IEnumerable GetBucketsAsyncTestCases() + { + var url = Faker.CreateUrl(); + yield return new object[] { $@"{{ }}", url, false, false }; + yield return new object[] { $@"{{ }}", url, true, false }; + yield return new object[] { $@"{{ ""foo"": ""{url}"" }}", url, false, false }; + yield return new object[] { $@"{{ ""foo"": ""{url}"" }}", url, true, true }; + } +} diff --git a/src/ScoopSearch.Indexer.Tests/Helpers/Faker.cs b/src/ScoopSearch.Indexer.Tests/Helpers/Faker.cs index e3c437e..f52e146 100644 --- a/src/ScoopSearch.Indexer.Tests/Helpers/Faker.cs +++ b/src/ScoopSearch.Indexer.Tests/Helpers/Faker.cs @@ -1,5 +1,6 @@ using Bogus; using ScoopSearch.Indexer.Data; +using ScoopSearch.Indexer.GitHub; namespace ScoopSearch.Indexer.Tests.Helpers; @@ -42,4 +43,24 @@ public static Faker CreateManifestMetadata() return faker; } + + public static Faker CreateGitHubRepo() + { + var faker = new Faker() + .StrictMode(true) + .RuleFor(_ => _.HtmlUri, f => new Uri(f.Internet.Url())) + .RuleFor(_ => _.Stars, f => f.Random.Int(0, 1000)); + + return faker; + } + + public static string CreateUrl() + { + return new Bogus.Faker().Internet.UrlWithPath(); + } + + public static Uri CreateUri() + { + return new Uri(Faker.CreateUrl()); + } } diff --git a/src/ScoopSearch.Indexer.Tests/ScoopSearch.Indexer.Tests.csproj b/src/ScoopSearch.Indexer.Tests/ScoopSearch.Indexer.Tests.csproj index 14b951c..7a6a38a 100644 --- a/src/ScoopSearch.Indexer.Tests/ScoopSearch.Indexer.Tests.csproj +++ b/src/ScoopSearch.Indexer.Tests/ScoopSearch.Indexer.Tests.csproj @@ -22,9 +22,5 @@ - - - - diff --git a/src/ScoopSearch.Indexer/Buckets/Sources/GitHubBucketsSource.cs b/src/ScoopSearch.Indexer/Buckets/Sources/GitHubBucketsSource.cs index a4dbf7d..e6d36d7 100644 --- a/src/ScoopSearch.Indexer/Buckets/Sources/GitHubBucketsSource.cs +++ b/src/ScoopSearch.Indexer/Buckets/Sources/GitHubBucketsSource.cs @@ -1,7 
+1,6 @@ using System.Runtime.CompilerServices; using Microsoft.Extensions.Logging; using Microsoft.Extensions.Options; -using ScoopSearch.Indexer.Buckets.Providers; using ScoopSearch.Indexer.Configuration; using ScoopSearch.Indexer.GitHub; @@ -16,7 +15,7 @@ internal class GitHubBucketsSource : IBucketsSource public GitHubBucketsSource( IGitHubClient gitHubClient, IOptions gitHubOptions, - ILogger logger) + ILogger logger) { _gitHubClient = gitHubClient; _logger = logger; diff --git a/src/ScoopSearch.Indexer/Buckets/Sources/OfficialBucketsSource.cs b/src/ScoopSearch.Indexer/Buckets/Sources/OfficialBucketsSource.cs index fb70ef7..ba57856 100644 --- a/src/ScoopSearch.Indexer/Buckets/Sources/OfficialBucketsSource.cs +++ b/src/ScoopSearch.Indexer/Buckets/Sources/OfficialBucketsSource.cs @@ -19,7 +19,7 @@ public OfficialBucketsSource( IHttpClientFactory httpClientFactory, IEnumerable bucketsProviders, IOptions bucketOptions, - ILogger logger) + ILogger logger) { _httpClientFactory = httpClientFactory; _bucketsProviders = bucketsProviders; diff --git a/src/ScoopSearch.Indexer/Configuration/AzureLogsMonitorOptions.cs b/src/ScoopSearch.Indexer/Configuration/AzureLogsMonitorOptions.cs deleted file mode 100644 index 9c25c9a..0000000 --- a/src/ScoopSearch.Indexer/Configuration/AzureLogsMonitorOptions.cs +++ /dev/null @@ -1,12 +0,0 @@ -namespace ScoopSearch.Indexer.Configuration; - -public class AzureLogsMonitorOptions -{ - public string TenantId { get; set; } = null!; - - public string ClientId { get; set; } = null!; - - public string ClientSecret { get; set; } = null!; - - public string WorkspaceId { get; set; } = null!; -} From df4e9d48c26dfa03ef54323bf160a394daabe4ce Mon Sep 17 00:00:00 2001 From: Gregoire Pailler Date: Tue, 30 May 2023 23:57:51 +0800 Subject: [PATCH 9/9] Add tests for ScoopSearchIndexerTests --- .../Processor/FetchBucketsProcessorTests.cs | 56 --------------- .../ScoopSearch.Indexer.Tests.csproj | 4 -- .../ScoopSearchIndexerTests.cs | 70 
+++++++++++++++++++ 3 files changed, 70 insertions(+), 60 deletions(-) delete mode 100644 src/ScoopSearch.Indexer.Tests/Processor/FetchBucketsProcessorTests.cs create mode 100644 src/ScoopSearch.Indexer.Tests/ScoopSearchIndexerTests.cs diff --git a/src/ScoopSearch.Indexer.Tests/Processor/FetchBucketsProcessorTests.cs b/src/ScoopSearch.Indexer.Tests/Processor/FetchBucketsProcessorTests.cs deleted file mode 100644 index e2a4313..0000000 --- a/src/ScoopSearch.Indexer.Tests/Processor/FetchBucketsProcessorTests.cs +++ /dev/null @@ -1,56 +0,0 @@ -using System.Text.RegularExpressions; -using FluentAssertions; -using Microsoft.Extensions.DependencyInjection; -using Microsoft.Extensions.Logging; -using Microsoft.Extensions.Options; -using Moq; -using ScoopSearch.Indexer.Configuration; -using ScoopSearch.Indexer.GitHub; -using ScoopSearch.Indexer.Processor; -using ScoopSearch.Indexer.Tests.Helpers; -using Xunit.Abstractions; - -namespace ScoopSearch.Indexer.Tests.Processor; - -public class FetchBucketsProcessorTests : IClassFixture -{ - private readonly XUnitLogger _logger; - private readonly FetchBucketsProcessor _sut; - - public FetchBucketsProcessorTests(HostFixture hostFixture, ITestOutputHelper testOutputHelper) - { - hostFixture.Configure(testOutputHelper); - - _logger = new XUnitLogger(testOutputHelper); - _sut = new FetchBucketsProcessor( - hostFixture.Instance.Services.GetRequiredService(), - hostFixture.Instance.Services.GetRequiredService>(), - _logger); - } - - [Fact] - public async void FetchBucketsAsync_ReturnsBuckets_Succeeds() - { - // Arrange - var cancellationToken = new CancellationToken(); - var expectedOfficialBucketsCount = 10; - var expectedAtLeastBucketsCount = 1400; - - // Act - var result = await _sut.FetchBucketsAsync(cancellationToken); - - // Assert - result.Should().HaveCountGreaterOrEqualTo(expectedAtLeastBucketsCount); - result.Should().OnlyHaveUniqueItems(_ => _.Uri.AbsoluteUri.ToLowerInvariant()); - - 
_logger.Should().Log(LogLevel.Information, "Retrieving buckets from sources"); - _logger.Should().Log(LogLevel.Information, _ => Regex.IsMatch(_, $"Found {expectedOfficialBucketsCount} official buckets.+")); - _logger.Should().Log(LogLevel.Information, _ => Regex.IsMatch(_, @"Found \d{4} buckets on GitHub")); - _logger.Should().Log(LogLevel.Information, _ => Regex.IsMatch(_, @"Found \d+ buckets to ignore \(appsettings\.json\)")); - _logger.Should().Log(LogLevel.Information, _ => Regex.IsMatch(_, @"Found \d+ buckets to add \(appsettings\.json\)")); - _logger.Should().Log(LogLevel.Information, _ => Regex.IsMatch(_, @"Found \d+ buckets to add from external list.+")); - _logger.Should().Log(LogLevel.Debug, _ => _.StartsWith("Adding bucket"), Times.AtLeast(expectedAtLeastBucketsCount)); - _logger.Should().Log(LogLevel.Debug, _ => _.StartsWith("Adding bucket 'https://github.com/ScoopInstaller/Main'")); - _logger.Should().Log(LogLevel.Debug, _ => _.StartsWith("Adding bucket 'https://github.com/ScoopInstaller/Extras'")); - } -} diff --git a/src/ScoopSearch.Indexer.Tests/ScoopSearch.Indexer.Tests.csproj b/src/ScoopSearch.Indexer.Tests/ScoopSearch.Indexer.Tests.csproj index 7a6a38a..d73bd9d 100644 --- a/src/ScoopSearch.Indexer.Tests/ScoopSearch.Indexer.Tests.csproj +++ b/src/ScoopSearch.Indexer.Tests/ScoopSearch.Indexer.Tests.csproj @@ -19,8 +19,4 @@ - - - - diff --git a/src/ScoopSearch.Indexer.Tests/ScoopSearchIndexerTests.cs b/src/ScoopSearch.Indexer.Tests/ScoopSearchIndexerTests.cs new file mode 100644 index 0000000..5d4f787 --- /dev/null +++ b/src/ScoopSearch.Indexer.Tests/ScoopSearchIndexerTests.cs @@ -0,0 +1,70 @@ +using System.Text.RegularExpressions; +using FluentAssertions; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Options; +using Moq; +using ScoopSearch.Indexer.Buckets; +using ScoopSearch.Indexer.Buckets.Sources; +using ScoopSearch.Indexer.Configuration; +using ScoopSearch.Indexer.Data; +using 
ScoopSearch.Indexer.Processor; +using ScoopSearch.Indexer.Tests.Helpers; +using Xunit.Abstractions; + +namespace ScoopSearch.Indexer.Tests; + +public class ScoopSearchIndexerTests : IClassFixture +{ + private readonly Mock _indexingProcessorMock; + private readonly XUnitLogger _logger; + private readonly ScoopSearchIndexer _sut; + + public ScoopSearchIndexerTests(HostFixture hostFixture, ITestOutputHelper testOutputHelper) + { + hostFixture.Configure(testOutputHelper); + + _indexingProcessorMock = new Mock(); + + var fetchManifestsProcessorMock = new Mock(); + fetchManifestsProcessorMock + .Setup(x => x.FetchManifestsAsync(It.IsAny(), It.IsAny())) + .Returns(AsyncEnumerable.Empty()); + + _logger = new XUnitLogger(testOutputHelper); + _sut = new ScoopSearchIndexer( + hostFixture.Instance.Services.GetRequiredService>(), + hostFixture.Instance.Services.GetRequiredService(), + fetchManifestsProcessorMock.Object, + _indexingProcessorMock.Object, + hostFixture.Instance.Services.GetRequiredService>(), + _logger); + } + + [Fact] + public async void ExecuteAsync_ReturnsBuckets_Succeeds() + { + // Arrange + const int expectedAtLeastBucketsCount = 1500; + var cancellationToken = new CancellationToken(); + Uri[]? 
actualBucketsUris = null; + _indexingProcessorMock + .Setup(x => x.CleanIndexFromNonExistentBucketsAsync(It.IsAny(), cancellationToken)) + .Returns(Task.CompletedTask) + .Callback((uris, _) => actualBucketsUris = uris) + .Verifiable(); + + // Act + await _sut.ExecuteAsync(cancellationToken); + + // Assert + _indexingProcessorMock.Verify(); + actualBucketsUris.Should().HaveCountGreaterThan(expectedAtLeastBucketsCount); + actualBucketsUris.Should().OnlyHaveUniqueItems(_ => _.AbsoluteUri.ToLowerInvariant()); + + _logger.Should().Log(LogLevel.Information, _ => Regex.IsMatch(_, @"Found \d+ buckets for a total of \d+ manifests.")); + _logger.Should().Log(LogLevel.Information, _ => _.StartsWith("Processed bucket "), Times.AtLeast(expectedAtLeastBucketsCount)); + _logger.Should().Log(LogLevel.Information, _ => _.StartsWith("Processed bucket https://github.com/ScoopInstaller/Main")); + _logger.Should().Log(LogLevel.Information, _ => _.StartsWith("Processed bucket https://github.com/ScoopInstaller/Extras")); + } +}