Skip to content

Commit

Permalink
* switch the interpolation algorithm being used from the default line…
Browse files Browse the repository at this point in the history
…ar to the nearest @ `RotateMatrix()`

* fix the violation of Roslyn analyzer rule `S1244: Floating point numbers should not be tested for equality`: SonarSource/sonar-dotnet#2326 @ `PreprocessTextBoxes()`
* using `RadiansToDegrees()` introduced in .NET 8: dotnet/runtime#86402 @ `GetRotationDegrees()`
@ TesseractRecognizer.cs

- the no longer effective suppression for Roslyn analyzer rule `SA1119:Statement should not use unnecessary parenthesis` due to DotNetAnalyzers/StyleCopAnalyzers#3730 @ `PaddleOcrRecognizerAndDetector.Initialize()`
* add some ad-hoc suppressions for ReSharper inspection `StringLiteralTypo`
@ imagePipeline

* remove the no longer effective suppression of Roslyn analyzer rule `SA1003 // Symbols should be spaced correctly` and fix the violation of ReSharper inspection `RedundantCast` @ `ThreadLateCrawlerAndSaver.CrawlThread()`
* suppress the violation of ReSharper inspection `InconsistentlySynchronizedField` @ `CrawlerLocks.AcquireFailed()`
* suppress the violations of Roslyn analyzer rule `CA2021:Do not call Enumerable.Cast<T> or Enumerable.OfType<T> with incompatible types` due to its false positive: dotnet/roslyn-analyzers#7031 @ `ResumeSuspendPostContentsPushingWorker.DoWork()`
@ crawler

* suppress the violations of Roslyn analyzer rule `S6667: Logging in a catch clause should pass the caught exception as a parameter` @ `ErrorableWorker.DoWorkWithExceptionLogging()`
@ shared

* suppress the violation of Roslyn analyzer rule `S6674:Log message template should be syntactically correct` and ReSharper inspection `RedundantUsingDirective` @ GlobalSuppressions.cs
* fix all violations of Roslyn analyzer rule `CA1513: Use ObjectDisposedException throw helper`
* setting `csharp_style_prefer_primary_constructors` @ .editorconfig
* prevent `appsettings.*.json` from being copied when publishing, following https://weblog.west-wind.com/posts/2022/Aug/24/Keeping-Content-Out-of-the-Publish-Folder-for-WebDeploy#what-works-copytopublishdirectorynever @ `tbm.{Crawler,ImagePipeline}.csproj`
+ `tbm.sln.DotSettings` to store team-shared ReSharper options
@ c#
  • Loading branch information
n0099 committed Mar 23, 2024
1 parent 46f0f66 commit fcd4971
Show file tree
Hide file tree
Showing 13 changed files with 41 additions and 35 deletions.
1 change: 1 addition & 0 deletions c#/.editorconfig
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,7 @@ csharp_prefer_simple_using_statement = true:suggestion
csharp_style_namespace_declarations = block_scoped:silent
csharp_style_prefer_method_group_conversion = true:silent
csharp_style_prefer_top_level_statements = true:silent
csharp_style_prefer_primary_constructors = true:suggestion
###############################
# VB Coding Conventions #
###############################
Expand Down
2 changes: 2 additions & 0 deletions c#/GlobalSuppressions.cs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
// ReSharper disable once RedundantUsingDirective
using System.Diagnostics.CodeAnalysis;

[assembly: SuppressMessage("Major Code Smell", "S125:Sections of code should not be commented out")]
Expand All @@ -6,6 +7,7 @@
[assembly: SuppressMessage("Roslynator", "RCS1139:Add summary element to documentation comment.")]
[assembly: SuppressMessage("Roslynator", "RCS1156:Use string.Length instead of comparison with empty string.")]
[assembly: SuppressMessage("Style", "VSTHRD200:Use \"Async\" suffix for async methods")]
[assembly: SuppressMessage("Critical Bug", "S6674:Log message template should be syntactically correct")]

[assembly: SuppressMessage("StyleCop.CSharp.DocumentationRules", "SA1600:Elements should be documented")]
[assembly: SuppressMessage("StyleCop.CSharp.DocumentationRules", "SA1601:Partial elements should be documented")]
Expand Down
3 changes: 1 addition & 2 deletions c#/crawler/src/Tieba/ClientRequester.cs
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,7 @@ await Request(() => PostProtoBuf(url, clientVersion, requestParam, commonParamSe
_ = stream.Seek(0, SeekOrigin.Begin);
using var stream2 = new MemoryStream((int)stream.Length);
stream.CopyTo(stream2);
if (!stream2.TryGetBuffer(out var buffer))
throw new ObjectDisposedException(nameof(stream2));
ObjectDisposedException.ThrowIf(!stream2.TryGetBuffer(out var buffer), stream2);
var responseBody = Encoding.UTF8.GetString(buffer);
// the invalid protoBuf bytes usually is just a plain html string
Expand Down
1 change: 1 addition & 0 deletions c#/crawler/src/Tieba/Crawl/CrawlerLocks.cs
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ public void ReleaseRange(LockId lockId, IEnumerable<Page> pages)

public void AcquireFailed(LockId lockId, Page page, FailureCount failureCount)
{
// ReSharper disable once InconsistentlySynchronizedField
var maxRetry = _config.GetValue<FailureCount>("MaxRetryTimes", 5);
if (failureCount >= maxRetry)
{
Expand Down
4 changes: 1 addition & 3 deletions c#/crawler/src/Tieba/Crawl/ThreadLateCrawlerAndSaver.cs
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,7 @@ private async Task<ThreadPost?> CrawlThread
(Tid tid, FailureCount failureCount, CancellationToken stoppingToken = default)
{
var crawlerLockId = new CrawlerLocks.LockId(fid, tid);
#pragma warning disable SA1003 // Symbols should be spaced correctly
if (_locks.AcquireRange(crawlerLockId, [(Page)1]).Count == 0) return null;
#pragma warning restore SA1003 // Symbols should be spaced correctly
if (_locks.AcquireRange(crawlerLockId, [1]).Count == 0) return null;
try
{
var json = await requester.RequestJson(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ public class ResumeSuspendPostContentsPushingWorker(
public static string GetFilePath(string postType) =>
Path.Combine(AppContext.BaseDirectory, $"suspendPostContentsPushIntoSonic.{postType}.csv");

[SuppressMessage("Reliability", "CA2021:Do not call Enumerable.Cast<T> or Enumerable.OfType<T> with incompatible types", Justification = "https://github.com/dotnet/roslyn-analyzers/issues/7031")]
protected override Task DoWork(CancellationToken stoppingToken)
{
foreach (var postType in new[] {"replies", "subReplies"})
Expand Down
16 changes: 4 additions & 12 deletions c#/crawler/tbm.Crawler.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,9 @@
<PackageReference Include="System.IO.Hashing" Version="8.0.0" />
</ItemGroup>
<ItemGroup>
<None Update="appsettings.Development.json">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="appsettings.json">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="appsettings.Production.json">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="nlog.config">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<!-- Environment-specific settings: copied to the build output, but kept out of the publish folder.
     The documented CopyToPublishDirectory values are Always, PreserveNewest and Never. -->
<None Update="appsettings.Development.json" CopyToOutputDirectory="PreserveNewest" CopyToPublishDirectory="Never" />
<None Update="appsettings.Production.json" CopyToOutputDirectory="PreserveNewest" CopyToPublishDirectory="Never" />
<None Update="appsettings.json" CopyToOutputDirectory="PreserveNewest" />
<None Update="nlog.config" CopyToOutputDirectory="PreserveNewest" />
</ItemGroup>
</Project>
3 changes: 1 addition & 2 deletions c#/imagePipeline/src/ImageBatchConsumingWorker.cs
Original file line number Diff line number Diff line change
Expand Up @@ -156,8 +156,7 @@ ImageKeyWithMatrix DecodeFrame(ImageFrame<Rgb24> frame, int frameIndex)
using var frameImage = Image.LoadPixelData<Rgb24>(frameBytes, frame.Width, frame.Height);
using var stream = new MemoryStream();
frameImage.SaveAsPng(stream);
if (!stream.TryGetBuffer(out var buffer))
throw new ObjectDisposedException(nameof(stream));
ObjectDisposedException.ThrowIf(!stream.TryGetBuffer(out var buffer), stream);
#pragma warning disable IDISP001 // Dispose created
var frameMat = Cv2.ImDecode(buffer, ImreadModes.Unchanged);
#pragma warning restore IDISP001 // Dispose created
Expand Down
2 changes: 1 addition & 1 deletion c#/imagePipeline/src/Ocr/PaddleOcrRecognizerAndDetector.cs
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ public PaddleOcrRecognizerAndDetector(IConfiguration config, string script)
public void Dispose() => _ocr?.Dispose();

[SuppressMessage("Major Code Smell", "S3928:Parameter names used into ArgumentException constructors should match an existing one ", Justification = "https://github.com/SonarSource/sonar-dotnet/issues/8386#issuecomment-1847872210")]
[SuppressMessage("StyleCop.CSharp.MaintainabilityRules", "SA1119:Statement should not use unnecessary parenthesis", Justification = "https://github.com/DotNetAnalyzers/StyleCopAnalyzers/issues/3730")]
[SuppressMessage("Usage", "CA2208:Instantiate argument exceptions correctly")]
[SuppressMessage("ReSharper", "StringLiteralTypo")]
public async Task Initialize(CancellationToken stoppingToken = default) =>
_ocr ??= await (_script switch
{ // https://en.wikipedia.org/wiki/Template:ISO_15924_script_codes_and_related_Unicode_data
Expand Down
14 changes: 11 additions & 3 deletions c#/imagePipeline/src/Ocr/TesseractRecognizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ public sealed partial class TesseractRecognizer(IConfiguration config, string sc

public delegate TesseractRecognizer New(string script);

[SuppressMessage("ReSharper", "StringLiteralTypo")]
private Lazy<OCRTesseract> TesseractInstanceHorizontal => _tesseractInstanceHorizontal ??= new(script switch
{ // https://en.wikipedia.org/wiki/Template:ISO_15924_script_codes_and_related_Unicode_data
"Hans" => CreateTesseract("best/chi_sim+best/eng"),
Expand All @@ -19,6 +20,7 @@ public sealed partial class TesseractRecognizer(IConfiguration config, string sc
_ => throw new ArgumentOutOfRangeException(nameof(script), script, "Unsupported script.")
});

[SuppressMessage("ReSharper", "StringLiteralTypo")]
private Lazy<OCRTesseract> TesseractInstanceVertical => _tesseractInstanceVertical ??= new(script switch
{
"Hans" => CreateTesseract("best/chi_sim_vert", isVertical: true),
Expand Down Expand Up @@ -53,6 +55,8 @@ public Func<PreprocessedTextBox, TesseractRecognitionResult> RecognizePreprocess
var (imageKey, box, preprocessedTextBoxMat) = textBox;
using var mat = preprocessedTextBoxMat;
var isVertical = (float)mat.Width / mat.Height < AspectRatioThresholdToConsiderAsVertical;
// ReSharper disable once StringLiteralTypo
if (isVertical && script == "Latn") isVertical = false; // there's no vertical latin
var tesseract = isVertical ? TesseractInstanceVertical : TesseractInstanceHorizontal;
tesseract.Value.Run(mat, out _, out var rects, out var texts, out var confidences);
Expand Down Expand Up @@ -95,7 +99,8 @@ public static IEnumerable<PreprocessedTextBox> PreprocessTextBoxes(
// http://www.fmwconcepts.com/imagemagick/threshold_comparison/index.php
_ = Cv2.Threshold(mat, mat, thresh: 0, maxval: 255, ThresholdTypes.Otsu | ThresholdTypes.Binary);
if (degrees != 0) RotateMatrix(mat, degrees);
// Rotate only when the angle is distinguishable from zero. A tolerance comparison replaces
// the former `degrees != 0` test so that floats are not tested for equality (S1244); the
// condition must stay the logical equivalent of "not zero", hence `>=` rather than `<`.
// Tolerance of 2^-24 taken from:
// https://stackoverflow.com/questions/9392869/where-do-i-find-the-machine-epsilon-in-c
if (MathF.Abs(degrees) >= MathF.Pow(2, -24)) RotateMatrix(mat, degrees);
// https://github.com/tesseract-ocr/tesseract/issues/427
Cv2.CopyMakeBorder(mat, mat, 10, 10, 10, 10, BorderTypes.Constant, new(0, 0, 0));
Expand All @@ -114,8 +119,9 @@ private static float GetRotationDegrees(RotatedRect rotatedRect)
var xAxisDiff = bottomLeft.X - topLeft.X;
var yAxisDiff = bottomLeft.Y - topLeft.Y;

// atan2(y,x) is the radians of the angle C in a right triangle with side b=4 (xAxisDiff) and side c=1 (yAxisDiff)
// https://www.calculator.net/triangle-calculator.html?vc=&vx=4&vy=&va=90&vz=1&vb=&angleunits=d&x=53&y=29
return (float)(Math.Atan2(xAxisDiff, yAxisDiff) * 180 / Math.PI); // radians to degrees
return (float)double.RadiansToDegrees(Math.Atan2(xAxisDiff, yAxisDiff));
}

private static void RotateMatrix(Mat src, float degrees)
Expand All @@ -126,7 +132,9 @@ private static void RotateMatrix(Mat src, float degrees)
var boundingRect = new RotatedRect(default, new(src.Width, src.Height), degrees).BoundingRect();
rotationMat.Set(0, 2, rotationMat.Get<double>(0, 2) + (boundingRect.Width / 2f) - (src.Width / 2f));
rotationMat.Set(1, 2, rotationMat.Get<double>(1, 2) + (boundingRect.Height / 2f) - (src.Height / 2f));
Cv2.WarpAffine(src, src, rotationMat, boundingRect.Size);

// https://stackoverflow.com/questions/39371507/image-loses-quality-with-cv2-warpperspective
Cv2.WarpAffine(src, src, rotationMat, boundingRect.Size, InterpolationFlags.Nearest);
}

public record PreprocessedTextBox(ImageKey ImageKey, RotatedRect TextBox, Mat PreprocessedTextBoxMat);
Expand Down
16 changes: 4 additions & 12 deletions c#/imagePipeline/tbm.ImagePipeline.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,9 @@
<PackageReference Include="ThumbHash" Version="2.1.1" />
</ItemGroup>
<ItemGroup>
<None Update="appsettings.Development.json">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="appsettings.json">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="appsettings.Production.json">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="nlog.config">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<!-- Environment-specific settings: copied to the build output, but kept out of the publish folder.
     The documented CopyToPublishDirectory values are Always, PreserveNewest and Never. -->
<None Update="appsettings.Development.json" CopyToOutputDirectory="PreserveNewest" CopyToPublishDirectory="Never" />
<None Update="appsettings.Production.json" CopyToOutputDirectory="PreserveNewest" CopyToPublishDirectory="Never" />
<None Update="appsettings.json" CopyToOutputDirectory="PreserveNewest" />
<None Update="nlog.config" CopyToOutputDirectory="PreserveNewest" />
</ItemGroup>
</Project>
2 changes: 2 additions & 0 deletions c#/shared/src/ErrorableWorker.cs
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,10 @@ protected async Task DoWorkWithExceptionLogging(CancellationToken stoppingToken)
}
catch (OperationCanceledException e) when (e.CancellationToken == stoppingToken)
{
#pragma warning disable S6667 // Logging in a catch clause should pass the caught exception as a parameter.
Logger.LogInformation("{}: {} CancellationToken={}",
e.GetType().FullName, e.Message, e.CancellationToken);
#pragma warning restore S6667 // Logging in a catch clause should pass the caught exception as a parameter.
}
catch (Exception e)
{
Expand Down
11 changes: 11 additions & 0 deletions c#/tbm.sln.DotSettings
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<wpf:ResourceDictionary xml:space="preserve" xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml" xmlns:s="clr-namespace:System;assembly=mscorlib" xmlns:ss="urn:shemas-jetbrains-com:settings-storage-xaml" xmlns:wpf="http://schemas.microsoft.com/winfx/2006/xaml/presentation">
<s:Boolean x:Key="/Default/CodeStyle/Naming/CSharpNaming/ApplyAutoDetectedRules/@EntryValue">False</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=Roslynator/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=Tieba/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=tesseract/@EntryIndexedValue">True</s:Boolean>
<!-- table name prefixes -->
<s:Boolean x:Key="/Default/UserDictionary/Words/=tbm_/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=tbmi_/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=tbmc_/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=tbmcr_/@EntryIndexedValue">True</s:Boolean>
</wpf:ResourceDictionary>

0 comments on commit fcd4971

Please sign in to comment.