Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
wrt54g committed Aug 12, 2022
2 parents 3f2a134 + 1f82a83 commit c4b8584
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 9 deletions.
26 changes: 19 additions & 7 deletions OCR.cs
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,9 @@ internal static TextBlock GetTextFromTiffStream(byte[] image)

ComposedBlock cblock = result.PrintSpace.ComposedBlock.OrderByDescending(x => x.WordsInComposedBlock).FirstOrDefault() ?? new ComposedBlock();
TextBlock block = cblock.Blocks.OrderByDescending(x => x.WordsInBlock).FirstOrDefault() ?? new TextBlock();

Regex rgx = new Regex("[^a-zA-Z0-9.,'?!: -]");
foreach (var line in block.Lines)
{
line.Words.RemoveAll(x => string.IsNullOrEmpty(rgx.Replace(x.Content.Replace("|", "I"), "")));
}
block.Lines.RemoveAll(x => x.WordsInLine == 0);
block = TextHelper.ProcessTextBlock(block);
if (block.Lines.Count > 0)
ResetBlockBoundaries(block);

return block;

Expand All @@ -51,5 +47,21 @@ internal static TextBlock GetTextFromTiffStream(byte[] image)
return new TextBlock() { Text = $"Error: {ex.Message}" };
}
}

/// <summary>
/// Recomputes the position and size of every line in <paramref name="block"/> from the
/// words it contains, then recomputes the block's own position and size from its lines.
/// </summary>
/// <param name="block">Block whose lines and words are read and whose bounds are overwritten.</param>
/// <remarks>
/// Uses <c>Min</c>/<c>Max</c> over the word and line collections, so every line must hold
/// at least one word and the block at least one line.
/// </remarks>
private static void ResetBlockBoundaries(TextBlock block)
{
    foreach (var textLine in block.Lines)
    {
        var words = textLine.Words;

        // Top-left corner of the line: smallest word offsets on each axis.
        textLine.HPos = words.Min(w => w.HPos);
        textLine.VPos = words.Min(w => w.VPos);

        // Extent of the line: farthest word edge minus the line origin just stored.
        var rightEdge = words.Max(w => w.HPos + w.Width);
        textLine.Width = rightEdge - textLine.HPos;

        var bottomEdge = words.Max(w => w.VPos + w.Height);
        textLine.Height = bottomEdge - textLine.VPos;
    }

    var lines = block.Lines;

    block.HPos = lines.Min(l => l.HPos);
    block.VPos = lines.Min(l => l.VPos);

    // The block extent is padded by one unit on each axis, unlike the per-line extent above.
    var blockRight = lines.Max(l => l.HPos + l.Width);
    block.Width = blockRight - block.HPos + 1;

    var blockBottom = lines.Max(l => l.VPos + l.Height);
    block.Height = blockBottom - block.VPos + 1;
}
}
}
2 changes: 1 addition & 1 deletion OCRResult.cs
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ public string Text

/// <summary>
/// Formats this element as <c>HPos,VPos,Width,Height : Text</c>.
/// </summary>
/// <returns>The four position/size values, comma separated, followed by the text.</returns>
public override string ToString()
{
    // Only the current format is kept; an earlier "VPos,Height Text" return preceded it
    // and made this statement unreachable.
    return $"{HPos},{VPos},{Width},{Height} : {Text}";
}

}
Expand Down
47 changes: 46 additions & 1 deletion TextHelper.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using System.Text;
using System;
using System.Text;
using System.Text.RegularExpressions;

namespace GameOCRTTS
Expand Down Expand Up @@ -54,6 +55,50 @@ internal static string RemoveGarbageText(string text)
return result.ToString();
}

/// <summary>
/// Filters OCR noise out of <paramref name="block"/> in place: drops words that are empty
/// after removing unsupported characters or that fail <c>IsValidWord</c>, then drops
/// lines that are empty or consist of a single word of at most two characters.
/// </summary>
/// <param name="block">Block whose lines and words are filtered.</param>
/// <returns>The same <paramref name="block"/> instance, after filtering.</returns>
internal static TextBlock ProcessTextBlock(TextBlock block)
{
    Regex rgxspec = new Regex("[^a-zA-Z0-9.,'?!: -]");
    foreach (var line in block.Lines)
    {
        line.Words.RemoveAll(x =>
        {
            // A pipe is treated as a capital "I". The same normalized text must feed
            // both checks, otherwise a lone "|" passes the first and fails the second.
            string normalized = (x.Content ?? string.Empty).Replace("|", "I");

            if (string.IsNullOrEmpty(rgxspec.Replace(normalized, "").Trim()))
                return true;

            return !IsValidWord(normalized);
        });
    }

    // Empty lines are tested first so Text is only read for single-word lines.
    block.Lines.RemoveAll(x => x.WordsInLine == 0 || (x.WordsInLine == 1 && x.Text.Length <= 2));

    return block;
}

// Patterns used by IsValidWord. They are built once and shared, because the method runs
// for every OCR word and constructing a Regex re-parses its pattern each time.
private static readonly Regex s_nonWordCharRegex = new Regex("[^a-zA-Z0-9']");
private static readonly Regex s_nonDigitRegex = new Regex("[^0-9]");
private static readonly Regex s_nonVowelOrYRegex = new Regex("[^aeiouy]");
private static readonly Regex s_nonStrictVowelRegex = new Regex("[^aeiou]");

/// <summary>
/// Heuristically decides whether <paramref name="word"/> looks like real text.
/// </summary>
/// <remarks>
/// The word is reduced to its ASCII letters, digits and apostrophes and lower-cased, then:
/// an empty result is rejected; a single character is accepted only when it is "i" or "a";
/// words that are all digits, start with "..", are longer than five characters or start
/// with "hm" are accepted; any other short word must contain at least one of "aeiouy" and
/// must not consist solely of "aeiou".
/// </remarks>
/// <param name="word">Raw word content; <c>null</c> or empty is treated as invalid.</param>
/// <returns><c>true</c> when the word passes the heuristics, otherwise <c>false</c>.</returns>
private static bool IsValidWord(string word)
{
    if (string.IsNullOrEmpty(word))
        return false;

    // Invariant lower-casing: culture-sensitive ToLower() maps "I" to a dotless i under
    // Turkish cultures, which would fail both the "i" comparison and the vowel patterns.
    string strip = s_nonWordCharRegex.Replace(word, "").ToLowerInvariant();
    if (strip.Length == 0)
        return false;

    // NOTE(review): this also rejects single digits. A later, unreachable check in the
    // previous version exempted numbers, so confirm whether "5" should be accepted.
    if (strip.Length == 1)
        return strip == "i" || strip == "a";

    // Digits are counted on the raw word; equal lengths mean strip holds nothing but digits.
    string numbers = s_nonDigitRegex.Replace(word, "");
    bool onlynumbers = numbers.Length == strip.Length;

    bool exempt = onlynumbers
        || strip.Length > 5
        || word.StartsWith("..", StringComparison.Ordinal)
        || strip.StartsWith("hm", StringComparison.Ordinal);
    if (exempt)
        return true;

    // "y" counts as a vowel when checking that a word has one ("my", "why"), but not when
    // checking that a word is nothing but vowels, so "you" and "eye" are kept.
    int vowelsOrY = s_nonVowelOrYRegex.Replace(strip, "").Length;
    int strictVowels = s_nonStrictVowelRegex.Replace(strip, "").Length;

    return vowelsOrY != 0 && strictVowels != strip.Length;
}

internal static string StripSpecialCharacters(string text)
{
if (string.IsNullOrEmpty(text))
Expand Down

0 comments on commit c4b8584

Please sign in to comment.