commit 4aa1edb8424aa3c6e082d07aa843a8bbd77f346f Author: Ray Date: Fri May 24 11:55:28 2024 +0100 Initial commit diff --git a/#research/New Text Document.txt b/#research/New Text Document.txt new file mode 100644 index 0000000..034908d --- /dev/null +++ b/#research/New Text Document.txt @@ -0,0 +1,2 @@ +https://github.com/charlesw/tesseract/ +https://github.com/charlesw/tesseract-samples/tree/master \ No newline at end of file diff --git a/#research/tessdata-main.zip b/#research/tessdata-main.zip new file mode 100644 index 0000000..3615cce Binary files /dev/null and b/#research/tessdata-main.zip differ diff --git a/#research/tesseract-samples-master.zip b/#research/tesseract-samples-master.zip new file mode 100644 index 0000000..b30619a Binary files /dev/null and b/#research/tesseract-samples-master.zip differ diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..cb69396 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +/.git +/.vs +/bin +/obj diff --git a/Form1.Designer.cs b/Form1.Designer.cs new file mode 100644 index 0000000..01ecf79 --- /dev/null +++ b/Form1.Designer.cs @@ -0,0 +1,59 @@ +namespace WinFormsApp3 +{ + partial class Form1 + { + /// + /// Required designer variable. + /// + private System.ComponentModel.IContainer components = null; + + /// + /// Clean up any resources being used. + /// + /// true if managed resources should be disposed; otherwise, false. + protected override void Dispose(bool disposing) + { + if (disposing && (components != null)) + { + components.Dispose(); + } + base.Dispose(disposing); + } + + #region Windows Form Designer generated code + + /// + /// Required method for Designer support - do not modify + /// the contents of this method with the code editor. + /// + private void InitializeComponent() + { + button1 = new Button(); + SuspendLayout(); + // + // button1 + // + button1.Location = new Point(574, 237); + button1.Name = "button1"; + button1.Size = new Size(75, 23); + button1.TabIndex = 0; + button1.Text = "button1"; + button1.UseVisualStyleBackColor = true; + button1.Click += button1_Click; + // + // Form1 + // + AutoScaleDimensions = new SizeF(7F, 15F); + AutoScaleMode = AutoScaleMode.Font; + ClientSize = new Size(800, 450); + Controls.Add(button1); + Name = "Form1"; + Text = "Form1"; + ResumeLayout(false); + } + + #endregion + + private Button button1; + } +} diff --git a/Form1.cs b/Form1.cs new file mode 100644 index 0000000..1b69e1d --- /dev/null +++ b/Form1.cs @@ -0,0 +1,89 @@ +using System.Diagnostics; +using Tesseract; + +namespace WinFormsApp3 +{ + public partial class Form1 : Form + { + public Form1() + { + InitializeComponent(); + } + + private void button1_Click(object sender, EventArgs e) + { + var basePath = Path.GetFullPath(Path.GetDirectoryName(Application.ExecutablePath) + "\\..\\..\\..\\"); + + var testImagePath = Path.Combine(basePath, "Scan_ADFPA_Letter_page-0001.jpg"); + + try + { + using (var engine = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default)) + { + using (var img = Pix.LoadFromFile(testImagePath)) + { + using (var page = engine.Process(img)) + { + //var tt1 = page.GetLSTMBoxText(0); + //var tt1 = page.GetAltoText(0); + //var tt1 = page.GetBoxText(0); + //var tt1 = page.GetHOCRText(0); + //var tt1 = page.GetMeanConfidence(); + //var tt1 = page.GetText(); + + var tt1 = page.GetUNLVText(); + + + //Console.WriteLine("Mean confidence: {0}", page.GetMeanConfidence()); + + //Console.WriteLine("Text (GetText): \r\n{0}", text); + //Console.WriteLine("Text (iterator):"); + //using (var iter = page.GetIterator()) + //{ + // iter.Begin(); + + // do + // { + // do + // { + // do + // { + // do + // { + // if (iter.IsAtBeginningOf(PageIteratorLevel.Block)) + // { + // Console.WriteLine(""); + // } + + // Console.Write(iter.GetText(PageIteratorLevel.Word)); + // Console.Write(" "); + + // if (iter.IsAtFinalOf(PageIteratorLevel.TextLine, PageIteratorLevel.Word)) + // { + // Console.WriteLine(); + // } + // } while (iter.Next(PageIteratorLevel.TextLine, PageIteratorLevel.Word)); + + // if (iter.IsAtFinalOf(PageIteratorLevel.Para, PageIteratorLevel.TextLine)) + // { + // Console.WriteLine(); + // } + // } while (iter.Next(PageIteratorLevel.Para, PageIteratorLevel.TextLine)); + // } while (iter.Next(PageIteratorLevel.Block, PageIteratorLevel.Para)); + // } while (iter.Next(PageIteratorLevel.Block)); + //} + } + } + } + } + catch (Exception exc) + { + Trace.TraceError(exc.ToString()); + Console.WriteLine("Unexpected Error: " + exc.Message); + Console.WriteLine("Details: "); + Console.WriteLine(exc.ToString()); + } + } + + } +} \ No newline at end of file diff --git a/Form1.resx b/Form1.resx new file mode 100644 index 0000000..af32865 --- /dev/null +++ b/Form1.resx @@ -0,0 +1,120 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + text/microsoft-resx + + + 2.0 + + + System.Resources.ResXResourceReader, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 + + + System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 + + \ No newline at end of file diff --git a/Program.cs b/Program.cs new file mode 100644 index 0000000..cf31615 --- /dev/null +++ b/Program.cs @@ -0,0 +1,17 @@ +namespace WinFormsApp3 +{ + internal static class Program + { + /// + /// The main entry point for the application. + /// + [STAThread] + static void Main() + { + // To customize application configuration such as set high DPI settings or default font, + // see https://aka.ms/applicationconfiguration. + ApplicationConfiguration.Initialize(); + Application.Run(new Form1()); + } + } +} \ No newline at end of file diff --git a/Scan_ADFPA_Letter_page-0001.jpg b/Scan_ADFPA_Letter_page-0001.jpg new file mode 100644 index 0000000..dedd164 Binary files /dev/null and b/Scan_ADFPA_Letter_page-0001.jpg differ diff --git a/WinFormsApp3.csproj b/WinFormsApp3.csproj new file mode 100644 index 0000000..c8cf3ec --- /dev/null +++ b/WinFormsApp3.csproj @@ -0,0 +1,31 @@ + + + + WinExe + net8.0-windows + enable + true + enable + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/WinFormsApp3.csproj.user b/WinFormsApp3.csproj.user new file mode 100644 index 0000000..7814ea2 --- /dev/null +++ b/WinFormsApp3.csproj.user @@ -0,0 +1,8 @@ + + + + + Form + + + diff --git a/WinFormsApp3.sln b/WinFormsApp3.sln new file mode 100644 index 0000000..7f741e9 --- /dev/null +++ b/WinFormsApp3.sln @@ -0,0 +1,25 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.9.34728.123 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "WinFormsApp3", "WinFormsApp3.csproj", "{3BF8E366-F9EF-484B-8FAF-F2ADB8854C41}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Release|Any CPU = Release|Any CPU + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {3BF8E366-F9EF-484B-8FAF-F2ADB8854C41}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {3BF8E366-F9EF-484B-8FAF-F2ADB8854C41}.Debug|Any CPU.Build.0 = Debug|Any CPU + {3BF8E366-F9EF-484B-8FAF-F2ADB8854C41}.Release|Any CPU.ActiveCfg = Release|Any CPU + {3BF8E366-F9EF-484B-8FAF-F2ADB8854C41}.Release|Any CPU.Build.0 = Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {0EC31419-9703-4A29-BB28-FCE8A78193F6} + EndGlobalSection +EndGlobal