Initial commit
This commit is contained in:
commit
4aa1edb842
2
#research/New Text Document.txt
Normal file
2
#research/New Text Document.txt
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
https://github.com/charlesw/tesseract/
|
||||||
|
https://github.com/charlesw/tesseract-samples/tree/master
|
BIN
#research/tessdata-main.zip
Normal file
BIN
#research/tessdata-main.zip
Normal file
Binary file not shown.
BIN
#research/tesseract-samples-master.zip
Normal file
BIN
#research/tesseract-samples-master.zip
Normal file
Binary file not shown.
4
.gitignore
vendored
Normal file
4
.gitignore
vendored
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
/.git
|
||||||
|
/.vs
|
||||||
|
/bin
|
||||||
|
/obj
|
59
Form1.Designer.cs
generated
Normal file
59
Form1.Designer.cs
generated
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
namespace WinFormsApp3
{
    partial class Form1
    {
        /// <summary>
        /// Required designer variable. Nothing in this file assigns it, so it
        /// remains null unless the designer later adds components.
        /// </summary>
        private System.ComponentModel.IContainer components = null;

        /// <summary>
        /// Releases the resources held by this form.
        /// </summary>
        /// <param name="disposing">true if managed resources should be disposed; otherwise, false.</param>
        protected override void Dispose(bool disposing)
        {
            if (disposing)
            {
                // The container is only disposed when it exists.
                components?.Dispose();
            }

            base.Dispose(disposing);
        }

        #region Windows Form Designer generated code

        /// <summary>
        /// Required method for Designer support - do not modify
        /// the contents of this method with the code editor.
        /// </summary>
        private void InitializeComponent()
        {
            button1 = new Button();
            SuspendLayout();
            //
            // button1
            //
            button1.Location = new Point(574, 237);
            button1.Name = "button1";
            button1.Size = new Size(75, 23);
            button1.TabIndex = 0;
            button1.Text = "button1";
            button1.UseVisualStyleBackColor = true;
            button1.Click += button1_Click;
            //
            // Form1
            //
            AutoScaleDimensions = new SizeF(7F, 15F);
            AutoScaleMode = AutoScaleMode.Font;
            ClientSize = new Size(800, 450);
            Controls.Add(button1);
            Name = "Form1";
            Text = "Form1";
            ResumeLayout(false);
        }

        #endregion

        private Button button1;
    }
}
|
89
Form1.cs
Normal file
89
Form1.cs
Normal file
@ -0,0 +1,89 @@
|
|||||||
|
using System.Diagnostics;
|
||||||
|
using Tesseract;
|
||||||
|
|
||||||
|
namespace WinFormsApp3
{
    /// <summary>
    /// Main window. Its single button runs Tesseract OCR over a sample scan
    /// and reports the outcome.
    /// </summary>
    public partial class Form1 : Form
    {
        /// <summary>File name of the sample scan that the button processes.</summary>
        private const string SampleImageFileName = "Scan_ADFPA_Letter_page-0001.jpg";

        /// <summary>Name of the directory, next to the executable, holding the Tesseract language data.</summary>
        private const string TessdataDirectoryName = "tessdata";

        /// <summary>
        /// Creates the form and builds its designer-defined controls.
        /// </summary>
        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Runs OCR on the sample image and writes the recognized text to the trace
        /// listeners. Any failure is traced and shown to the user in a message box.
        /// </summary>
        /// <param name="sender">The control that raised the event (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button1_Click(object? sender, EventArgs e)
        {
            // Anchor both paths at the application directory so the result does not
            // depend on the process working directory.
            var appDirectory = AppContext.BaseDirectory;

            // The sample image is looked up three directories above the executable.
            // NOTE(review): this matches a default bin\<config>\<tfm> build layout; confirm for other layouts.
            var imageDirectory = Path.GetFullPath(Path.Combine(appDirectory, "..", "..", ".."));
            var testImagePath = Path.Combine(imageDirectory, SampleImageFileName);
            var tessdataPath = Path.Combine(appDirectory, TessdataDirectoryName);

            try
            {
                var recognizedText = RecognizeUnlvText(tessdataPath, testImagePath);

                // Console output is not visible in a WinExe application, so use Trace instead.
                Trace.WriteLine(recognizedText);
            }
            catch (Exception exc)
            {
                Trace.TraceError(exc.ToString());
                MessageBox.Show(
                    this,
                    "Unexpected Error: " + exc.Message,
                    Text,
                    MessageBoxButtons.OK,
                    MessageBoxIcon.Error);
            }
        }

        /// <summary>
        /// Loads the image, runs the English Tesseract engine over it and returns the
        /// page text in UNLV format.
        /// </summary>
        /// <param name="tessdataPath">Directory containing the Tesseract language data.</param>
        /// <param name="imagePath">Path of the image file to recognize.</param>
        /// <returns>The text returned by <c>Page.GetUNLVText()</c>.</returns>
        /// <remarks>
        /// Earlier revisions of this code also experimented with GetText, GetHOCRText,
        /// GetAltoText, GetBoxText, GetLSTMBoxText and GetMeanConfidence on the same page
        /// object, as well as walking the page with GetIterator.
        /// </remarks>
        private static string RecognizeUnlvText(string tessdataPath, string imagePath)
        {
            // Using declarations dispose in reverse order: page, then image, then engine.
            using var engine = new TesseractEngine(tessdataPath, "eng", EngineMode.Default);
            using var image = Pix.LoadFromFile(imagePath);
            using var page = engine.Process(image);

            return page.GetUNLVText();
        }
    }
}
|
120
Form1.resx
Normal file
120
Form1.resx
Normal file
@ -0,0 +1,120 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<root>
|
||||||
|
<!--
|
||||||
|
Microsoft ResX Schema
|
||||||
|
|
||||||
|
Version 2.0
|
||||||
|
|
||||||
|
The primary goal of this format is to allow a simple XML format
|
||||||
|
that is mostly human readable. The generation and parsing of the
|
||||||
|
various data types are done through the TypeConverter classes
|
||||||
|
associated with the data types.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
|
||||||
|
... ado.net/XML headers & schema ...
|
||||||
|
<resheader name="resmimetype">text/microsoft-resx</resheader>
|
||||||
|
<resheader name="version">2.0</resheader>
|
||||||
|
<resheader name="reader">System.Resources.ResXResourceReader, System.Windows.Forms, ...</resheader>
|
||||||
|
<resheader name="writer">System.Resources.ResXResourceWriter, System.Windows.Forms, ...</resheader>
|
||||||
|
<data name="Name1"><value>this is my long string</value><comment>this is a comment</comment></data>
|
||||||
|
<data name="Color1" type="System.Drawing.Color, System.Drawing">Blue</data>
|
||||||
|
<data name="Bitmap1" mimetype="application/x-microsoft.net.object.binary.base64">
|
||||||
|
<value>[base64 mime encoded serialized .NET Framework object]</value>
|
||||||
|
</data>
|
||||||
|
<data name="Icon1" type="System.Drawing.Icon, System.Drawing" mimetype="application/x-microsoft.net.object.bytearray.base64">
|
||||||
|
<value>[base64 mime encoded string representing a byte array form of the .NET Framework object]</value>
|
||||||
|
<comment>This is a comment</comment>
|
||||||
|
</data>
|
||||||
|
|
||||||
|
There are any number of "resheader" rows that contain simple
|
||||||
|
name/value pairs.
|
||||||
|
|
||||||
|
Each data row contains a name, and value. The row also contains a
|
||||||
|
type or mimetype. Type corresponds to a .NET class that supports
|
||||||
|
text/value conversion through the TypeConverter architecture.
|
||||||
|
Classes that don't support this are serialized and stored with the
|
||||||
|
mimetype set.
|
||||||
|
|
||||||
|
The mimetype is used for serialized objects, and tells the
|
||||||
|
ResXResourceReader how to depersist the object. This is currently not
|
||||||
|
extensible. For a given mimetype the value must be set accordingly:
|
||||||
|
|
||||||
|
Note - application/x-microsoft.net.object.binary.base64 is the format
|
||||||
|
that the ResXResourceWriter will generate, however the reader can
|
||||||
|
read any of the formats listed below.
|
||||||
|
|
||||||
|
mimetype: application/x-microsoft.net.object.binary.base64
|
||||||
|
value : The object must be serialized with
|
||||||
|
: System.Runtime.Serialization.Formatters.Binary.BinaryFormatter
|
||||||
|
: and then encoded with base64 encoding.
|
||||||
|
|
||||||
|
mimetype: application/x-microsoft.net.object.soap.base64
|
||||||
|
value : The object must be serialized with
|
||||||
|
: System.Runtime.Serialization.Formatters.Soap.SoapFormatter
|
||||||
|
: and then encoded with base64 encoding.
|
||||||
|
|
||||||
|
mimetype: application/x-microsoft.net.object.bytearray.base64
|
||||||
|
value : The object must be serialized into a byte array
|
||||||
|
: using a System.ComponentModel.TypeConverter
|
||||||
|
: and then encoded with base64 encoding.
|
||||||
|
-->
|
||||||
|
<xsd:schema id="root" xmlns="" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:msdata="urn:schemas-microsoft-com:xml-msdata">
|
||||||
|
<xsd:import namespace="http://www.w3.org/XML/1998/namespace" />
|
||||||
|
<xsd:element name="root" msdata:IsDataSet="true">
|
||||||
|
<xsd:complexType>
|
||||||
|
<xsd:choice maxOccurs="unbounded">
|
||||||
|
<xsd:element name="metadata">
|
||||||
|
<xsd:complexType>
|
||||||
|
<xsd:sequence>
|
||||||
|
<xsd:element name="value" type="xsd:string" minOccurs="0" />
|
||||||
|
</xsd:sequence>
|
||||||
|
<xsd:attribute name="name" use="required" type="xsd:string" />
|
||||||
|
<xsd:attribute name="type" type="xsd:string" />
|
||||||
|
<xsd:attribute name="mimetype" type="xsd:string" />
|
||||||
|
<xsd:attribute ref="xml:space" />
|
||||||
|
</xsd:complexType>
|
||||||
|
</xsd:element>
|
||||||
|
<xsd:element name="assembly">
|
||||||
|
<xsd:complexType>
|
||||||
|
<xsd:attribute name="alias" type="xsd:string" />
|
||||||
|
<xsd:attribute name="name" type="xsd:string" />
|
||||||
|
</xsd:complexType>
|
||||||
|
</xsd:element>
|
||||||
|
<xsd:element name="data">
|
||||||
|
<xsd:complexType>
|
||||||
|
<xsd:sequence>
|
||||||
|
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
|
||||||
|
<xsd:element name="comment" type="xsd:string" minOccurs="0" msdata:Ordinal="2" />
|
||||||
|
</xsd:sequence>
|
||||||
|
<xsd:attribute name="name" type="xsd:string" use="required" msdata:Ordinal="1" />
|
||||||
|
<xsd:attribute name="type" type="xsd:string" msdata:Ordinal="3" />
|
||||||
|
<xsd:attribute name="mimetype" type="xsd:string" msdata:Ordinal="4" />
|
||||||
|
<xsd:attribute ref="xml:space" />
|
||||||
|
</xsd:complexType>
|
||||||
|
</xsd:element>
|
||||||
|
<xsd:element name="resheader">
|
||||||
|
<xsd:complexType>
|
||||||
|
<xsd:sequence>
|
||||||
|
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
|
||||||
|
</xsd:sequence>
|
||||||
|
<xsd:attribute name="name" type="xsd:string" use="required" />
|
||||||
|
</xsd:complexType>
|
||||||
|
</xsd:element>
|
||||||
|
</xsd:choice>
|
||||||
|
</xsd:complexType>
|
||||||
|
</xsd:element>
|
||||||
|
</xsd:schema>
|
||||||
|
<resheader name="resmimetype">
|
||||||
|
<value>text/microsoft-resx</value>
|
||||||
|
</resheader>
|
||||||
|
<resheader name="version">
|
||||||
|
<value>2.0</value>
|
||||||
|
</resheader>
|
||||||
|
<resheader name="reader">
|
||||||
|
<value>System.Resources.ResXResourceReader, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
|
||||||
|
</resheader>
|
||||||
|
<resheader name="writer">
|
||||||
|
<value>System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
|
||||||
|
</resheader>
|
||||||
|
</root>
|
17
Program.cs
Normal file
17
Program.cs
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
namespace WinFormsApp3
{
    internal static class Program
    {
        /// <summary>
        /// Application entry point: initializes the application configuration and
        /// runs the message loop on a new <see cref="Form1"/>.
        /// </summary>
        [STAThread]
        private static void Main()
        {
            // High DPI settings, default font and similar options are customized
            // through the application configuration; see https://aka.ms/applicationconfiguration.
            ApplicationConfiguration.Initialize();

            var mainWindow = new Form1();
            Application.Run(mainWindow);
        }
    }
}
|
BIN
Scan_ADFPA_Letter_page-0001.jpg
Normal file
BIN
Scan_ADFPA_Letter_page-0001.jpg
Normal file
Binary file not shown.
After Width: | Height: | Size: 692 KiB |
31
WinFormsApp3.csproj
Normal file
31
WinFormsApp3.csproj
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
<Project Sdk="Microsoft.NET.Sdk">
|
||||||
|
|
||||||
|
<PropertyGroup>
|
||||||
|
<OutputType>WinExe</OutputType>
|
||||||
|
<TargetFramework>net8.0-windows</TargetFramework>
|
||||||
|
<Nullable>enable</Nullable>
|
||||||
|
<UseWindowsForms>true</UseWindowsForms>
|
||||||
|
<ImplicitUsings>enable</ImplicitUsings>
|
||||||
|
</PropertyGroup>
|
||||||
|
|
||||||
|
<ItemGroup>
|
||||||
|
<PackageReference Include="Tesseract" Version="5.2.0" />
|
||||||
|
</ItemGroup>
|
||||||
|
|
||||||
|
<ItemGroup>
|
||||||
|
<None Remove="C:\Users\rxl\.nuget\packages\tesseract\5.2.0\build\\..\x64\leptonica-1.82.0.dll" />
|
||||||
|
</ItemGroup>
|
||||||
|
|
||||||
|
<ItemGroup>
|
||||||
|
<None Remove="C:\Users\rxl\.nuget\packages\tesseract\5.2.0\build\\..\x64\tesseract50.dll" />
|
||||||
|
</ItemGroup>
|
||||||
|
|
||||||
|
<ItemGroup>
|
||||||
|
<None Remove="C:\Users\rxl\.nuget\packages\tesseract\5.2.0\build\\..\x86\leptonica-1.82.0.dll" />
|
||||||
|
</ItemGroup>
|
||||||
|
|
||||||
|
<ItemGroup>
|
||||||
|
<None Remove="C:\Users\rxl\.nuget\packages\tesseract\5.2.0\build\\..\x86\tesseract50.dll" />
|
||||||
|
</ItemGroup>
|
||||||
|
|
||||||
|
</Project>
|
8
WinFormsApp3.csproj.user
Normal file
8
WinFormsApp3.csproj.user
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<Project ToolsVersion="Current" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||||
|
<ItemGroup>
|
||||||
|
<Compile Update="Form1.cs">
|
||||||
|
<SubType>Form</SubType>
|
||||||
|
</Compile>
|
||||||
|
</ItemGroup>
|
||||||
|
</Project>
|
25
WinFormsApp3.sln
Normal file
25
WinFormsApp3.sln
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
|
||||||
|
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||||
|
# Visual Studio Version 17
|
||||||
|
VisualStudioVersion = 17.9.34728.123
|
||||||
|
MinimumVisualStudioVersion = 10.0.40219.1
|
||||||
|
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "WinFormsApp3", "WinFormsApp3.csproj", "{3BF8E366-F9EF-484B-8FAF-F2ADB8854C41}"
|
||||||
|
EndProject
|
||||||
|
Global
|
||||||
|
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||||
|
Debug|Any CPU = Debug|Any CPU
|
||||||
|
Release|Any CPU = Release|Any CPU
|
||||||
|
EndGlobalSection
|
||||||
|
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||||
|
{3BF8E366-F9EF-484B-8FAF-F2ADB8854C41}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||||
|
{3BF8E366-F9EF-484B-8FAF-F2ADB8854C41}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||||
|
{3BF8E366-F9EF-484B-8FAF-F2ADB8854C41}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||||
|
{3BF8E366-F9EF-484B-8FAF-F2ADB8854C41}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||||
|
EndGlobalSection
|
||||||
|
GlobalSection(SolutionProperties) = preSolution
|
||||||
|
HideSolutionNode = FALSE
|
||||||
|
EndGlobalSection
|
||||||
|
GlobalSection(ExtensibilityGlobals) = postSolution
|
||||||
|
SolutionGuid = {0EC31419-9703-4A29-BB28-FCE8A78193F6}
|
||||||
|
EndGlobalSection
|
||||||
|
EndGlobal
|
Loading…
Reference in New Issue
Block a user