Initial commit
This commit is contained in:
commit
4aa1edb842
2
#research/New Text Document.txt
Normal file
2
#research/New Text Document.txt
Normal file
@ -0,0 +1,2 @@
|
||||
https://github.com/charlesw/tesseract/
|
||||
https://github.com/charlesw/tesseract-samples/tree/master
|
BIN
#research/tessdata-main.zip
Normal file
BIN
#research/tessdata-main.zip
Normal file
Binary file not shown.
BIN
#research/tesseract-samples-master.zip
Normal file
BIN
#research/tesseract-samples-master.zip
Normal file
Binary file not shown.
4
.gitignore
vendored
Normal file
4
.gitignore
vendored
Normal file
@ -0,0 +1,4 @@
|
||||
/.git
|
||||
/.vs
|
||||
/bin
|
||||
/obj
|
59
Form1.Designer.cs
generated
Normal file
59
Form1.Designer.cs
generated
Normal file
@ -0,0 +1,59 @@
|
||||
namespace WinFormsApp3
|
||||
{
|
||||
/// <summary>
/// Designer-generated half of <c>Form1</c>: declares the single <c>button1</c> control,
/// builds the form layout in <c>InitializeComponent</c>, and disposes the
/// <c>components</c> container when the form is disposed.
/// </summary>
partial class Form1
|
||||
{
|
||||
/// <summary>
|
||||
/// Required designer variable.
|
||||
/// </summary>
|
||||
// Stays null in this file: InitializeComponent below never assigns it.
private System.ComponentModel.IContainer components = null;
|
||||
|
||||
/// <summary>
|
||||
/// Clean up any resources being used.
|
||||
/// </summary>
|
||||
/// <param name="disposing">true if managed resources should be disposed; otherwise, false.</param>
|
||||
protected override void Dispose(bool disposing)
|
||||
{
|
||||
// Only dispose the container during an explicit Dispose and only if one was created.
if (disposing && (components != null))
|
||||
{
|
||||
components.Dispose();
|
||||
}
|
||||
// Always hand over to the base class so it can release its own resources.
base.Dispose(disposing);
|
||||
}
|
||||
|
||||
#region Windows Form Designer generated code
|
||||
|
||||
/// <summary>
|
||||
/// Required method for Designer support - do not modify
|
||||
/// the contents of this method with the code editor.
|
||||
/// </summary>
|
||||
private void InitializeComponent()
|
||||
{
|
||||
button1 = new Button();
|
||||
// Layout is suspended here and resumed by ResumeLayout(false) at the end of this method.
SuspendLayout();
|
||||
//
|
||||
// button1
|
||||
//
|
||||
button1.Location = new Point(574, 237);
|
||||
button1.Name = "button1";
|
||||
button1.Size = new Size(75, 23);
|
||||
button1.TabIndex = 0;
|
||||
button1.Text = "button1";
|
||||
button1.UseVisualStyleBackColor = true;
|
||||
// The handler is not defined in this part of the partial class; it lives in Form1.cs.
button1.Click += button1_Click;
|
||||
//
|
||||
// Form1
|
||||
//
|
||||
AutoScaleDimensions = new SizeF(7F, 15F);
|
||||
AutoScaleMode = AutoScaleMode.Font;
|
||||
ClientSize = new Size(800, 450);
|
||||
Controls.Add(button1);
|
||||
Name = "Form1";
|
||||
Text = "Form1";
|
||||
ResumeLayout(false);
|
||||
}
|
||||
|
||||
#endregion
|
||||
|
||||
// The form's only control; created and configured in InitializeComponent.
private Button button1;
|
||||
}
|
||||
}
|
89
Form1.cs
Normal file
89
Form1.cs
Normal file
@ -0,0 +1,89 @@
|
||||
using System.Diagnostics;
|
||||
using Tesseract;
|
||||
|
||||
namespace WinFormsApp3
{
    /// <summary>
    /// Main window of the OCR experiment. Clicking <c>button1</c> runs Tesseract over a
    /// fixed sample scan and reports the recognised text or the failure.
    /// </summary>
    public partial class Form1 : Form
    {
        /// <summary>File name of the sample scan that is fed to the OCR engine.</summary>
        private const string TestImageFileName = "Scan_ADFPA_Letter_page-0001.jpg";

        /// <summary>Folder name of the Tesseract language data, looked up next to the executable.</summary>
        private const string TessDataFolderName = "tessdata";

        /// <summary>Creates the form and builds its designer-generated controls.</summary>
        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Click handler for <c>button1</c>: runs OCR (English, default engine mode) over the
        /// sample image and writes the UNLV-formatted text to the trace listeners.
        /// </summary>
        /// <param name="sender">The control that raised the event (unused).</param>
        /// <param name="e">Event data (unused).</param>
        /// <remarks>
        /// Any exception is traced and shown in a message box instead of being written to the
        /// console, because a WinExe process normally has no console attached.
        /// </remarks>
        private void button1_Click(object sender, EventArgs e)
        {
            // The sample image is looked up three levels above the executable directory
            // (presumably the project root when running from bin\<config>\<tfm> - confirm
            // if the output layout changes).
            var basePath = Path.GetFullPath(Path.Combine(AppContext.BaseDirectory, "..", "..", ".."));
            var testImagePath = Path.Combine(basePath, TestImageFileName);

            // Anchor tessdata to the executable directory so the lookup does not depend on
            // whatever the current working directory happens to be at click time.
            var tessDataPath = Path.Combine(AppContext.BaseDirectory, TessDataFolderName);

            try
            {
                using var engine = new TesseractEngine(tessDataPath, "eng", EngineMode.Default);
                using var img = Pix.LoadFromFile(testImagePath);
                using var page = engine.Process(img);

                // Alternative outputs available on the page: GetText(), GetHOCRText(0),
                // GetAltoText(0), GetBoxText(0), GetLSTMBoxText(0), GetMeanConfidence().
                var text = page.GetUNLVText();

                // Make the result observable instead of discarding it.
                Trace.WriteLine(text);
            }
            catch (Exception exc)
            {
                Trace.TraceError(exc.ToString());
                MessageBox.Show(
                    this,
                    "Unexpected Error: " + exc.Message,
                    Text,
                    MessageBoxButtons.OK,
                    MessageBoxIcon.Error);
            }
        }
    }
}
|
120
Form1.resx
Normal file
120
Form1.resx
Normal file
@ -0,0 +1,120 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<root>
|
||||
<!--
|
||||
Microsoft ResX Schema
|
||||
|
||||
Version 2.0
|
||||
|
||||
The primary goals of this format is to allow a simple XML format
|
||||
that is mostly human readable. The generation and parsing of the
|
||||
various data types are done through the TypeConverter classes
|
||||
associated with the data types.
|
||||
|
||||
Example:
|
||||
|
||||
... ado.net/XML headers & schema ...
|
||||
<resheader name="resmimetype">text/microsoft-resx</resheader>
|
||||
<resheader name="version">2.0</resheader>
|
||||
<resheader name="reader">System.Resources.ResXResourceReader, System.Windows.Forms, ...</resheader>
|
||||
<resheader name="writer">System.Resources.ResXResourceWriter, System.Windows.Forms, ...</resheader>
|
||||
<data name="Name1"><value>this is my long string</value><comment>this is a comment</comment></data>
|
||||
<data name="Color1" type="System.Drawing.Color, System.Drawing">Blue</data>
|
||||
<data name="Bitmap1" mimetype="application/x-microsoft.net.object.binary.base64">
|
||||
<value>[base64 mime encoded serialized .NET Framework object]</value>
|
||||
</data>
|
||||
<data name="Icon1" type="System.Drawing.Icon, System.Drawing" mimetype="application/x-microsoft.net.object.bytearray.base64">
|
||||
<value>[base64 mime encoded string representing a byte array form of the .NET Framework object]</value>
|
||||
<comment>This is a comment</comment>
|
||||
</data>
|
||||
|
||||
There are any number of "resheader" rows that contain simple
|
||||
name/value pairs.
|
||||
|
||||
Each data row contains a name, and value. The row also contains a
|
||||
type or mimetype. Type corresponds to a .NET class that support
|
||||
text/value conversion through the TypeConverter architecture.
|
||||
Classes that don't support this are serialized and stored with the
|
||||
mimetype set.
|
||||
|
||||
The mimetype is used for serialized objects, and tells the
|
||||
ResXResourceReader how to depersist the object. This is currently not
|
||||
extensible. For a given mimetype the value must be set accordingly:
|
||||
|
||||
Note - application/x-microsoft.net.object.binary.base64 is the format
|
||||
that the ResXResourceWriter will generate, however the reader can
|
||||
read any of the formats listed below.
|
||||
|
||||
mimetype: application/x-microsoft.net.object.binary.base64
|
||||
value : The object must be serialized with
|
||||
: System.Runtime.Serialization.Formatters.Binary.BinaryFormatter
|
||||
: and then encoded with base64 encoding.
|
||||
|
||||
mimetype: application/x-microsoft.net.object.soap.base64
|
||||
value : The object must be serialized with
|
||||
: System.Runtime.Serialization.Formatters.Soap.SoapFormatter
|
||||
: and then encoded with base64 encoding.
|
||||
|
||||
mimetype: application/x-microsoft.net.object.bytearray.base64
|
||||
value : The object must be serialized into a byte array
|
||||
: using a System.ComponentModel.TypeConverter
|
||||
: and then encoded with base64 encoding.
|
||||
-->
|
||||
<xsd:schema id="root" xmlns="" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:msdata="urn:schemas-microsoft-com:xml-msdata">
|
||||
<xsd:import namespace="http://www.w3.org/XML/1998/namespace" />
|
||||
<xsd:element name="root" msdata:IsDataSet="true">
|
||||
<xsd:complexType>
|
||||
<xsd:choice maxOccurs="unbounded">
|
||||
<xsd:element name="metadata">
|
||||
<xsd:complexType>
|
||||
<xsd:sequence>
|
||||
<xsd:element name="value" type="xsd:string" minOccurs="0" />
|
||||
</xsd:sequence>
|
||||
<xsd:attribute name="name" use="required" type="xsd:string" />
|
||||
<xsd:attribute name="type" type="xsd:string" />
|
||||
<xsd:attribute name="mimetype" type="xsd:string" />
|
||||
<xsd:attribute ref="xml:space" />
|
||||
</xsd:complexType>
|
||||
</xsd:element>
|
||||
<xsd:element name="assembly">
|
||||
<xsd:complexType>
|
||||
<xsd:attribute name="alias" type="xsd:string" />
|
||||
<xsd:attribute name="name" type="xsd:string" />
|
||||
</xsd:complexType>
|
||||
</xsd:element>
|
||||
<xsd:element name="data">
|
||||
<xsd:complexType>
|
||||
<xsd:sequence>
|
||||
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
|
||||
<xsd:element name="comment" type="xsd:string" minOccurs="0" msdata:Ordinal="2" />
|
||||
</xsd:sequence>
|
||||
<xsd:attribute name="name" type="xsd:string" use="required" msdata:Ordinal="1" />
|
||||
<xsd:attribute name="type" type="xsd:string" msdata:Ordinal="3" />
|
||||
<xsd:attribute name="mimetype" type="xsd:string" msdata:Ordinal="4" />
|
||||
<xsd:attribute ref="xml:space" />
|
||||
</xsd:complexType>
|
||||
</xsd:element>
|
||||
<xsd:element name="resheader">
|
||||
<xsd:complexType>
|
||||
<xsd:sequence>
|
||||
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
|
||||
</xsd:sequence>
|
||||
<xsd:attribute name="name" type="xsd:string" use="required" />
|
||||
</xsd:complexType>
|
||||
</xsd:element>
|
||||
</xsd:choice>
|
||||
</xsd:complexType>
|
||||
</xsd:element>
|
||||
</xsd:schema>
|
||||
<resheader name="resmimetype">
|
||||
<value>text/microsoft-resx</value>
|
||||
</resheader>
|
||||
<resheader name="version">
|
||||
<value>2.0</value>
|
||||
</resheader>
|
||||
<resheader name="reader">
|
||||
<value>System.Resources.ResXResourceReader, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
|
||||
</resheader>
|
||||
<resheader name="writer">
|
||||
<value>System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
|
||||
</resheader>
|
||||
</root>
|
17
Program.cs
Normal file
17
Program.cs
Normal file
@ -0,0 +1,17 @@
|
||||
namespace WinFormsApp3
{
    /// <summary>
    /// Hosts the process entry point of the WinForms application.
    /// </summary>
    internal static class Program
    {
        /// <summary>
        /// Entry point: initialises the application configuration and runs the
        /// message loop with <see cref="Form1"/> as the main window.
        /// </summary>
        [STAThread]
        static void Main()
        {
            // High DPI settings, the default font and similar options are customised through
            // the application configuration; see https://aka.ms/applicationconfiguration.
            ApplicationConfiguration.Initialize();

            Form1 mainForm = new();
            Application.Run(mainForm);
        }
    }
}
|
BIN
Scan_ADFPA_Letter_page-0001.jpg
Normal file
BIN
Scan_ADFPA_Letter_page-0001.jpg
Normal file
Binary file not shown.
After Width: | Height: | Size: 692 KiB |
31
WinFormsApp3.csproj
Normal file
31
WinFormsApp3.csproj
Normal file
@ -0,0 +1,31 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<OutputType>WinExe</OutputType>
|
||||
<TargetFramework>net8.0-windows</TargetFramework>
|
||||
<Nullable>enable</Nullable>
|
||||
<UseWindowsForms>true</UseWindowsForms>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Tesseract" Version="5.2.0" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<None Remove="C:\Users\rxl\.nuget\packages\tesseract\5.2.0\build\\..\x64\leptonica-1.82.0.dll" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<None Remove="C:\Users\rxl\.nuget\packages\tesseract\5.2.0\build\\..\x64\tesseract50.dll" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<None Remove="C:\Users\rxl\.nuget\packages\tesseract\5.2.0\build\\..\x86\leptonica-1.82.0.dll" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<None Remove="C:\Users\rxl\.nuget\packages\tesseract\5.2.0\build\\..\x86\tesseract50.dll" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
8
WinFormsApp3.csproj.user
Normal file
8
WinFormsApp3.csproj.user
Normal file
@ -0,0 +1,8 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project ToolsVersion="Current" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup>
|
||||
<Compile Update="Form1.cs">
|
||||
<SubType>Form</SubType>
|
||||
</Compile>
|
||||
</ItemGroup>
|
||||
</Project>
|
25
WinFormsApp3.sln
Normal file
25
WinFormsApp3.sln
Normal file
@ -0,0 +1,25 @@
|
||||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio Version 17
|
||||
VisualStudioVersion = 17.9.34728.123
|
||||
MinimumVisualStudioVersion = 10.0.40219.1
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "WinFormsApp3", "WinFormsApp3.csproj", "{3BF8E366-F9EF-484B-8FAF-F2ADB8854C41}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Any CPU = Debug|Any CPU
|
||||
Release|Any CPU = Release|Any CPU
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{3BF8E366-F9EF-484B-8FAF-F2ADB8854C41}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{3BF8E366-F9EF-484B-8FAF-F2ADB8854C41}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{3BF8E366-F9EF-484B-8FAF-F2ADB8854C41}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{3BF8E366-F9EF-484B-8FAF-F2ADB8854C41}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
GlobalSection(ExtensibilityGlobals) = postSolution
|
||||
SolutionGuid = {0EC31419-9703-4A29-BB28-FCE8A78193F6}
|
||||
EndGlobalSection
|
||||
EndGlobal
|
Loading…
Reference in New Issue
Block a user