// ocr-tesseract/Form1.cs
using System.Diagnostics;
using System.Windows.Forms;
using Inlite.ClearImageNet;
// (viewer residue: blame timestamp 2024-05-24 10:55:28 +00:00)
using Tesseract;
namespace WinFormsApp3
{
public partial class Form1 : Form
{
private readonly string _basePath;
private readonly string _tessDataPath;
// (viewer residue: blame timestamp 2024-05-24 10:55:28 +00:00)
/// <summary>
/// Initializes the form's components and derives the two directory paths
/// used by the handlers from the location of the running executable.
/// </summary>
public Form1()
{
    InitializeComponent();

    // Base path: three directory levels above the folder containing the executable.
    var executableFolder = Path.GetDirectoryName(Application.ExecutablePath);
    var relativeRoot = executableFolder + "\\..\\..\\..\\";
    _basePath = Path.GetFullPath(relativeRoot);

    // This path is later handed to the Tesseract engine as its data directory.
    _tessDataPath = Path.Combine(_basePath, "#research", "tessdata");
}
/// <summary>
/// Runs the base class load handling, then clears the output text box.
/// </summary>
/// <param name="e">Event data, forwarded unchanged to the base implementation.</param>
protected override void OnLoad(EventArgs e)
{
    base.OnLoad(e);
    richTextBox1.Clear();
}
/// <summary>
/// Runs OCR on the sample letter image and appends the recognized text,
/// followed by a line break, to the output text box.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    var sampleImage = Path.Combine(_basePath, "#research", "Passport Demand Study Sample Letter.jpg");
    var recognizedText = ParseOCR(sampleImage);

    // ParseOCR may yield null; string concatenation treats that as empty.
    richTextBox1.Text += recognizedText;
    richTextBox1.Text += Environment.NewLine;
}
/// <summary>
/// Reads barcodes from the sample letter image and appends one line per
/// barcode (type, value, page) to the output text box.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button2_Click(object sender, EventArgs e)
{
    var sampleImage = Path.Combine(_basePath, "#research", "Passport Demand Study Sample Letter.jpg");
    var found = ReadBarcode(sampleImage);

    for (var index = 0; index < found.Length; index++)
    {
        var barcode = found[index];
        var line = $"Barcode Type = {barcode.Type} | Value = {barcode.Text} | Page = {barcode.Page}";

        richTextBox1.Text += line;
        richTextBox1.Text += Environment.NewLine;
    }
}
/// <summary>
/// Appends a line break to the output text box. No other processing is
/// performed by this handler at present.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button3_Click(object sender, EventArgs e)
{
    // The sample image path that used to be computed here was never read, so it was dropped.
    richTextBox1.Text += Environment.NewLine;
}
////var basePath = Path.GetFullPath(Path.GetDirectoryName(Application.ExecutablePath) + "\\..\\..\\..\\");
////var tt2 = File.Exists(testImagePath);
//try
//{
// using (var engine = new TesseractEngine(_tessDataPath, "eng", EngineMode.Default))
// {
// using (var img = Pix.LoadFromFile(testImagePath))
// {
// using (var page = engine.Process(img))
// {
// Console.WriteLine("Mean Confidence = {0}", page.GetMeanConfidence());
// //var tt1 = page.GetLSTMBoxText(0);
// //var tt1 = page.GetAltoText(0);
// //var tt1 = page.GetBoxText(0);
// //var tt1 = page.GetHOCRText(0);
// //var tt1 = page.GetMeanConfidence();
// var tt1 = page.GetText();
// //var tt1 = page.GetUNLVText();
// (viewer residue: blame timestamp 2024-05-24 10:55:28 +00:00)
// //Console.WriteLine("Text (GetText): \r\n{0}", text);
// //Console.WriteLine("Text (iterator):");
// //using (var iter = page.GetIterator())
// //{
// // iter.Begin();
// // do
// // {
// // do
// // {
// // do
// // {
// // do
// // {
// // if (iter.IsAtBeginningOf(PageIteratorLevel.Block))
// // {
// // Console.WriteLine("<BLOCK>");
// // }
// // Console.Write(iter.GetText(PageIteratorLevel.Word));
// // Console.Write(" ");
// // if (iter.IsAtFinalOf(PageIteratorLevel.TextLine, PageIteratorLevel.Word))
// // {
// // Console.WriteLine();
// // }
// // } while (iter.Next(PageIteratorLevel.TextLine, PageIteratorLevel.Word));
// // if (iter.IsAtFinalOf(PageIteratorLevel.Para, PageIteratorLevel.TextLine))
// // {
// // Console.WriteLine();
// // }
// // } while (iter.Next(PageIteratorLevel.Para, PageIteratorLevel.TextLine));
// // } while (iter.Next(PageIteratorLevel.Block, PageIteratorLevel.Para));
// // } while (iter.Next(PageIteratorLevel.Block));
// //}
// }
// }
// }
//}
//catch (Exception exc)
//{
// //Trace.TraceError(exc.ToString());
// Console.WriteLine("Unexpected Error: " + exc.Message);
// //Console.WriteLine("Details: ");
// //Console.WriteLine(exc.ToString());
//}
//}
/// <summary>
/// Runs Tesseract OCR over an image file using the "eng" language data and
/// returns the recognized text.
/// </summary>
/// <param name="filename">Path of the image file to load and process.</param>
/// <returns>
/// The result of <c>page.GetText()</c>, or <c>null</c> if any step throws.
/// </returns>
private string ParseOCR(string filename)
{
    string response = null;

    try
    {
        // Engine, image and page are all disposable; release them in reverse order.
        using (var engine = new TesseractEngine(_tessDataPath, "eng", EngineMode.Default))
        using (var img = Pix.LoadFromFile(filename))
        using (var page = engine.Process(img))
        {
            Debug.WriteLine("Mean Confidence = {0}", page.GetMeanConfidence());

            // Earlier experiments also tried other page outputs: GetAltoText, GetBoxText,
            // GetHOCRText, GetLSTMBoxText, GetTsvText and GetUNLVText. Plain text is used here.
            response = page.GetText();
        }
    }
    catch (Exception exc)
    {
        // Best effort: the caller still gets null, but the failure is no longer
        // silent. Logged the same way ReadBarcode reports its errors.
        Debug.WriteLine(exc.Message);
    }

    return response;
}
/// <summary>
/// Reads barcodes from an image file with the Code 39, Code 128, UPC-A,
/// UPC-E and QR symbologies enabled.
/// </summary>
/// <param name="filename">Path of the image file to scan.</param>
/// <returns>
/// The barcodes reported by the reader, or an empty array when reading
/// throws or yields no result. Never <c>null</c>.
/// </returns>
private Barcode[] ReadBarcode(string filename)
{
    // Shared empty instance avoids allocating a fresh zero-length array per call.
    var result = Array.Empty<Barcode>();

    try
    {
        using (BarcodeReader reader = new BarcodeReader())
        {
            reader.Code39 = true;
            reader.Code128 = true;
            reader.Upca = true;
            reader.Upce = true;
            reader.QR = true;

            // NOTE(review): the second argument (0) is presumably a page selector;
            // confirm its meaning against the ClearImage API documentation.
            // The null guard keeps callers' loops safe should the reader return null.
            result = reader.Read(filename, 0) ?? Array.Empty<Barcode>();
        }
    }
    catch (Exception exc)
    {
        // Best effort: report the failure and fall through to the empty result.
        Debug.WriteLine(exc.Message);
    }

    return result;
}
}
}