ocr-tesseract/Form1.cs
2024-05-24 11:55:28 +01:00

89 lines
3.8 KiB
C#

using System.Diagnostics;
using Tesseract;
namespace WinFormsApp3
{
/// <summary>
/// Main form of the OCR sample: runs Tesseract over a fixed test image when
/// <see cref="button1_Click"/> is invoked.
/// </summary>
public partial class Form1 : Form
{
    // Relative tessdata location handed to the engine as-is.
    // NOTE(review): presumably resolved against the working directory - confirm the
    // tessdata folder is copied next to the executable.
    private const string TessDataPath = @"./tessdata";

    // Language code passed to the Tesseract engine.
    private const string OcrLanguage = "eng";

    // Test image expected three directory levels above the executable.
    private const string TestImageFileName = "Scan_ADFPA_Letter_page-0001.jpg";

    /// <summary>Creates the form and runs the designer-generated initialisation.</summary>
    public Form1()
    {
        InitializeComponent();
    }

    /// <summary>
    /// Click handler: loads the test image, runs OCR on it and writes the UNLV-formatted
    /// result to the trace listeners. Any failure is traced, written to the console and
    /// shown to the user in a message box.
    /// </summary>
    /// <param name="sender">Event source (unused).</param>
    /// <param name="e">Event data (unused).</param>
    private void button1_Click(object sender, EventArgs e)
    {
        try
        {
            // Walk three levels up from the executable's folder.
            // NOTE(review): presumably bin/<config>/<tfm> -> project directory; confirm.
            var exeDirectory = Path.GetDirectoryName(Application.ExecutablePath) ?? AppContext.BaseDirectory;
            var basePath = Path.GetFullPath(Path.Combine(exeDirectory, "..", "..", ".."));
            var testImagePath = Path.Combine(basePath, TestImageFileName);

            // Using declarations dispose page, image and engine (in that order) at the end of the try block.
            using var engine = new TesseractEngine(TessDataPath, OcrLanguage, EngineMode.Default);
            using var img = Pix.LoadFromFile(testImagePath);
            using var page = engine.Process(img);

            // Other outputs tried earlier on the same page object: GetText, GetHOCRText,
            // GetAltoText, GetBoxText, GetLSTMBoxText, GetMeanConfidence, and word-level
            // traversal through GetIterator.
            var unlvText = page.GetUNLVText();

            // Surface the result instead of discarding it.
            Trace.WriteLine(unlvText);
        }
        catch (Exception exc)
        {
            // Top-level UI handler: report every failure rather than let it escape the click event.
            Trace.TraceError(exc.ToString());
            Console.WriteLine("Unexpected Error: " + exc.Message);
            Console.WriteLine("Details: ");
            Console.WriteLine(exc.ToString());

            // Console output is normally invisible in a GUI process, so also tell the user.
            MessageBox.Show(this, "Unexpected Error: " + exc.Message, Text, MessageBoxButtons.OK, MessageBoxIcon.Error);
        }
    }
}
}