Code: Select all
namespace ConsoleApp1
{
    /// <summary>
    /// Console sample that runs the Vintasoft Tesseract OCR engine over a single
    /// image file and writes the recognized text to two text files.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Loads "OCR.tif", preprocesses it, recognizes English text in it and saves
        /// the result both as plain text ("OCR-notFormatted.txt") and as formatted
        /// text ("OCR-formatted.txt"), each encoded as UTF-8.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            const string sourceImagePath = "OCR.tif";
            const string engineDirectory = @"..\..\TesseractOCR";

            // The engine is created from the directory path given above.
            using (var ocrEngine = new Vintasoft.Imaging.Ocr.Tesseract.TesseractOcr(engineDirectory))
            {
                // Configure the engine for English text and initialize it.
                var engineSettings = new Vintasoft.Imaging.Ocr.Tesseract.TesseractOcrSettings(
                    Vintasoft.Imaging.Ocr.OcrLanguage.English);
                ocrEngine.Init(engineSettings);

                // Open the image that contains the text.
                using (var page = new Vintasoft.Imaging.VintasoftImage(sourceImagePath))
                {
                    // Clean the image up before it is handed to the engine.
                    CleanUpForRecognition(page);

                    // Hand the image to the engine and run recognition.
                    ocrEngine.SetImage(page);
                    var recognizedPage = ocrEngine.Recognize();

                    // Plain (unformatted) text output.
                    System.IO.File.WriteAllText(
                        "OCR-notFormatted.txt",
                        recognizedPage.GetText(),
                        System.Text.Encoding.UTF8);

                    // Formatted text output.
                    System.IO.File.WriteAllText(
                        "OCR-formatted.txt",
                        recognizedPage.GetFormattedText(),
                        System.Text.Encoding.UTF8);

                    // Detach the image from the engine before the image is disposed.
                    ocrEngine.ClearImage();
                }

                // Shut the engine down explicitly before it is disposed.
                ocrEngine.Shutdown();
            }
        }

        /// <summary>
        /// Preprocesses the image in place: first noise removal (despeckle),
        /// then line removal.
        /// </summary>
        /// <param name="image">Image to modify in place.</param>
        private static void CleanUpForRecognition(Vintasoft.Imaging.VintasoftImage image)
        {
            // Remove noise from the image.
            var noiseRemoval = new Vintasoft.Imaging.ImageProcessing.Document.DespeckleCommand();
            noiseRemoval.ExecuteInPlace(image);

            // Remove lines from the image.
            var lineRemoval = new Vintasoft.Imaging.ImageProcessing.Document.LineRemovalCommand();
            lineRemoval.ExecuteInPlace(image);
        }
    }
}