Console: Convert an image to a text.

Code samples for VintaSoft Imaging .NET SDK. Here you can request a code sample.

Moderator: Alex

Post Reply
Alex
Site Admin
Posts: 2364
Joined: Thu Jul 10, 2008 2:21 pm

Console: Convert an image to a text.

Post by Alex »

Here is a C# example that shows how to convert an image to text:

Code: Select all


namespace ConsoleApp1
{
    /// <summary>
    /// Console sample that recognizes text in an image with the Tesseract OCR engine
    /// and saves the result as plain and as formatted text files.
    /// </summary>
    class Program
    {
        /// <summary>Image that is recognized when no image path is passed on the command line.</summary>
        private const string DefaultImageFilePath = "OCR.tif";

        /// <summary>Tesseract OCR directory that is used when no directory is passed on the command line.</summary>
        private const string DefaultTesseractOcrPath = @"..\..\TesseractOCR";

        /// <summary>Output file for the recognition result as NOT formatted text.</summary>
        private const string NotFormattedTextFilePath = "OCR-notFormatted.txt";

        /// <summary>Output file for the recognition result as formatted text.</summary>
        private const string FormattedTextFilePath = "OCR-formatted.txt";

        /// <summary>
        /// Recognizes English text in an image and writes the result to
        /// "OCR-notFormatted.txt" and "OCR-formatted.txt" (UTF-8) in the current directory.
        /// </summary>
        /// <param name="args">
        /// Optional command-line arguments:
        /// <c>args[0]</c> - path to the image file (default: "OCR.tif");
        /// <c>args[1]</c> - path to the Tesseract OCR directory (default: "..\..\TesseractOCR").
        /// </param>
        /// <remarks>
        /// If the image file does not exist, an error is written to the standard error stream
        /// and the process exit code is set to 1.
        /// </remarks>
        static void Main(string[] args)
        {
            // fall back to the sample defaults when arguments are not specified
            string imageFilePath = (args != null && args.Length > 0) ? args[0] : DefaultImageFilePath;
            string tesseractOcrPath = (args != null && args.Length > 1) ? args[1] : DefaultTesseractOcrPath;

            // fail early with a clear message instead of starting the OCR engine for nothing
            if (!System.IO.File.Exists(imageFilePath))
            {
                System.Console.Error.WriteLine("Image file is not found: " + imageFilePath);
                System.Environment.ExitCode = 1;
                return;
            }

            // create the OCR engine
            using (Vintasoft.Imaging.Ocr.Tesseract.TesseractOcr tesseractOcr =
                new Vintasoft.Imaging.Ocr.Tesseract.TesseractOcr(tesseractOcrPath))
            {
                // specify that OCR engine will recognize English text
                Vintasoft.Imaging.Ocr.OcrLanguage language = Vintasoft.Imaging.Ocr.OcrLanguage.English;
                // create the OCR engine settings
                Vintasoft.Imaging.Ocr.Tesseract.TesseractOcrSettings settings =
                    new Vintasoft.Imaging.Ocr.Tesseract.TesseractOcrSettings(language);
                // initialize the OCR engine
                tesseractOcr.Init(settings);
                try
                {
                    // load an image with text
                    using (Vintasoft.Imaging.VintasoftImage image =
                        new Vintasoft.Imaging.VintasoftImage(imageFilePath))
                    {
                        // preprocess image before text recognition
                        PreprocessImage(image);

                        // specify the image, where text must be recognized
                        tesseractOcr.SetImage(image);
                        try
                        {
                            // recognize text in the image
                            Vintasoft.Imaging.Ocr.Results.OcrPage ocrResult = tesseractOcr.Recognize();

                            // save the recognition result as NOT formatted text
                            System.IO.File.WriteAllText(
                                NotFormattedTextFilePath,
                                ocrResult.GetText(),
                                System.Text.Encoding.UTF8);

                            // save the recognition result as formatted text
                            System.IO.File.WriteAllText(
                                FormattedTextFilePath,
                                ocrResult.GetFormattedText(),
                                System.Text.Encoding.UTF8);
                        }
                        finally
                        {
                            // clear the image even if recognition or saving failed,
                            // so the engine does not keep a reference to the image that is about to be disposed
                            tesseractOcr.ClearImage();
                        }
                    }
                }
                finally
                {
                    // shutdown the OCR engine even if an error occurred after initialization
                    tesseractOcr.Shutdown();
                }
            }
        }

        /// <summary>
        /// Prepares an image for text recognition: removes noise and then removes lines.
        /// The image is changed in place.
        /// </summary>
        /// <param name="image">The image to preprocess.</param>
        private static void PreprocessImage(Vintasoft.Imaging.VintasoftImage image)
        {
            // remove noise from image
            Vintasoft.Imaging.ImageProcessing.Document.DespeckleCommand despeckleCommand =
                new Vintasoft.Imaging.ImageProcessing.Document.DespeckleCommand();
            despeckleCommand.ExecuteInPlace(image);

            // remove lines from image
            Vintasoft.Imaging.ImageProcessing.Document.LineRemovalCommand lineRemovalCommand =
                new Vintasoft.Imaging.ImageProcessing.Document.LineRemovalCommand();
            lineRemovalCommand.ExecuteInPlace(image);
        }
    }
}
The source code of the console application for VintaSoft Imaging .NET SDK 12 can be downloaded from here.
Post Reply