OCR: How to run OCR in a region of interest of an image?
In This Topic
Here are C# and VB.NET examples that show how to run OCR only in a region of interest of an image (in these examples, the top half of each image):
class OcrEngineRecognitionRegionExample
{
    /// <summary>
    /// Recognizes text in a region of each image of the specified file using
    /// the Tesseract OCR engine and writes the recognized text to the console.
    /// The recognition region starts at (0, 0) and spans the full image width
    /// and half of the image height (i.e. the top half of the image).
    /// </summary>
    /// <param name="filename">The name of the file that stores the images with text.</param>
    /// <param name="ocrLanguage">The language of the text to recognize.</param>
    public static void OcrImageUsingTesseractEngine(string filename, Vintasoft.Imaging.Ocr.OcrLanguage ocrLanguage)
    {
        // create an image collection
        using (Vintasoft.Imaging.ImageCollection images = new Vintasoft.Imaging.ImageCollection())
        {
            try
            {
                // add images from file to the image collection
                images.Add(filename);

                System.Console.WriteLine("Create Tesseract OCR engine...");
                // create the Tesseract OCR engine
                using (Vintasoft.Imaging.Ocr.Tesseract.TesseractOcr tesseractOcr =
                    new Vintasoft.Imaging.Ocr.Tesseract.TesseractOcr())
                {
                    System.Console.WriteLine("Initialize OCR engine...");
                    // init the Tesseract OCR engine
                    tesseractOcr.Init(new Vintasoft.Imaging.Ocr.OcrEngineSettings(ocrLanguage));

                    try
                    {
                        // for each image
                        foreach (Vintasoft.Imaging.VintasoftImage image in images)
                        {
                            // set the recognition region; it depends on the image size,
                            // so it must be recalculated for every image
                            tesseractOcr.RecognitionRegion =
                                new Vintasoft.Imaging.RegionOfInterest(0, 0, image.Width, image.Height / 2);

                            System.Console.WriteLine("Recognize the image...");
                            // recognize text in image
                            Vintasoft.Imaging.Ocr.Results.OcrPage ocrResult = tesseractOcr.Recognize(image);

                            // output the recognized text
                            System.Console.WriteLine("Page Text:");
                            System.Console.WriteLine(ocrResult.GetText());
                            System.Console.WriteLine();
                        }
                    }
                    finally
                    {
                        // shutdown the Tesseract OCR engine even if recognition fails;
                        // the engine was successfully initialized at this point
                        tesseractOcr.Shutdown();
                    }
                }
            }
            finally
            {
                // free images even if loading, initialization or recognition fails
                images.ClearAndDisposeItems();
            }
        }
    }
}
Class OcrEngineRecognitionRegionExample
    ''' <summary>
    ''' Recognizes text in a region of each image of the specified file using
    ''' the Tesseract OCR engine and writes the recognized text to the console.
    ''' The recognition region starts at (0, 0) and spans the full image width
    ''' and half of the image height (i.e. the top half of the image).
    ''' </summary>
    ''' <param name="filename">The name of the file that stores the images with text.</param>
    ''' <param name="ocrLanguage">The language of the text to recognize.</param>
    Public Shared Sub OcrImageUsingTesseractEngine(filename As String, ocrLanguage As Vintasoft.Imaging.Ocr.OcrLanguage)
        ' create an image collection
        Using images As New Vintasoft.Imaging.ImageCollection()
            Try
                ' add images from file to the image collection
                images.Add(filename)

                System.Console.WriteLine("Create Tesseract OCR engine...")
                ' create the Tesseract OCR engine
                Using tesseractOcr As New Vintasoft.Imaging.Ocr.Tesseract.TesseractOcr()
                    System.Console.WriteLine("Initialize OCR engine...")
                    ' init the Tesseract OCR engine
                    tesseractOcr.Init(New Vintasoft.Imaging.Ocr.OcrEngineSettings(ocrLanguage))

                    Try
                        ' for each image
                        For Each image As Vintasoft.Imaging.VintasoftImage In images
                            ' set the recognition region; it depends on the image size,
                            ' so it must be recalculated for every image
                            tesseractOcr.RecognitionRegion = New Vintasoft.Imaging.RegionOfInterest(0, 0, image.Width, image.Height \ 2)

                            System.Console.WriteLine("Recognize the image...")
                            ' recognize text in image
                            Dim ocrResult As Vintasoft.Imaging.Ocr.Results.OcrPage = tesseractOcr.Recognize(image)

                            ' output the recognized text
                            System.Console.WriteLine("Page Text:")
                            System.Console.WriteLine(ocrResult.GetText())
                            System.Console.WriteLine()
                        Next
                    Finally
                        ' shutdown the Tesseract OCR engine even if recognition fails;
                        ' the engine was successfully initialized at this point
                        tesseractOcr.Shutdown()
                    End Try
                End Using
            Finally
                ' free images even if loading, initialization or recognition fails
                images.ClearAndDisposeItems()
            End Try
        End Using
    End Sub
End Class