OCR: How to recognize text in two languages?
In This Topic
Here is C#/VB.NET code that shows how to recognize text written in English and German:
class RecognitionRegionExample
{
    /// <summary>
    /// Recognizes English and German text in each image loaded from the specified file
    /// using the Tesseract OCR engine and writes the recognized text to the console.
    /// </summary>
    /// <param name="filename">The name of file, which stores images with text.</param>
    public static void OcrImageUsingTesseractEngine(string filename)
    {
        // create the image collection
        using (Vintasoft.Imaging.ImageCollection images = new Vintasoft.Imaging.ImageCollection())
        {
            try
            {
                // add images to image collection
                images.Add(filename);

                System.Console.WriteLine("Create Tesseract OCR engine...");
                using (Vintasoft.Imaging.Ocr.Tesseract.TesseractOcr tesseractOcr =
                    new Vintasoft.Imaging.Ocr.Tesseract.TesseractOcr())
                {
                    // create OCR engine manager
                    Vintasoft.Imaging.Ocr.OcrEngineManager engineManager =
                        new Vintasoft.Imaging.Ocr.OcrEngineManager(tesseractOcr);

                    // create OCR settings
                    Vintasoft.Imaging.Ocr.OcrEngineSettings settings =
                        new Vintasoft.Imaging.Ocr.OcrEngineSettings();
                    // specify that image can contain English and German text
                    settings.Languages = new Vintasoft.Imaging.Ocr.OcrLanguage[] {
                        Vintasoft.Imaging.Ocr.OcrLanguage.English,
                        Vintasoft.Imaging.Ocr.OcrLanguage.German };

                    // for each image
                    foreach (Vintasoft.Imaging.VintasoftImage image in images)
                    {
                        System.Console.WriteLine("Recognize the image...");
                        // recognize text in the image using both languages
                        Vintasoft.Imaging.Ocr.Results.OcrPage ocrResult = engineManager.Recognize(image, settings);

                        // output the recognition result
                        System.Console.WriteLine("Page Text:");
                        System.Console.WriteLine(ocrResult.GetText());
                        System.Console.WriteLine();
                    }
                }
            }
            finally
            {
                // free images in the finally block, so that they are released
                // even if image loading or text recognition throws an exception
                images.ClearAndDisposeItems();
            }
        }
    }
}
Class RecognitionRegionExample
    ''' <summary>
    ''' Recognizes English and German text in each image loaded from the specified file
    ''' using the Tesseract OCR engine and writes the recognized text to the console.
    ''' </summary>
    ''' <param name="filename">The name of file, which stores images with text.</param>
    Public Shared Sub OcrImageUsingTesseractEngine(filename As String)
        ' create the image collection
        Using images As New Vintasoft.Imaging.ImageCollection()
            Try
                ' add images to image collection
                images.Add(filename)

                System.Console.WriteLine("Create Tesseract OCR engine...")
                Using tesseractOcr As New Vintasoft.Imaging.Ocr.Tesseract.TesseractOcr()
                    ' create OCR engine manager
                    Dim engineManager As New Vintasoft.Imaging.Ocr.OcrEngineManager(tesseractOcr)

                    ' create OCR settings
                    Dim settings As New Vintasoft.Imaging.Ocr.OcrEngineSettings()
                    ' specify that image can contain English and German text
                    settings.Languages = New Vintasoft.Imaging.Ocr.OcrLanguage() {Vintasoft.Imaging.Ocr.OcrLanguage.English, Vintasoft.Imaging.Ocr.OcrLanguage.German}

                    ' for each image
                    For Each image As Vintasoft.Imaging.VintasoftImage In images
                        System.Console.WriteLine("Recognize the image...")
                        ' recognize text in the image using both languages
                        Dim ocrResult As Vintasoft.Imaging.Ocr.Results.OcrPage = engineManager.Recognize(image, settings)

                        ' output the recognition result
                        System.Console.WriteLine("Page Text:")
                        System.Console.WriteLine(ocrResult.GetText())
                        System.Console.WriteLine()
                    Next
                End Using
            Finally
                ' free images in the Finally block, so that they are released
                ' even if image loading or text recognition throws an exception
                images.ClearAndDisposeItems()
            End Try
        End Using
    End Sub
End Class