VintaSoft Imaging .NET SDK v8.7
In This Topic
    OCR: How to recognize text in two languages?
    In This Topic

    Here is an example that shows how to recognize text that contains both English and German in a single recognition pass:

    ' The project that uses this code must reference the following assemblies:
    ' - Vintasoft.Imaging
    ' - Vintasoft.Imaging.Ocr
    ' - Vintasoft.Imaging.Ocr.Tesseract
    
    Class RecognitionRegionExample
        ''' <summary>
        ''' Recognizes English and German text in every image loaded from the specified file
        ''' and writes the recognized text of each page to the console.
        ''' </summary>
        ''' <param name="filename">The name of the file that stores the images with text.</param>
        Public Shared Sub OcrImageUsingTesseractEngine(filename As String)
            ' create the image collection
            Using images As New Vintasoft.Imaging.ImageCollection()
                Try
                    ' add images to image collection
                    images.Add(filename)

                    System.Console.WriteLine("Create Tesseract OCR engine...")
                    Using tesseractOcr As New Vintasoft.Imaging.Ocr.Tesseract.TesseractOcr()
                        ' create OCR engine manager
                        Dim engineManager As New Vintasoft.Imaging.Ocr.OcrEngineManager(tesseractOcr)

                        Dim englishLanguage As Vintasoft.Imaging.Ocr.OcrLanguage = Vintasoft.Imaging.Ocr.OcrLanguage.English
                        Dim germanLanguage As Vintasoft.Imaging.Ocr.OcrLanguage = Vintasoft.Imaging.Ocr.OcrLanguage.German

                        ' create OCR settings
                        Dim settings As New Vintasoft.Imaging.Ocr.OcrEngineSettings(englishLanguage)

                        ' specify that image can contain English and German text
                        settings.Languages = New Vintasoft.Imaging.Ocr.OcrLanguage() {englishLanguage, germanLanguage}

                        ' for each image
                        For Each image As Vintasoft.Imaging.VintasoftImage In images
                            System.Console.WriteLine("Recognize the image...")

                            ' recognize text in the whole image using both languages
                            Dim ocrResult As Vintasoft.Imaging.Ocr.Results.OcrPage = engineManager.Recognize(image, settings)

                            ' output the recognition result
                            System.Console.WriteLine("Page Text:")
                            System.Console.WriteLine(ocrResult.GetText())
                            System.Console.WriteLine()
                        Next
                    End Using
                Finally
                    ' free images, even if loading or recognition failed
                    images.ClearAndDisposeItems()
                End Try
            End Using
        End Sub
    End Class
                  
    
    // The project that uses this code must reference the following assemblies:
    // - Vintasoft.Imaging
    // - Vintasoft.Imaging.Ocr
    // - Vintasoft.Imaging.Ocr.Tesseract
    
    class RecognitionRegionExample
    {
        /// <summary>
        /// Recognizes English and German text in every image loaded from the specified file
        /// and writes the recognized text of each page to the console.
        /// </summary>
        /// <param name="filename">The name of the file that stores the images with text.</param>
        public static void OcrImageUsingTesseractEngine(string filename)
        {
            // create the image collection
            using (Vintasoft.Imaging.ImageCollection images = new Vintasoft.Imaging.ImageCollection())
            {
                try
                {
                    // add images to image collection
                    images.Add(filename);

                    System.Console.WriteLine("Create Tesseract OCR engine...");
                    using (Vintasoft.Imaging.Ocr.Tesseract.TesseractOcr tesseractOcr =
                        new Vintasoft.Imaging.Ocr.Tesseract.TesseractOcr())
                    {
                        // create OCR engine manager
                        Vintasoft.Imaging.Ocr.OcrEngineManager engineManager =
                            new Vintasoft.Imaging.Ocr.OcrEngineManager(tesseractOcr);

                        // create OCR settings
                        Vintasoft.Imaging.Ocr.OcrEngineSettings settings =
                            new Vintasoft.Imaging.Ocr.OcrEngineSettings();

                        // specify that image can contain English and German text
                        settings.Languages = new Vintasoft.Imaging.Ocr.OcrLanguage[] {
                             Vintasoft.Imaging.Ocr.OcrLanguage.English,
                             Vintasoft.Imaging.Ocr.OcrLanguage.German };

                        // for each image
                        foreach (Vintasoft.Imaging.VintasoftImage image in images)
                        {
                            System.Console.WriteLine("Recognize the image...");

                            // recognize text in the whole image using both languages
                            Vintasoft.Imaging.Ocr.Results.OcrPage ocrResult = engineManager.Recognize(image, settings);

                            // output the recognition result
                            System.Console.WriteLine("Page Text:");
                            System.Console.WriteLine(ocrResult.GetText());
                            System.Console.WriteLine();
                        }
                    }
                }
                finally
                {
                    // free images, even if loading or recognition failed
                    images.ClearAndDisposeItems();
                }
            }
        }
    }