VintaSoft Imaging .NET SDK v8.7
In This Topic
    OCR: How to recognize text in two languages?
    In This Topic

    Here is an example that shows how to recognize an image that contains text in two languages: the top half of each image is recognized as English text and the bottom half as German text:

    ' The project that uses this code must reference the following assemblies:
    ' - Vintasoft.Imaging
    ' - Vintasoft.Imaging.Ocr
    ' - Vintasoft.Imaging.Ocr.Tesseract
    
    Class RecognitionRegionExample
            ''' <summary>
            ''' Recognizes the English and German text in 2 image regions.
            ''' </summary>
            ''' <param name="filename">The name of file, which stores images with text.</param>
            Public Shared Sub OcrImageUsingTesseractEngine(filename As String)
                    ' create the image collection
                    Using images As New Vintasoft.Imaging.ImageCollection()
                            ' add images to image collection
                            images.Add(filename)
    
                            System.Console.WriteLine("Create Tesseract OCR engine...")
                            Using tesseractOcr As New Vintasoft.Imaging.Ocr.Tesseract.TesseractOcr()
                                    ' create OCR engine manager
                                    Dim engineManager As New Vintasoft.Imaging.Ocr.OcrEngineManager(tesseractOcr)
    
                                    Dim englishLanguage As Vintasoft.Imaging.Ocr.OcrLanguage = Vintasoft.Imaging.Ocr.OcrLanguage.English
                                    Dim germanLanguage As Vintasoft.Imaging.Ocr.OcrLanguage = Vintasoft.Imaging.Ocr.OcrLanguage.German
    
                                    ' create OCR settings
                                    Dim settings As New Vintasoft.Imaging.Ocr.OcrEngineSettings(englishLanguage)
    
                                    ' for each image
                                    For Each image As Vintasoft.Imaging.VintasoftImage In images
                                            ' create the recognition regions
                                            Dim regions As Vintasoft.Imaging.Ocr.RecognitionRegion() = New Vintasoft.Imaging.Ocr.RecognitionRegion(1) {}
                                            regions(0) = New Vintasoft.Imaging.Ocr.RecognitionRegion(New Vintasoft.Imaging.RegionOfInterest(0, 0, image.Width, image.Height \ 2), englishLanguage)
                                            regions(1) = New Vintasoft.Imaging.Ocr.RecognitionRegion(New Vintasoft.Imaging.RegionOfInterest(0, image.Height \ 2, image.Width, image.Height \ 2), germanLanguage)
    
                                            System.Console.WriteLine("Recognize the image...")
    
                                            ' recognize text in image regions
                                            Dim ocrResult As Vintasoft.Imaging.Ocr.Results.OcrPage = engineManager.Recognize(image, settings, regions)
    
                                            ' output the recognition result
                                            System.Console.WriteLine("Page Text:")
                                            System.Console.WriteLine(ocrResult.GetText())
                                            System.Console.WriteLine()
                                    Next
                            End Using
    
                            ' free images
                            images.ClearAndDisposeItems()
                    End Using
            End Sub
    End Class
                  
    
    // The project that uses this code must reference the following assemblies:
    // - Vintasoft.Imaging
    // - Vintasoft.Imaging.Ocr
    // - Vintasoft.Imaging.Ocr.Tesseract
    
    class RecognitionRegionExample
    {
        /// <summary>
        /// Recognizes the English and German text in 2 image regions.
        /// </summary>
        /// <param name="filename">The name of file, which stores images with text.</param>
        public static void OcrImageUsingTesseractEngine(string filename)
        {
            // create the image collection
            using (Vintasoft.Imaging.ImageCollection images = new Vintasoft.Imaging.ImageCollection())
            {
                // add images to image collection
                images.Add(filename);
    
                System.Console.WriteLine("Create Tesseract OCR engine...");
                using (Vintasoft.Imaging.Ocr.Tesseract.TesseractOcr tesseractOcr = 
                    new Vintasoft.Imaging.Ocr.Tesseract.TesseractOcr())
                {
                    // create OCR engine manager
                    Vintasoft.Imaging.Ocr.OcrEngineManager engineManager = 
                        new Vintasoft.Imaging.Ocr.OcrEngineManager(tesseractOcr);
    
                    Vintasoft.Imaging.Ocr.OcrLanguage englishLanguage = Vintasoft.Imaging.Ocr.OcrLanguage.English;
                    Vintasoft.Imaging.Ocr.OcrLanguage germanLanguage = Vintasoft.Imaging.Ocr.OcrLanguage.German;
    
                    // create OCR settings
                    Vintasoft.Imaging.Ocr.OcrEngineSettings settings = 
                        new Vintasoft.Imaging.Ocr.OcrEngineSettings(englishLanguage);
    
                    // for each image
                    foreach (Vintasoft.Imaging.VintasoftImage image in images)
                    {
                        // create the recognition regions
                        Vintasoft.Imaging.Ocr.RecognitionRegion[] regions = 
                            new Vintasoft.Imaging.Ocr.RecognitionRegion[2];
                        regions[0] = new Vintasoft.Imaging.Ocr.RecognitionRegion(
                            new Vintasoft.Imaging.RegionOfInterest(0, 0, image.Width, image.Height / 2),
                            englishLanguage);
                        regions[1] = new Vintasoft.Imaging.Ocr.RecognitionRegion(
                            new Vintasoft.Imaging.RegionOfInterest(0, image.Height / 2, image.Width, image.Height / 2),
                            germanLanguage);
    
                        System.Console.WriteLine("Recognize the image...");
                        
                        // recognize text in image regions
                        Vintasoft.Imaging.Ocr.Results.OcrPage ocrResult = engineManager.Recognize(image, settings, regions);
    
                        // output the recognition result
                        System.Console.WriteLine("Page Text:");
                        System.Console.WriteLine(ocrResult.GetText());
                        System.Console.WriteLine();
                    }
                }
    
                // free images
                images.ClearAndDisposeItems();
            }
        }
    }