VintaSoft Imaging .NET SDK 14.0: Documentation for .NET developer
In This Topic
    Identify and recognize a document that contains a completed form with text
    In This Topic
    If you want to identify and recognize a document that contains a completed form with text, you need to do the following steps:
    Here is C#/VB.NET code that demonstrates how to identify and recognize a completed form that contains text.
    /// <summary>
    /// Recognizes a filled form in an image and writes information about
    /// the recognized OCR fields to the console.
    /// </summary>
    /// <remarks>
    /// If the static OCR engine manager of the OCR field templates is not set yet,
    /// it is created from the engine returned by <c>GetOcrEngine</c> and
    /// a handwritten digits OCR engine before the recognition starts.
    /// </remarks>
    /// <param name="formRecognitionManager">The form recognition manager that performs the recognition.</param>
    /// <param name="image">The image with the filled form.</param>
    public static void RecognizeFormWithOcrFields(
        Vintasoft.Imaging.FormsProcessing.FormRecognitionManager formRecognitionManager,
        Vintasoft.Imaging.VintasoftImage image)
    {
        // the OCR engine manager of the OCR field templates is a static property:
        // create it only if nobody has set it yet
        // (this can also be done once at the start of the application)
        if (Vintasoft.Imaging.FormsProcessing.FormRecognition.Ocr.OcrFieldTemplate.OcrEngineManager == null)
        {
            // engine for text
            Vintasoft.Imaging.Ocr.OcrEngine textEngine = GetOcrEngine();

            // engine for handwritten digits
            Vintasoft.Imaging.Ocr.OcrEngine digitsEngine =
                new Vintasoft.Imaging.Ocr.ML.HandwrittenDigits.HandwrittenDigitsOcrEngine();

            Vintasoft.Imaging.FormsProcessing.FormRecognition.Ocr.OcrFieldTemplate.OcrEngineManager =
                new Vintasoft.Imaging.Ocr.OcrEngineManager(textEngine, digitsEngine);
        }

        // run the recognition of the filled form
        Vintasoft.Imaging.FormsProcessing.FormRecognitionResult result =
            formRecognitionManager.Recognize(image);

        // an unreliable image comparison means that no matching template was found
        Vintasoft.Imaging.FormsProcessing.TemplateMatching.ImageImprintCompareResult compareResult =
            result.TemplateMatchingResult.ImageCompareResult;
        if (!compareResult.IsReliable)
        {
            System.Console.WriteLine("Matching template is not found.");
            return;
        }

        // the page with the recognized form fields
        Vintasoft.Imaging.FormsProcessing.FormRecognition.FormPage page = result.RecognizedPage;
        if (page.Items.Count == 0)
        {
            System.Console.WriteLine("No form fields were recognized.");
            return;
        }

        string countLine = string.Format("Recognized form field count: {0}", page.Items.Count);
        System.Console.WriteLine(countLine);

        foreach (Vintasoft.Imaging.FormsProcessing.FormRecognition.FormField field in page.Items)
        {
            // only OCR fields are reported, fields of other types are skipped
            Vintasoft.Imaging.FormsProcessing.FormRecognition.Ocr.OcrField ocrField =
                field as Vintasoft.Imaging.FormsProcessing.FormRecognition.Ocr.OcrField;
            if (ocrField == null)
            {
                continue;
            }

            // field confidence is multiplied by 100 to be shown as a percentage
            string fieldLine = string.Format(
                "  OCR field: name: {0}; value: {1}; confidence: {2:F1}%",
                ocrField.Name,
                ocrField.Value,
                ocrField.Confidence * 100);
            System.Console.WriteLine(fieldLine);

            // words of the OCR result of the field
            Vintasoft.Imaging.Ocr.Results.OcrPage fieldOcrPage = ocrField.OcrResult;
            Vintasoft.Imaging.Ocr.Results.OcrObject[] fieldWords = fieldOcrPage.GetWords(75, 75);
            foreach (Vintasoft.Imaging.Ocr.Results.OcrObject fieldWord in fieldWords)
            {
                // word confidence is written as is
                string wordLine = string.Format(
                    "    OCR word: {0}; confidence: {1:F1}%",
                    fieldWord.ToString(),
                    fieldWord.Confidence);
                System.Console.WriteLine(wordLine);
            }
        }
    }
    
    /// <summary>
    /// Creates the Tesseract OCR engine that is used for recognition of OCR fields.
    /// </summary>
    /// <remarks>
    /// The project must reference Vintasoft.Imaging.Ocr.Tesseract.dll,
    /// otherwise the Tesseract OCR engine cannot be created.
    /// </remarks>
    /// <returns>A new Tesseract OCR engine that uses the DLL directory specified in this method.</returns>
    private static Vintasoft.Imaging.Ocr.OcrEngine GetOcrEngine()
    {
        // directory that contains the Tesseract5.Vintasoft.xXX.dll files
        // NOTE: replace this value with the actual path to the Tesseract OCR dll files
        string dllDirectory = @"C:\Program Files\VintaSoft\VintaSoft Imaging .NET\Bin\TesseractOCR\";

        // Vintasoft.Imaging.Ocr.Tesseract.dll is required for this type
        Vintasoft.Imaging.Ocr.OcrEngine tesseractEngine =
            new Vintasoft.Imaging.Ocr.Tesseract.TesseractOcr(dllDirectory);
        return tesseractEngine;
    }
    
    ''' <summary>
    ''' Recognizes a filled form in an image and writes information about
    ''' the recognized OCR fields to the console.
    ''' </summary>
    ''' <remarks>
    ''' If the shared OCR engine manager of the OCR field templates is not set yet,
    ''' it is created from the engine returned by <c>GetOcrEngine</c> and
    ''' a handwritten digits OCR engine before the recognition starts.
    ''' </remarks>
    ''' <param name="formRecognitionManager">The form recognition manager that performs the recognition.</param>
    ''' <param name="image">The image with the filled form.</param>
    Public Shared Sub RecognizeFormWithOcrFields(formRecognitionManager As Vintasoft.Imaging.FormsProcessing.FormRecognitionManager, image As Vintasoft.Imaging.VintasoftImage)
        ' the OCR engine manager of the OCR field templates is a shared property:
        ' create it only if nobody has set it yet
        ' (this can also be done once at the start of the application)
        If Vintasoft.Imaging.FormsProcessing.FormRecognition.Ocr.OcrFieldTemplate.OcrEngineManager Is Nothing Then
            ' engine for text
            Dim textEngine As Vintasoft.Imaging.Ocr.OcrEngine = GetOcrEngine()

            ' engine for handwritten digits
            Dim digitsEngine As Vintasoft.Imaging.Ocr.OcrEngine = New Vintasoft.Imaging.Ocr.ML.HandwrittenDigits.HandwrittenDigitsOcrEngine()

            Vintasoft.Imaging.FormsProcessing.FormRecognition.Ocr.OcrFieldTemplate.OcrEngineManager = New Vintasoft.Imaging.Ocr.OcrEngineManager(textEngine, digitsEngine)
        End If

        ' run the recognition of the filled form
        Dim result As Vintasoft.Imaging.FormsProcessing.FormRecognitionResult = formRecognitionManager.Recognize(image)

        ' an unreliable image comparison means that no matching template was found
        Dim compareResult As Vintasoft.Imaging.FormsProcessing.TemplateMatching.ImageImprintCompareResult = result.TemplateMatchingResult.ImageCompareResult
        If Not compareResult.IsReliable Then
            System.Console.WriteLine("Matching template is not found.")
            Return
        End If

        ' the page with the recognized form fields
        Dim page As Vintasoft.Imaging.FormsProcessing.FormRecognition.FormPage = result.RecognizedPage
        If page.Items.Count = 0 Then
            System.Console.WriteLine("No form fields were recognized.")
            Return
        End If

        Dim countLine As String = String.Format("Recognized form field count: {0}", page.Items.Count)
        System.Console.WriteLine(countLine)

        For Each field As Vintasoft.Imaging.FormsProcessing.FormRecognition.FormField In page.Items
            ' only OCR fields are reported, fields of other types are skipped
            Dim ocrField As Vintasoft.Imaging.FormsProcessing.FormRecognition.Ocr.OcrField = TryCast(field, Vintasoft.Imaging.FormsProcessing.FormRecognition.Ocr.OcrField)
            If ocrField Is Nothing Then
                Continue For
            End If

            ' field confidence is multiplied by 100 to be shown as a percentage
            Dim fieldLine As String = String.Format("  OCR field: name: {0}; value: {1}; confidence: {2:F1}%", ocrField.Name, ocrField.Value, ocrField.Confidence * 100)
            System.Console.WriteLine(fieldLine)

            ' words of the OCR result of the field
            Dim fieldOcrPage As Vintasoft.Imaging.Ocr.Results.OcrPage = ocrField.OcrResult
            Dim fieldWords As Vintasoft.Imaging.Ocr.Results.OcrObject() = fieldOcrPage.GetWords(75, 75)
            For Each fieldWord As Vintasoft.Imaging.Ocr.Results.OcrObject In fieldWords
                ' word confidence is written as is
                Dim wordLine As String = String.Format("    OCR word: {0}; confidence: {1:F1}%", fieldWord.ToString(), fieldWord.Confidence)
                System.Console.WriteLine(wordLine)
            Next
        Next
    End Sub
    
    ''' <summary>
    ''' Creates the Tesseract OCR engine that is used for recognition of OCR fields.
    ''' </summary>
    ''' <remarks>
    ''' The project must reference Vintasoft.Imaging.Ocr.Tesseract.dll,
    ''' otherwise the Tesseract OCR engine cannot be created.
    ''' </remarks>
    ''' <returns>A new Tesseract OCR engine that uses the DLL directory specified in this method.</returns>
    Private Shared Function GetOcrEngine() As Vintasoft.Imaging.Ocr.OcrEngine
        ' directory that contains the Tesseract5.Vintasoft.xXX.dll files
        ' NOTE: replace this value with the actual path to the Tesseract OCR dll files
        Dim dllDirectory As String = "C:\Program Files\VintaSoft\VintaSoft Imaging .NET\Bin\TesseractOCR\"

        ' Vintasoft.Imaging.Ocr.Tesseract.dll is required for this type
        Dim tesseractEngine As Vintasoft.Imaging.Ocr.OcrEngine = New Vintasoft.Imaging.Ocr.Tesseract.TesseractOcr(dllDirectory)
        Return tesseractEngine
    End Function