ICR: Recognize handwritten digits
In This Topic
Intelligent character recognition (ICR) is used to extract handwritten text from images. It is a more sophisticated type of OCR technology that recognizes different handwriting styles and fonts to intelligently interpret data on forms and physical documents.
Starting from version 12.5, the VintaSoft OCR .NET Plug-in provides the Vintasoft.Imaging.Ocr.ML.dll assembly for recognition of handwritten digits (symbols "0123456789)(+-/") using a neural network.
The Vintasoft.Imaging.Ocr.ML.dll assembly performs machine learning using the "Microsoft.ML" library and references version 4.0.0 of the NuGet packages "Microsoft.ML" and "Microsoft.ML.LightGbm".
Vintasoft.Imaging.Ocr.ML.dll assembly contains the following classes for recognition of handwritten digits:
- The HandwrittenDigitsLineRecognizer class represents a recognizer that can recognize a single line of handwritten digits in an image using a neural network. Use the HandwrittenDigitsLineRecognizer class if you have an image with a single line of handwritten digits and want to recognize that line.
- The HandwrittenDigitsOcrEngine class represents an OCR engine that can recognize a single line of handwritten digits in an image using a neural network. Use the HandwrittenDigitsOcrEngine class if you have an image with text content and want to recognize a line of handwritten digits in an image region using the OcrEngine API.
The Vintasoft.Imaging.Ocr.ML.dll assembly can also be used together with the VintaSoft Forms Processing .NET Plug-in to recognize handwritten digits in the fields of a completed form.
The OcrFieldTemplate class allows you to define a template of an OCR field that contains a single line of handwritten digits.
Here is C#/VB.NET code that shows how to recognize a single line of handwritten digits in an image using the
HandwrittenDigitsLineRecognizer class:
/// <summary>
/// Recognizes a line of handwritten digits in an image with the
/// HandwrittenDigitsLineRecognizer class and prints every recognized symbol
/// together with its confidence (in percents, rounded) to the console.
/// </summary>
/// <param name="image">The image that is passed to the line recognizer.</param>
/// <returns>The string representation of the recognition result.</returns>
public static string RecognizeHandwrittenLineUsingHandwrittenDigitsLineRecognizer(Vintasoft.Imaging.VintasoftImage image)
{
    // build the recognizer for the image
    // (the second argument is presumably the set of symbols to recognize - confirm against the API reference)
    Vintasoft.Imaging.Ocr.ML.HandwrittenDigits.HandwrittenDigitsLineRecognizer recognizer =
        new Vintasoft.Imaging.Ocr.ML.HandwrittenDigits.HandwrittenDigitsLineRecognizer(image, "1234567890");

    // run the recognition
    Vintasoft.Imaging.Ocr.ML.HandwrittenDigits.HandwrittenDigitsRecognitionResult recognized = recognizer.RecognizeLine();

    // output one "symbol: confidence%" line per recognized symbol
    int index = 0;
    while (index < recognized.Symbols.Length)
    {
        System.Console.WriteLine(
            "{0}: {1}%",
            recognized.Symbols[index],
            System.Math.Round(recognized.Confidences[index] * 100));
        index++;
    }

    return recognized.ToString();
}
''' <summary>
''' Recognizes a line of handwritten digits in an image with the
''' HandwrittenDigitsLineRecognizer class and prints every recognized symbol
''' together with its confidence (in percents, rounded) to the console.
''' </summary>
''' <param name="image">The image that is passed to the line recognizer.</param>
''' <returns>The string representation of the recognition result.</returns>
Public Shared Function RecognizeHandwrittenLineUsingHandwrittenDigitsLineRecognizer(image As Vintasoft.Imaging.VintasoftImage) As String
    ' build the recognizer for the image
    ' (the second argument is presumably the set of symbols to recognize - confirm against the API reference)
    Dim recognizer As Vintasoft.Imaging.Ocr.ML.HandwrittenDigits.HandwrittenDigitsLineRecognizer =
        New Vintasoft.Imaging.Ocr.ML.HandwrittenDigits.HandwrittenDigitsLineRecognizer(image, "1234567890")

    ' run the recognition
    Dim recognized As Vintasoft.Imaging.Ocr.ML.HandwrittenDigits.HandwrittenDigitsRecognitionResult = recognizer.RecognizeLine()

    ' output one "symbol: confidence%" line per recognized symbol
    Dim index As Integer = 0
    While index < recognized.Symbols.Length
        System.Console.WriteLine(
            "{0}: {1}%",
            recognized.Symbols(index),
            System.Math.Round(recognized.Confidences(index) * 100))
        index += 1
    End While

    Return recognized.ToString()
End Function
Here is C#/VB.NET code that shows how to recognize a single line of handwritten digits in an image region using the
HandwrittenDigitsOcrEngine class:
/// <summary>
/// Recognizes the line of handwritten digits using HandwrittenDigitsOcrEngine class.
/// </summary>
/// <remarks>
/// The OCR engine is created for this call only, so it is shut down and disposed
/// before the method returns, even if recognition throws an exception.
/// </remarks>
/// <param name="image">The image.</param>
/// <returns>Text of the first region of the recognized page.</returns>
public static string RecognizeHandwrittenLineUsingHandwrittenDigitsOcrEngine(Vintasoft.Imaging.VintasoftImage image)
{
    // create OCR engine for recognition of handwritten digits;
    // 'using' guarantees that the engine is disposed when the method exits
    using (Vintasoft.Imaging.Ocr.ML.HandwrittenDigits.HandwrittenDigitsOcrEngine ocrEngine =
        new Vintasoft.Imaging.Ocr.ML.HandwrittenDigits.HandwrittenDigitsOcrEngine())
    {
        // create OCR settings for recognition of handwritten digits
        Vintasoft.Imaging.Ocr.ML.HandwrittenDigits.HandwrittenDigitsOcrSettings ocrSettings =
            new Vintasoft.Imaging.Ocr.ML.HandwrittenDigits.HandwrittenDigitsOcrSettings();
        ocrSettings.CharWhiteList = "1234567890";

        // init OCR engine
        ocrEngine.Init(ocrSettings);
        try
        {
            // recognize line of handwritten digits
            Vintasoft.Imaging.Ocr.Results.OcrPage page = ocrEngine.Recognize(image);
            // the first region of the page is expected to be the text region with the digits
            Vintasoft.Imaging.Ocr.Results.OcrTextRegion textRegion = (Vintasoft.Imaging.Ocr.Results.OcrTextRegion)page.Regions[0];

            // print result
            System.Console.WriteLine(string.Format("{0}: {1}%", textRegion.Text, textRegion.Confidence));

            return textRegion.Text;
        }
        finally
        {
            // shutdown OCR engine (pairs with the Init call above), also when recognition fails
            ocrEngine.Shutdown();
        }
    }
}
''' <summary>
''' Recognizes the line of handwritten digits using HandwrittenDigitsOcrEngine class.
''' </summary>
''' <remarks>
''' The OCR engine is created for this call only, so it is shut down and disposed
''' before the method returns, even if recognition throws an exception.
''' </remarks>
''' <param name="image">The image.</param>
''' <returns>Text of the first region of the recognized page.</returns>
Public Shared Function RecognizeHandwrittenLineUsingHandwrittenDigitsOcrEngine(image As Vintasoft.Imaging.VintasoftImage) As String
    ' create OCR engine for recognition of handwritten digits;
    ' 'Using' guarantees that the engine is disposed when the method exits
    Using ocrEngine As New Vintasoft.Imaging.Ocr.ML.HandwrittenDigits.HandwrittenDigitsOcrEngine()
        ' create OCR settings for recognition of handwritten digits
        Dim ocrSettings As New Vintasoft.Imaging.Ocr.ML.HandwrittenDigits.HandwrittenDigitsOcrSettings()
        ocrSettings.CharWhiteList = "1234567890"

        ' init OCR engine
        ocrEngine.Init(ocrSettings)
        Try
            ' recognize line of handwritten digits
            Dim page As Vintasoft.Imaging.Ocr.Results.OcrPage = ocrEngine.Recognize(image)
            ' the first region of the page is expected to be the text region with the digits
            Dim textRegion As Vintasoft.Imaging.Ocr.Results.OcrTextRegion = DirectCast(page.Regions(0), Vintasoft.Imaging.Ocr.Results.OcrTextRegion)

            ' print result
            System.Console.WriteLine(String.Format("{0}: {1}%", textRegion.Text, textRegion.Confidence))

            Return textRegion.Text
        Finally
            ' shutdown OCR engine (pairs with the Init call above), also when recognition fails
            ocrEngine.Shutdown()
        End Try
    End Using
End Function
Here is C#/VB.NET code that shows how to recognize a field with a single line of handwritten digits in a completed form:
/// <summary>
/// Recognizes a filled form in an image and prints, for every recognized OCR field,
/// the field name, value and confidence followed by the recognized OCR words.
/// </summary>
/// <param name="formRecognitionManager">The form recognition manager that recognizes the form.</param>
/// <param name="image">The image with the filled form.</param>
public static void RecognizeHandwrittenDigitsInFilledForm(
    Vintasoft.Imaging.FormsProcessing.FormRecognitionManager formRecognitionManager,
    Vintasoft.Imaging.VintasoftImage image)
{
    // lazily set up the OCR engine manager shared by the OCR field templates
    // (this set-up can be moved to the start of your application)
    if (Vintasoft.Imaging.FormsProcessing.FormRecognition.Ocr.OcrFieldTemplate.OcrEngineManager == null)
    {
        // the manager is built around the engine for handwritten digits
        Vintasoft.Imaging.Ocr.OcrEngine digitsEngine =
            new Vintasoft.Imaging.Ocr.ML.HandwrittenDigits.HandwrittenDigitsOcrEngine();
        Vintasoft.Imaging.FormsProcessing.FormRecognition.Ocr.OcrFieldTemplate.OcrEngineManager =
            new Vintasoft.Imaging.Ocr.OcrEngineManager(digitsEngine);
    }

    // run form recognition on the image
    Vintasoft.Imaging.FormsProcessing.FormRecognitionResult formResult =
        formRecognitionManager.Recognize(image);

    // stop if the template matching result is not reliable
    Vintasoft.Imaging.FormsProcessing.TemplateMatching.ImageImprintCompareResult compareResult =
        formResult.TemplateMatchingResult.ImageCompareResult;
    if (!compareResult.IsReliable)
    {
        System.Console.WriteLine("Matching template is not found.");
        return;
    }

    // stop if the recognized page has no fields
    Vintasoft.Imaging.FormsProcessing.FormRecognition.FormPage page = formResult.RecognizedPage;
    if (page.Items.Count == 0)
    {
        System.Console.WriteLine("No form fields were recognized.");
        return;
    }

    System.Console.WriteLine("Recognized form field count: {0}", page.Items.Count);

    foreach (Vintasoft.Imaging.FormsProcessing.FormRecognition.FormField field in page.Items)
    {
        // only OCR fields are reported; fields of other types are skipped
        Vintasoft.Imaging.FormsProcessing.FormRecognition.Ocr.OcrField ocrField =
            field as Vintasoft.Imaging.FormsProcessing.FormRecognition.Ocr.OcrField;
        if (ocrField == null)
        {
            continue;
        }

        // field summary; the field confidence is multiplied by 100 to print it as percents
        System.Console.WriteLine(
            " OCR field: name: {0}; value: {1}; confidence: {2:F1}%",
            ocrField.Name,
            ocrField.Value,
            ocrField.Confidence * 100);

        // words of the OCR result of the field
        Vintasoft.Imaging.Ocr.Results.OcrPage fieldOcrPage = ocrField.OcrResult;
        foreach (Vintasoft.Imaging.Ocr.Results.OcrObject word in fieldOcrPage.GetWords(75, 75))
        {
            System.Console.WriteLine(
                " OCR word: {0}; confidence: {1:F1}%",
                word.ToString(),
                word.Confidence);
        }
    }
}
''' <summary>
''' Recognizes a filled form in an image and prints, for every recognized OCR field,
''' the field name, value and confidence followed by the recognized OCR words.
''' </summary>
''' <param name="formRecognitionManager">The form recognition manager that recognizes the form.</param>
''' <param name="image">The image with the filled form.</param>
Public Shared Sub RecognizeHandwrittenDigitsInFilledForm(formRecognitionManager As Vintasoft.Imaging.FormsProcessing.FormRecognitionManager, image As Vintasoft.Imaging.VintasoftImage)
    ' lazily set up the OCR engine manager shared by the OCR field templates
    ' (this set-up can be moved to the start of your application)
    If Vintasoft.Imaging.FormsProcessing.FormRecognition.Ocr.OcrFieldTemplate.OcrEngineManager Is Nothing Then
        ' the manager is built around the engine for handwritten digits
        Dim digitsEngine As Vintasoft.Imaging.Ocr.OcrEngine = New Vintasoft.Imaging.Ocr.ML.HandwrittenDigits.HandwrittenDigitsOcrEngine()
        Vintasoft.Imaging.FormsProcessing.FormRecognition.Ocr.OcrFieldTemplate.OcrEngineManager = New Vintasoft.Imaging.Ocr.OcrEngineManager(digitsEngine)
    End If

    ' run form recognition on the image
    Dim formResult As Vintasoft.Imaging.FormsProcessing.FormRecognitionResult = formRecognitionManager.Recognize(image)

    ' stop if the template matching result is not reliable
    Dim compareResult As Vintasoft.Imaging.FormsProcessing.TemplateMatching.ImageImprintCompareResult = formResult.TemplateMatchingResult.ImageCompareResult
    If Not compareResult.IsReliable Then
        System.Console.WriteLine("Matching template is not found.")
        Return
    End If

    ' stop if the recognized page has no fields
    Dim page As Vintasoft.Imaging.FormsProcessing.FormRecognition.FormPage = formResult.RecognizedPage
    If page.Items.Count = 0 Then
        System.Console.WriteLine("No form fields were recognized.")
        Return
    End If

    System.Console.WriteLine("Recognized form field count: {0}", page.Items.Count)

    For Each field As Vintasoft.Imaging.FormsProcessing.FormRecognition.FormField In page.Items
        ' only OCR fields are reported; fields of other types are skipped
        Dim ocrField As Vintasoft.Imaging.FormsProcessing.FormRecognition.Ocr.OcrField = TryCast(field, Vintasoft.Imaging.FormsProcessing.FormRecognition.Ocr.OcrField)
        If ocrField Is Nothing Then
            Continue For
        End If

        ' field summary; the field confidence is multiplied by 100 to print it as percents
        System.Console.WriteLine(" OCR field: name: {0}; value: {1}; confidence: {2:F1}%", ocrField.Name, ocrField.Value, ocrField.Confidence * 100)

        ' words of the OCR result of the field
        Dim fieldOcrPage As Vintasoft.Imaging.Ocr.Results.OcrPage = ocrField.OcrResult
        For Each word As Vintasoft.Imaging.Ocr.Results.OcrObject In fieldOcrPage.GetWords(75, 75)
            System.Console.WriteLine(" OCR word: {0}; confidence: {1:F1}%", word.ToString(), word.Confidence)
        Next
    Next
End Sub