VintaSoft Imaging .NET SDK 12.4: Documentation for .NET developer
In This Topic
    OCR: Save the OCR result to a text file
    In This Topic
    OCR result can be saved to a text file as formatted text (OcrPage.GetFormattedText method) or not formatted text (OcrPage.GetText method).


    Here is C#/VB.NET code that shows how to save OCR result to a text file as formatted text:
    string imageFilePath = @"D:\TestImage.png";
    // create the OCR engine
    using (Vintasoft.Imaging.Ocr.Tesseract.TesseractOcr tesseractOcr = new Vintasoft.Imaging.Ocr.Tesseract.TesseractOcr())
    {
        // specify that OCR engine will recognize English text
        Vintasoft.Imaging.Ocr.OcrLanguage language = Vintasoft.Imaging.Ocr.OcrLanguage.English;
        // create the OCR engine settings
        Vintasoft.Imaging.Ocr.Tesseract.TesseractOcrSettings settings = 
            new Vintasoft.Imaging.Ocr.Tesseract.TesseractOcrSettings(language);
        // initialize the OCR engine
        tesseractOcr.Init(settings);
    
        // load an image
        using (Vintasoft.Imaging.VintasoftImage image = new Vintasoft.Imaging.VintasoftImage(imageFilePath))
        {
            // specify the image, where text must be recognized
            tesseractOcr.SetImage(image);
    
            // recognize text in the image
            Vintasoft.Imaging.Ocr.Results.OcrPage ocrResult = tesseractOcr.Recognize();
    
            // get the recognized text as formatted text
            string ocrResultAsFormattedContent = ocrResult.GetFormattedText();
    
            string textFilePath = System.IO.Path.Combine(
                System.IO.Path.GetDirectoryName(imageFilePath),
                System.IO.Path.GetFileNameWithoutExtension(imageFilePath) + ".txt");
            // save the formatted text in a file
            System.IO.File.WriteAllText(textFilePath, ocrResultAsFormattedContent, System.Text.Encoding.UTF8);
    
            // clear the image
            tesseractOcr.ClearImage();
        }
        // shutdown the OCR engine
        tesseractOcr.Shutdown();
    }
    
    Dim imageFilePath As String = "D:\TestImage.png"
    ' create the OCR engine
    Using tesseractOcr As New Vintasoft.Imaging.Ocr.Tesseract.TesseractOcr()
        ' specify that OCR engine will recognize English text
        Dim language As Vintasoft.Imaging.Ocr.OcrLanguage = Vintasoft.Imaging.Ocr.OcrLanguage.English
        ' create the OCR engine settings
        Dim settings As New Vintasoft.Imaging.Ocr.Tesseract.TesseractOcrSettings(language)
        ' initialize the OCR engine
        tesseractOcr.Init(settings)
    
        ' load an image
        Using image As New Vintasoft.Imaging.VintasoftImage(imageFilePath)
            ' specify the image, where text must be recognized
            tesseractOcr.SetImage(image)
    
            ' recognize text in the image
            Dim ocrResult As Vintasoft.Imaging.Ocr.Results.OcrPage = tesseractOcr.Recognize()
    
            ' get the recognized text as formatted text
            Dim ocrResultAsFormattedContent As String = ocrResult.GetFormattedText()
    
            Dim textFilePath As String = System.IO.Path.Combine(System.IO.Path.GetDirectoryName(imageFilePath), System.IO.Path.GetFileNameWithoutExtension(imageFilePath) & ".txt")
            ' save the formatted text in a file
            System.IO.File.WriteAllText(textFilePath, ocrResultAsFormattedContent, System.Text.Encoding.UTF8)
    
            ' clear the image
            tesseractOcr.ClearImage()
        End Using
        ' shutdown the OCR engine
        tesseractOcr.Shutdown()
    End Using