VintaSoft Imaging .NET SDK 12.4: Documentation for .NET developer
In This Topic
    OCR: How to export the OCR result to hOCR file?
    In This Topic
    OCR results can be exported to an hOCR file.


    Here is C#/VB.NET code that shows how to export an OCR result to an hOCR file:
    // path of the source image; the hOCR file is written into the same directory
    string imageFilePath = @"D:\TestImage.png";
    // construct the Tesseract OCR engine (disposed when the using block ends)
    using (Vintasoft.Imaging.Ocr.Tesseract.TesseractOcr ocrEngine = new Vintasoft.Imaging.Ocr.Tesseract.TesseractOcr())
    {
        // initialize the engine with settings that select English as the recognition language
        ocrEngine.Init(
            new Vintasoft.Imaging.Ocr.Tesseract.TesseractOcrSettings(
                Vintasoft.Imaging.Ocr.OcrLanguage.English));

        // open the source image
        using (Vintasoft.Imaging.VintasoftImage sourceImage = new Vintasoft.Imaging.VintasoftImage(imageFilePath))
        {
            // hand the image to the engine
            ocrEngine.SetImage(sourceImage);

            // run recognition and keep the resulting OCR page
            Vintasoft.Imaging.Ocr.Results.OcrPage recognizedPage = ocrEngine.Recognize();

            // build the output path: same directory and base name as the image, ".hocr" extension
            string outputDirectory = System.IO.Path.GetDirectoryName(imageFilePath);
            string outputFileName = System.IO.Path.GetFileNameWithoutExtension(imageFilePath) + ".hocr";
            string outputPath = System.IO.Path.Combine(outputDirectory, outputFileName);

            // create (or overwrite) the output file
            using (System.IO.Stream hocrStream = System.IO.File.Open(outputPath, System.IO.FileMode.Create))
            {
                // export the recognized page to the stream through the hOCR codec
                new Vintasoft.Imaging.Ocr.Results.HOcrCodec().Export(recognizedPage, hocrStream);
            }

            // detach the image from the engine before the image is disposed
            ocrEngine.ClearImage();
        }

        // shut the engine down before it is disposed
        ocrEngine.Shutdown();
    }
    
    ' path of the source image; the hOCR file is written into the same directory
    Dim imageFilePath As String = "D:\TestImage.png"
    ' construct the Tesseract OCR engine (disposed when the Using block ends)
    Using ocrEngine As New Vintasoft.Imaging.Ocr.Tesseract.TesseractOcr()
        ' initialize the engine with settings that select English as the recognition language
        ocrEngine.Init(New Vintasoft.Imaging.Ocr.Tesseract.TesseractOcrSettings(Vintasoft.Imaging.Ocr.OcrLanguage.English))

        ' open the source image
        Using sourceImage As New Vintasoft.Imaging.VintasoftImage(imageFilePath)
            ' hand the image to the engine
            ocrEngine.SetImage(sourceImage)

            ' run recognition and keep the resulting OCR page
            Dim recognizedPage As Vintasoft.Imaging.Ocr.Results.OcrPage = ocrEngine.Recognize()

            ' build the output path: same directory and base name as the image, ".hocr" extension
            Dim outputDirectory As String = System.IO.Path.GetDirectoryName(imageFilePath)
            Dim outputFileName As String = System.IO.Path.GetFileNameWithoutExtension(imageFilePath) & ".hocr"
            Dim outputPath As String = System.IO.Path.Combine(outputDirectory, outputFileName)

            ' create (or overwrite) the output file
            Using hocrStream As System.IO.Stream = System.IO.File.Open(outputPath, System.IO.FileMode.Create)
                ' export the recognized page to the stream through the hOCR codec
                Call New Vintasoft.Imaging.Ocr.Results.HOcrCodec().Export(recognizedPage, hocrStream)
            End Using

            ' detach the image from the engine before the image is disposed
            ocrEngine.ClearImage()
        End Using

        ' shut the engine down before it is disposed
        ocrEngine.Shutdown()
    End Using