VintaSoft Imaging .NET SDK 12.0
In This Topic
    OCR: How to export the OCR result to an hOCR file?
    In This Topic
    OCR results can be exported to an hOCR file.

    Here are C# and VB.NET examples that show how to export the OCR result to an hOCR file:
    // Example: recognize the text of one image with the Tesseract OCR engine
    // and write the recognition result to an hOCR file.

    // source image to recognize
    string imageFilePath = @"D:\TestImage.png";
    // output file: same directory and base name as the image, ".hocr" extension
    string hocrFilePath = System.IO.Path.Combine(
        System.IO.Path.GetDirectoryName(imageFilePath),
        System.IO.Path.GetFileNameWithoutExtension(imageFilePath) + ".hocr");

    // the engine is disposed when the using block ends
    using (var ocrEngine = new Vintasoft.Imaging.Ocr.Tesseract.TesseractOcr())
    {
        // initialize the engine with settings created for the English language
        ocrEngine.Init(
            new Vintasoft.Imaging.Ocr.Tesseract.TesseractOcrSettings(
                Vintasoft.Imaging.Ocr.OcrLanguage.English));

        // open the source image; it is disposed when the using block ends
        using (var sourceImage = new Vintasoft.Imaging.VintasoftImage(imageFilePath))
        {
            // hand the image to the engine and run the recognition
            ocrEngine.SetImage(sourceImage);
            Vintasoft.Imaging.Ocr.Results.OcrPage recognizedPage = ocrEngine.Recognize();

            // FileMode.Create: a new file is created, an existing one is overwritten
            using (System.IO.Stream hocrStream = System.IO.File.Open(hocrFilePath, System.IO.FileMode.Create))
            {
                // export the recognized page to the stream via the hOCR codec
                new Vintasoft.Imaging.Ocr.Results.HOcrCodec().Export(recognizedPage, hocrStream);
            }

            // detach the image from the engine before the image is disposed
            ocrEngine.ClearImage();
        }

        // shut the engine down explicitly before it is disposed
        ocrEngine.Shutdown();
    }
    
    ' Example: recognize the text of one image with the Tesseract OCR engine
    ' and write the recognition result to an hOCR file.

    ' source image to recognize
    Dim imageFilePath As String = "D:\TestImage.png"
    ' output file: same directory and base name as the image, ".hocr" extension
    Dim outputDirectory As String = System.IO.Path.GetDirectoryName(imageFilePath)
    Dim hocrFileName As String = System.IO.Path.GetFileNameWithoutExtension(imageFilePath) & ".hocr"
    Dim hocrFilePath As String = System.IO.Path.Combine(outputDirectory, hocrFileName)

    ' the engine is disposed when the Using block ends
    Using ocrEngine As New Vintasoft.Imaging.Ocr.Tesseract.TesseractOcr()
        ' initialize the engine with settings created for the English language
        Dim engineSettings As New Vintasoft.Imaging.Ocr.Tesseract.TesseractOcrSettings(Vintasoft.Imaging.Ocr.OcrLanguage.English)
        ocrEngine.Init(engineSettings)

        ' open the source image; it is disposed when the Using block ends
        Using sourceImage As New Vintasoft.Imaging.VintasoftImage(imageFilePath)
            ' hand the image to the engine and run the recognition
            ocrEngine.SetImage(sourceImage)
            Dim recognizedPage As Vintasoft.Imaging.Ocr.Results.OcrPage = ocrEngine.Recognize()

            ' FileMode.Create: a new file is created, an existing one is overwritten
            Using hocrStream As System.IO.Stream = System.IO.File.Open(hocrFilePath, System.IO.FileMode.Create)
                ' export the recognized page to the stream via the hOCR codec
                Dim codec As New Vintasoft.Imaging.Ocr.Results.HOcrCodec()
                codec.Export(recognizedPage, hocrStream)
            End Using

            ' detach the image from the engine before the image is disposed
            ocrEngine.ClearImage()
        End Using

        ' shut the engine down explicitly before it is disposed
        ocrEngine.Shutdown()
    End Using