
public abstract class TextSearchEngine
public __gc abstract class TextSearchEngine
public ref class TextSearchEngine abstract
'Declaration Public MustInherit Class TextSearchEngine
This C#/VB.NET code shows how to create a custom text search engine that searches for digits on a PDF page.
''' <summary>
''' Outputs the information about digits in content of PDF document.
''' </summary>
''' <param name="document">PDF document where digits should be searched.</param>
Public Sub SearchDigitsInTextOfPdfDocumentUsingTextSearchEngine(document As Vintasoft.Imaging.Pdf.PdfDocument)
    System.Console.WriteLine("Searching the digits in text of PDF document.")

    ' process every page of the document
    For i As Integer = 0 To document.Pages.Count - 1
        Dim textRegions As Vintasoft.Imaging.Text.TextRegion() = AdvancedDigitsSearchOnPdfPage(document.Pages(i))
        If textRegions IsNot Nothing Then
            ' print the text and the rectangle of each found region
            For j As Integer = 0 To textRegions.Length - 1
                System.Console.WriteLine(String.Format("- Text={0}, Rectangle={1}", textRegions(j).TextContent, textRegions(j).Rectangle))
            Next
        End If
    Next

    System.Console.WriteLine("Searching the digits in text of PDF document is finished.")
End Sub

''' <summary>
''' Searches digits on PDF page.
''' </summary>
''' <param name="page">PDF page where digits should be searched.</param>
''' <returns>An array of text regions on PDF page where text was found.</returns>
Public Function AdvancedDigitsSearchOnPdfPage(page As Vintasoft.Imaging.Pdf.Tree.PdfPage) As Vintasoft.Imaging.Text.TextRegion()
    Dim textRegions As New System.Collections.Generic.List(Of Vintasoft.Imaging.Text.TextRegion)()
    Dim digitsSearchEngine As New DigitsSearchEngine()
    Dim textRegion As Vintasoft.Imaging.Text.TextRegion = Nothing
    Dim startIndex As Integer = 0

    ' repeat the search until no more matches are found
    Do
        ' search text
        ' NOTE(review): startIndex is presumably passed ByRef and set by FindText
        ' to the position of the match - confirm against the SDK reference
        textRegion = page.TextRegion.FindText(digitsSearchEngine, startIndex, False)
        If textRegion IsNot Nothing Then
            ' add result
            textRegions.Add(textRegion)
            ' shift start index past the found text
            startIndex += textRegion.TextContent.Length
        End If
    Loop While textRegion IsNot Nothing

    Return textRegions.ToArray()
End Function

''' <summary>
''' Class for searching the digits in text of PDF page.
''' </summary>
Private Class DigitsSearchEngine
    Inherits Vintasoft.Imaging.Text.TextSearchEngine

    ''' <summary>
    ''' Searches the first run of consecutive digits in the analyzed part
    ''' of the string of PDF page.
    ''' </summary>
    ''' <param name="sourceString">Source string (string of PDF page) where text must be searched.</param>
    ''' <param name="startIndex">The zero-based index, in the sourceString, from which text must be searched.</param>
    ''' <param name="length">The number of characters, in the sourceString, to analyze.</param>
    ''' <param name="rightToLeft">Indicates that text should be searched from right to left.</param>
    ''' <returns>
    ''' Vintasoft.Imaging.Text.TextSearchResult object, created from the index
    ''' of the first digit and the count of digits, if digits are found;
    ''' otherwise, Nothing.
    ''' </returns>
    Public Overrides Function Find(sourceString As String, startIndex As Integer, length As Integer, rightToLeft As Boolean) As Vintasoft.Imaging.Text.TextSearchResult
        ' inclusive index of the first digit of the run
        Dim startDigitIndex As Integer = -1
        ' exclusive index of the end of the run
        Dim endDigitIndex As Integer = -1

        Dim start As Integer = 0
        Dim [end] As Integer = 0

        If rightToLeft Then
            ' if searching text from the right to the left:
            ' scan the window [startIndex, startIndex + length) from its last character
            start = startIndex + length
            ' do not scan below the beginning of the analyzed window
            [end] = startIndex
            For index As Integer = start - 1 To [end] Step -1
                If Char.IsDigit(sourceString(index)) AndAlso endDigitIndex = -1 Then
                    ' the rightmost digit is found
                    endDigitIndex = index + 1
                ElseIf Not Char.IsDigit(sourceString(index)) AndAlso endDigitIndex <> -1 Then
                    ' the first non-digit to the left of the run is found
                    startDigitIndex = index + 1
                    Exit For
                End If
            Next
            ' the run of digits reaches the beginning of the analyzed window
            If endDigitIndex <> -1 AndAlso startDigitIndex = -1 Then
                startDigitIndex = [end]
            End If
        Else
            ' if searching text from the left to the right
            start = startIndex
            [end] = startIndex + length
            For index As Integer = start To [end] - 1
                If Char.IsDigit(sourceString(index)) AndAlso startDigitIndex = -1 Then
                    ' the leftmost digit is found
                    startDigitIndex = index
                ElseIf Not Char.IsDigit(sourceString(index)) AndAlso startDigitIndex <> -1 Then
                    ' the first non-digit to the right of the run is found
                    endDigitIndex = index
                    Exit For
                End If
            Next
            ' the run of digits reaches the end of the analyzed window
            If startDigitIndex <> -1 AndAlso endDigitIndex = -1 Then
                endDigitIndex = [end]
            End If
        End If

        ' if digit is not found
        If startDigitIndex = -1 Then
            Return Nothing
        End If

        ' return the text search result
        Return New Vintasoft.Imaging.Text.TextSearchResult(startDigitIndex, endDigitIndex - startDigitIndex)
    End Function
End Class
/// <summary>
/// Outputs the information about digits in content of PDF document.
/// </summary>
/// <param name="document">PDF document where digits should be searched.</param>
public void SearchDigitsInTextOfPdfDocumentUsingTextSearchEngine(Vintasoft.Imaging.Pdf.PdfDocument document)
{
    System.Console.WriteLine("Searching the digits in text of PDF document.");

    // process every page of the document
    for (int i = 0; i < document.Pages.Count; i++)
    {
        Vintasoft.Imaging.Text.TextRegion[] textRegions = AdvancedDigitsSearchOnPdfPage(document.Pages[i]);
        if (textRegions != null)
        {
            // print the text and the rectangle of each found region
            for (int j = 0; j < textRegions.Length; j++)
            {
                System.Console.WriteLine(string.Format("- Text={0}, Rectangle={1}",
                    textRegions[j].TextContent, textRegions[j].Rectangle));
            }
        }
    }

    System.Console.WriteLine("Searching the digits in text of PDF document is finished.");
}

/// <summary>
/// Searches digits on PDF page.
/// </summary>
/// <param name="page">PDF page where digits should be searched.</param>
/// <returns>An array of text regions on PDF page where text was found.</returns>
public Vintasoft.Imaging.Text.TextRegion[] AdvancedDigitsSearchOnPdfPage(
    Vintasoft.Imaging.Pdf.Tree.PdfPage page)
{
    System.Collections.Generic.List<Vintasoft.Imaging.Text.TextRegion> textRegions =
        new System.Collections.Generic.List<Vintasoft.Imaging.Text.TextRegion>();
    DigitsSearchEngine digitsSearchEngine = new DigitsSearchEngine();
    Vintasoft.Imaging.Text.TextRegion textRegion = null;
    int startIndex = 0;

    // repeat the search until no more matches are found
    do
    {
        // search text; startIndex is passed by reference
        // NOTE(review): FindText presumably sets startIndex to the position
        // of the match - confirm against the SDK reference
        textRegion = page.TextRegion.FindText(digitsSearchEngine, ref startIndex, false);
        if (textRegion != null)
        {
            // add result
            textRegions.Add(textRegion);
            // shift start index past the found text
            startIndex += textRegion.TextContent.Length;
        }
    } while (textRegion != null);

    return textRegions.ToArray();
}

/// <summary>
/// Class for searching the digits in text of PDF page.
/// </summary>
class DigitsSearchEngine : Vintasoft.Imaging.Text.TextSearchEngine
{
    /// <summary>
    /// Searches the first run of consecutive digits in the analyzed part
    /// of the string of PDF page.
    /// </summary>
    /// <param name="sourceString">Source string (string of PDF page) where text must be searched.</param>
    /// <param name="startIndex">The zero-based index, in the sourceString, from which text must be searched.</param>
    /// <param name="length">The number of characters, in the sourceString, to analyze.</param>
    /// <param name="rightToLeft">Indicates that text should be searched from right to left.</param>
    /// <returns>
    /// Vintasoft.Imaging.Text.TextSearchResult object, created from the index
    /// of the first digit and the count of digits, if digits are found;
    /// otherwise, null.
    /// </returns>
    public override Vintasoft.Imaging.Text.TextSearchResult Find(
        string sourceString, int startIndex, int length, bool rightToLeft)
    {
        // inclusive index of the first digit of the run
        int startDigitIndex = -1;
        // exclusive index of the end of the run
        int endDigitIndex = -1;

        int start = 0;
        int end = 0;

        if (rightToLeft)
        {
            // if searching text from the right to the left:
            // scan the window [startIndex, startIndex + length) from its last character
            start = startIndex + length;
            // do not scan below the beginning of the analyzed window
            end = startIndex;
            for (int index = start - 1; index >= end; index--)
            {
                if (char.IsDigit(sourceString[index]) && endDigitIndex == -1)
                {
                    // the rightmost digit is found
                    endDigitIndex = index + 1;
                }
                else if (!char.IsDigit(sourceString[index]) && endDigitIndex != -1)
                {
                    // the first non-digit to the left of the run is found
                    startDigitIndex = index + 1;
                    break;
                }
            }
            // the run of digits reaches the beginning of the analyzed window
            if (endDigitIndex != -1 && startDigitIndex == -1)
            {
                startDigitIndex = end;
            }
        }
        else
        {
            // if searching text from the left to the right
            start = startIndex;
            end = startIndex + length;
            for (int index = start; index < end; index++)
            {
                if (char.IsDigit(sourceString[index]) && startDigitIndex == -1)
                {
                    // the leftmost digit is found
                    startDigitIndex = index;
                }
                else if (!char.IsDigit(sourceString[index]) && startDigitIndex != -1)
                {
                    // the first non-digit to the right of the run is found
                    endDigitIndex = index;
                    break;
                }
            }
            // the run of digits reaches the end of the analyzed window
            if (startDigitIndex != -1 && endDigitIndex == -1)
            {
                endDigitIndex = end;
            }
        }

        // if digit is not found
        if (startDigitIndex == -1)
        {
            return null;
        }

        // return the text search result
        return new Vintasoft.Imaging.Text.TextSearchResult(
            startDigitIndex, endDigitIndex - startDigitIndex);
    }
}
System.Object
Vintasoft.Imaging.Text.TextSearchEngine
Target Platforms: .NET 9; .NET 8; .NET 7; .NET 6; .NET Framework 4.8, 4.7, 4.6, 4.5, 4.0, 3.5