GetWords() Method (TextRegion)
Returns the words of this text region.
This VB.NET/C# code shows how to get all words, including punctuation, from a PDF page.
''' <summary>
''' Collects the text of every word found in the text region of a PDF page.
''' </summary>
''' <param name="page">PDF page whose text region is queried for words.</param>
''' <returns>
''' A string that contains the text content of each word,
''' each followed by a line terminator.
''' </returns>
Public Shared Function GetWordsWithPunctuationFromPdfPage(page As Vintasoft.Imaging.Pdf.Tree.PdfPage) As String
    ' ask the text region of the page for its word regions
    Dim wordRegions As Vintasoft.Imaging.Text.TextRegion() = page.TextRegion.GetWords()

    ' write the text of each word on its own line
    Dim buffer As System.Text.StringBuilder = New System.Text.StringBuilder()
    For index As Integer = 0 To wordRegions.Length - 1
        buffer.AppendLine(wordRegions(index).TextContent)
    Next

    Return buffer.ToString()
End Function
/// <summary>
/// Collects the text of every word found in the text region of a PDF page.
/// </summary>
/// <param name="page">PDF page whose text region is queried for words.</param>
/// <returns>
/// A string that contains the text content of each word,
/// each followed by a line terminator.
/// </returns>
public static string GetWordsWithPunctuationFromPdfPage(Vintasoft.Imaging.Pdf.Tree.PdfPage page)
{
    // ask the text region of the page for its word regions
    Vintasoft.Imaging.Text.TextRegion[] wordRegions = page.TextRegion.GetWords();

    // write the text of each word on its own line
    System.Text.StringBuilder buffer = new System.Text.StringBuilder();
    for (int index = 0; index < wordRegions.Length; index++)
    {
        buffer.AppendLine(wordRegions[index].TextContent);
    }

    return buffer.ToString();
}
Target Platforms: .NET 9; .NET 8; .NET 7; .NET 6; .NET Framework 4.8, 4.7, 4.6, 4.5, 4.0, 3.5