Convert large text file to multi page pdf

Questions, comments and suggestions concerning VintaSoft PDF .NET Plug-in.

Moderator: Alex

Post Reply
IntegraHarlan
Posts: 65
Joined: Fri Jan 24, 2020 3:37 am

Convert large text file to multi page pdf

Post by IntegraHarlan »

I am trying to convert a large text file to a multipage pdf document.
I tried the code in this example https://www.vintasoft.com/docs/vsimagin ... ument.html.
This works fine for 1 page, but when the text file is large enough to be on multiple pages, the text is truncated.
I could probably use the MeasureString method to find out the size of the string and split it to fit the pages. However, this is cumbersome and, from experience, not very accurate.
I am hoping you have an easier way to do this.

Thanks
Alex
Site Admin
Posts: 1943
Joined: Thu Jul 10, 2008 2:21 pm

Re: Convert large text file to multi page pdf

Post by Alex »

Hi Harlan,

Here is an example that converts a large text file to a multipage PDF document:

Code: Select all

/// <summary>
/// Converts a text file to a multipage PDF document.
/// </summary>
/// <remarks>
/// The source file is read as UTF-8 and all line endings are normalized to '\n'.
/// A4 pages are appended one by one; for each page <c>GetDrawnText</c> cuts the portion
/// of the remaining text that fits into the padded page rectangle. Because the loop
/// is a do/while, one page is created even when the source file is empty.
/// </remarks>
/// <param name="sourceTextFilename">The filename of source text file.</param>
/// <param name="destPdfFilename">The filename of destination PDF document.</param>
public static void ConvertTextFileToPdfDocument(
    string sourceTextFilename,
    string destPdfFilename)
{
    // font name
    string fontName = "Arial";

    // font size, in points
    int fontSize = 12;

    // text padding, in points
    Vintasoft.Imaging.PaddingF textPadding = new Vintasoft.Imaging.PaddingF(30);

    // get text from text file
    string text = System.IO.File.ReadAllText(sourceTextFilename, System.Text.Encoding.UTF8);

    // set new line to '\n' character
    text = text.Replace("\r\n", "\n");
    text = text.Replace("\n\r", "\n");
    text = text.Replace("\r", "\n");

    // create PDF document
    using (Vintasoft.Imaging.Pdf.PdfDocument document =
        new Vintasoft.Imaging.Pdf.PdfDocument(destPdfFilename, Vintasoft.Imaging.Pdf.PdfFormat.Pdf_14))
    {
        Vintasoft.Imaging.Pdf.Tree.Fonts.PdfFont font;
        // find TTF font that should be used for drawing a text
        using (System.IO.Stream fontProgramStream =
            document.FontProgramsController.GetTrueTypeFontProgram(fontName))
        {
            // create PDF font based on TTF font program
            font = document.FontManager.CreateCIDFontFromTrueTypeFont(fontProgramStream);
        }

        do
        {
            // create page of A4 size
            Vintasoft.Imaging.Pdf.Tree.PdfPage pdfPage =
                new Vintasoft.Imaging.Pdf.Tree.PdfPage(document,
                    Vintasoft.Imaging.ImageSize.FromPaperKind(Vintasoft.Imaging.PaperSizeKind.A4));

            // add page to the PDF document
            document.Pages.Add(pdfPage);

            // get PdfGraphics that is associated with PDF page
            using (Vintasoft.Imaging.Pdf.Drawing.PdfGraphics graphics =
                Vintasoft.Imaging.Pdf.Drawing.PdfGraphics.FromPage(pdfPage))
            {
                // create a brush that should be used for drawing a text
                Vintasoft.Imaging.Pdf.Drawing.PdfBrush brush =
                    new Vintasoft.Imaging.Pdf.Drawing.PdfBrush(System.Drawing.Color.Black);

                // specify a rectangle where text should be drawn
                System.Drawing.RectangleF rect = pdfPage.MediaBox;
                // apply padding
                rect = textPadding.ApplyTo(rect);

                // cut the text that must be drawn on current page;
                // 'text' is updated to hold only the text that is still not drawn
                string drawnText = GetDrawnText(graphics, font, fontSize, rect.Width, rect.Height, ref text);

                // draw text on the PDF page; this overload takes a PdfContentAlignment,
                // so no System.Drawing.StringFormat has to be created here
                graphics.DrawString(drawnText, font, fontSize, brush, rect, Vintasoft.Imaging.Pdf.Drawing.PdfContentAlignment.Left, true);
            }
        } while (!string.IsNullOrEmpty(text)); // next page while some text is left

        // subset font
        document.FontManager.PackAllFonts();

        // pack PDF document
        document.Pack();
    }
}

/// <summary>
/// Returns the text portion that must be drawn on specified PDF graphics in rectangle with specified size.
/// </summary>
/// <remarks>
/// If the whole text does not fit into the rectangle height, the length of the portion is
/// searched with a bisection over the character count, followed by a character-by-character
/// correction. The measured and returned portion is trimmed of leading and trailing white-space.
/// On return, <paramref name="text"/> holds the remaining (not yet drawn) text; it is set to an
/// empty string when nothing is left or when the input was null or empty.
/// </remarks>
/// <param name="graphics">The PDF graphics.</param>
/// <param name="font">The text font.</param>
/// <param name="fontSize">The text font size.</param>
/// <param name="width">The width of text rectangle.</param>
/// <param name="height">The height of text rectangle.</param>
/// <param name="text">The text; the drawn portion is removed from it.</param>
/// <returns>The text portion that must be drawn.</returns>
private static string GetDrawnText(
    Vintasoft.Imaging.Pdf.Drawing.PdfGraphics graphics,
    Vintasoft.Imaging.Pdf.Tree.Fonts.PdfFont font,
    float fontSize,
    float width,
    float height,
    ref string text)
{
    // nothing to draw: normalize null to empty string and return it
    if (string.IsNullOrEmpty(text))
    {
        text = "";
        return text;
    }

    // bisection state: 'position' is the current candidate length (kept as float so that
    // halving 'step' does not lose precision), 'step' is the current bisection step
    float step = text.Length;
    float position = text.Length;
    int prevTextLength = 0;
    string drawnText = text;
    int drawnTextLength = text.Length;

    // measure all text
    // NOTE(review): the 'true' argument presumably enables word wrapping within 'width' - confirm against the MeasureString documentation
    float currentWidth, currentHeight;
    graphics.MeasureString(text.Substring(0, drawnTextLength), font, fontSize, width, true, out currentWidth, out currentHeight);

    // if text height is greater than maximum height
    if (currentHeight > height)
    {
        ////////////////////////////////////////////
        // find drawn text using bisection algorithm
        ////////////////////////////////////////////
        // the loop stops when two consecutive iterations give the same candidate length
        while (drawnTextLength != prevTextLength)
        {
            step /= 2;
            // too tall -> shorten the candidate, otherwise -> lengthen it
            if (currentHeight > height)
                position -= step;
            else
                position += step;

            prevTextLength = drawnTextLength;
            drawnTextLength = (int)System.Math.Ceiling(position);

            // the candidate is trimmed before measuring; drawnTextLength still counts
            // the untrimmed characters that will be removed from 'text'
            drawnText = text.Substring(0, drawnTextLength).Trim();
            graphics.MeasureString(drawnText, font, fontSize, width, true, out currentWidth, out currentHeight);
        }
        // the bisection may stop on a candidate that is still too tall:
        // remove characters one by one until the candidate fits
        while (currentHeight > height)
        {
            drawnTextLength--;
            drawnText = text.Substring(0, drawnTextLength).Trim();
            graphics.MeasureString(drawnText, font, fontSize, width, true, out currentWidth, out currentHeight);
        }
        ////////////////////////////////////////////

        // add last '\n' character to the drawn text
        // (only the consumed length is increased, so the next page does not start with this '\n')
        if (drawnTextLength < text.Length && text[drawnTextLength] == '\n')
            drawnTextLength++;

        // always consume at least one character, so that the caller's page loop makes progress
        if (drawnTextLength == 0)
        {
            drawnTextLength = 1;
            drawnText = text.Substring(0, 1);
        }
    }

    // remove the consumed characters; the rest is left for the next page
    text = text.Substring(drawnTextLength);

    return drawnText;
}
Best regards, Alexander
IntegraHarlan
Posts: 65
Joined: Fri Jan 24, 2020 3:37 am

Re: Convert large text file to multi page pdf

Post by IntegraHarlan »

Hi Alex,
Thank you for the example. It gets me pretty close.
However when using text files with long text string with no spaces, the text is truncated at the right border of the page.
I think this is happening because the DrawString method will word wrap but not wrap on character.
I was able to get around this by using a System.Drawing font and using the DrawString method that takes a System.Drawing font and the StringFormat enum.

This worked well, except the text does not get added to the page TextRegion. I assume that the reason for this is that I am using a System.Drawing font.
Here is the code I used:

Code: Select all

 /// <summary>
        /// Converts a text file to a multipage PDF document.
        /// </summary>
        /// <remarks>
        /// This CAN throw.
        /// The file is read as UTF-8, line endings are normalized to '\n' and Letter-size pages
        /// are added until all text is drawn. The PDF is written next to the source file,
        /// with ".pdf" appended to the source file name.
        /// </remarks>
        /// <param name="filePath">The full-path to the file to convert.</param>
        /// <returns>The full-path to the converted PDF.</returns>
        public static string ConvertTextFileToPDF(string filePath)
        {
            // Path to the created PDF file.
            string outputPath = string.Concat(filePath, ".pdf");

            // Get text from text file
            string text = File.ReadAllText(filePath, System.Text.Encoding.UTF8);

            // Set new line to '\n' character
            text = text.Replace("\r\n", "\n");
            text = text.Replace("\n\r", "\n");
            text = text.Replace("\r", "\n");

            // Margin around the text and the padding built from it are the same for every page.
            float textMargin = 25f;
            Vintasoft.Imaging.PaddingF textPadding = new Vintasoft.Imaging.PaddingF(textMargin);

            // The DrawString method will not wrap on character when a PDF font is used. Using a System mono space font
            // so the DrawString method will wrap on character. The 15.25 em-size is the closest size that emulates the font size we want.
            // The font is created once for all pages and disposed after the document is packed and closed:
            // System.Drawing.Font owns a GDI+ handle, so creating an undisposed font per page leaks handles on large files.
            using (Font font = new Font(FontFamily.GenericMonospace, 15.25f, GraphicsUnit.Pixel))
            // Create PDF document
            using (PdfDocument pdfDocument = new PdfDocument())
            {
                do
                {
                    PdfPage pdfPage = new PdfPage(pdfDocument, Vintasoft.Imaging.ImageSize.FromPaperKind(Vintasoft.Imaging.PaperSizeKind.Letter));

                    // Add page to the PDF document
                    pdfDocument.Pages.Add(pdfPage);

                    // Create a brush that should be used for drawing a text
                    PdfBrush brush = new PdfBrush(Color.Black);

                    // Create area on document to write the text to.
                    RectangleF rect = pdfPage.MediaBox;
                    rect = textPadding.ApplyTo(rect);

                    // Convert unit size to Pixel size because measuring the text size is done in pixels.
                    float pageWidthPixels = PdfPage.ConvertFromUserUnitsToUnitOfMeasure(rect.Width, Vintasoft.Imaging.UnitOfMeasure.Pixels);
                    float pageHeightPixels = PdfPage.ConvertFromUserUnitsToUnitOfMeasure(rect.Height, Vintasoft.Imaging.UnitOfMeasure.Pixels);

                    // Get PdfGraphics that is associated with PDF page
                    using (PdfGraphics graphics = PdfGraphics.FromPage(pdfPage))
                    {
                        // Get text that must be drawn on current page; 'text' keeps only the not yet drawn remainder
                        string drawnText = GetDrawnText(font, pageWidthPixels, pageHeightPixels, ref text);

                        // Draw text on the PDF page
                        graphics.DrawString(drawnText, font, brush, rect, StringFormat.GenericDefault);
                    }
                } while (string.IsNullOrEmpty(text) == false);

                pdfDocument.FontManager.PackAllFonts();

                //Pack the document. This will also save it to the outputPath.
                pdfDocument.Pack(outputPath);
            }

            return outputPath;
        }
        
        
        /// <summary>
        /// Returns the text portion that must be drawn in PDF graphics on the rectangle with specified size.
        /// </summary>
        /// <remarks>
        /// The text is measured in pixels with GDI+ (<c>System.Drawing.Graphics.MeasureString</c>).
        /// If the whole text is taller than the page, the portion length is found by bisection over
        /// the character count, followed by a character-by-character correction. The returned portion
        /// is trimmed. On return, <paramref name="text"/> holds the text that is still not drawn.
        /// </remarks>
        /// <param name="font">The text font.</param>
        /// <param name="pageWidth">The width of text rectangle, in pixels.</param>
        /// <param name="pageHeight">The height of text rectangle, in pixels.</param>
        /// <param name="text">The text; the drawn portion is removed from it.</param>
        /// <returns>The text portion that must be drawn.</returns>
        private static string GetDrawnText(Font font, float pageWidth, float pageHeight, ref string text)
        {
            if (string.IsNullOrEmpty(text))
            {
                text = "";
                return text;
            }

            float step = text.Length;
            float position = text.Length;
            int drawnTextLength = text.Length;
            string drawnText = text;
            int prevTextLength = 0;

            // Use System.Drawing.Graphics because it uses GDI+. This will more accurately measure the text length.
            using (Graphics graphics = Graphics.FromHwnd(IntPtr.Zero))
            {
                graphics.PageUnit = GraphicsUnit.Pixel;

                // MeasureString reports a size inside the given layout area, so the layout area must be
                // taller than the page, otherwise an overflow can never be detected. A fixed height of
                // 1500 pixels breaks for pages of about that height or taller; derive it from the page
                // height instead (1500 is kept as the lower bound, so small pages are measured as before).
                SizeF layoutArea = new SizeF(pageWidth, Math.Max(1500f, pageHeight * 2f));

                SizeF textSize = graphics.MeasureString(text.Substring(0, drawnTextLength), font, layoutArea, StringFormat.GenericDefault);

                // If text height is greater than maximum height
                if (textSize.Height > pageHeight)
                {
                    // Find drawn text using bisection algorithm;
                    // stop when two consecutive iterations give the same candidate length.
                    while (drawnTextLength != prevTextLength)
                    {
                        step /= 2;
                        if (textSize.Height > pageHeight)
                        {
                            // too tall: shorten the candidate
                            position -= step;
                        }
                        else
                        {
                            // fits: try a longer candidate
                            position += step;
                        }

                        // Get the adjusted text length and text to compare with the page height.
                        prevTextLength = drawnTextLength;
                        drawnTextLength = (int)Math.Ceiling(position);

                        drawnText = text.Substring(0, drawnTextLength).Trim();
                        textSize = graphics.MeasureString(drawnText, font, layoutArea, StringFormat.GenericDefault);
                    }

                    // Fine tune text length to fit in to the page height.
                    while (textSize.Height > pageHeight)
                    {
                        drawnTextLength--;
                        drawnText = text.Substring(0, drawnTextLength).Trim();
                        textSize = graphics.MeasureString(drawnText, font, layoutArea, StringFormat.GenericDefault);
                    }

                    // Add last '\n' character to the drawn text
                    // (only the consumed length grows, so the next page does not start with this '\n')
                    if (drawnTextLength < text.Length && text[drawnTextLength] == '\n')
                    {
                        drawnTextLength++;
                    }

                    // Always consume at least one character, so that the caller's page loop makes progress.
                    if (drawnTextLength == 0)
                    {
                        drawnTextLength = 1;
                        drawnText = text.Substring(0, 1);
                    }
                }
            }

            // Update the remaining text to determine if another page is created by caller.
            text = text.Substring(drawnTextLength);

            return drawnText;
        }
        
I cannot parse the string and add new line characters to it, because our requirements are that no new line characters may be added to the text.

Is there a way to use the Drawing method with a PDFFont that will wrap on character?
or
Is there a way to use the Draw method with a System.Drawing font that will add the text to the page TextRegion?

Here is a sample of test text I have been using. You can just copy and paste into a txt file. :
abcdefghijklmnopqrstuvwx1abcdefghijklmnopqrstuvwx2abcdefghijklmnopqrstuvwx3abcdefghijklmnopqrstuvwx4abcdefghijklmnopqrstuvwx5abcdefghijklmnopqrstuvwx6abcdefghijklmnopqrstuvwx7abcdefghijklmnopqrstuvwx8abcdefghijklmnopqrstuvwx9abcdefghijklmnopqrstuvw10ab255
abcdefghijklmnopqrstuvwx1abcdefghijklmnopqrstuvwx2abcdefghijklmnopqrstuvwx3abcdefghijklmnopqrstuvwx4abcdefghijklmnopqrstuvwx5abcdefghijklmnopqrstuvwx6abcdefghijklmnopqrstuvwx7abcdefghijklmnopqrstuvwx8abcdefghijklmnopqrstuvwx9abcdefghijklmnopqrstuvw10ab510
abcdefghijklmnopqrstuvwx1abcdefghijklmnopqrstuvwx2abcdefghijklmnopqrstuvwx3abcdefghijklmnopqrstuvwx4abcdefghijklmnopqrstuvwx5abcdefghijklmnopqrstuvwx6abcdefghijklmnopqrstuvwx7abcdefghijklmnopqrstuvwx8abcdefghijklmnopqrstuvwx9abcdefghijklmnopqrstuvw10ab765
abcdefghijklmnopqrstuvwx1abcdefghijklmnopqrstuvwx2abcdefghijklmnopqrstuvwx3abcdefghijklmnopqrstuvwx4abcdefghijklmnopqrstuvwx5abcdefghijklmnopqrstuvwx6abcdefghijklmnopqrstuvwx7abcdefghijklmnopqrstuvwx8abcdefghijklmnopqrstuvwx9abcdefghijklmnopqrstuvw10a1020
abcdefghijklmnopqrstuvwx1abcdefghijklmnopqrstuvwx2abcdefghijklmnopqrstuvwx3abcdefghijklmnopqrstuvwx4abcdefghijklmnopqrstuvwx5abcdefghijklmnopqrstuvwx6abcdefghijklmnopqrstuvwx7abcdefghijklmnopqrstuvwx8abcdefghijklmnopqrstuvwx9abcdefghijklmnopqrstuvw10a1275
abcdefghijklmnopqrstuvwx1abcdefghijklmnopqrstuvwx2abcdefghijklmnopqrstuvwx3abcdefghijklmnopqrstuvwx4abcdefghijklmnopqrstuvwx5abcdefghijklmnopqrstuvwx6abcdefghijklmnopqrstuvwx7abcdefghijklmnopqrstuvwx8abcdefghijklmnopqrstuvwx9abcdefghijklmnopqrstuvw10a1530
abcdefghijklmnopqrstuvwx1abcdefghijklmnopqrstuvwx2abcdefghijklmnopqrstuvwx3abcdefghijklmnopqrstuvwx4abcdefghijklmnopqrstuvwx5abcdefghijklmnopqrstuvwx6abcdefghijklmnopqrstuvwx7abcdefghijklmnopqrstuvwx8abcdefghijklmnopqrstuvwx9abcdefghijklmnopqrstuvw10a1785
Alex
Site Admin
Posts: 1943
Joined: Thu Jul 10, 2008 2:21 pm

Re: Convert large text file to multi page pdf

Post by Alex »

Hi Harlan,

Thank you for information. We improved the code of example.

Here is a new example that converts a large text file to a multipage PDF document:

Code: Select all

/// <summary>
/// Renders the content of a UTF-8 text file into a new multipage PDF document.
/// </summary>
/// <remarks>
/// Line endings are normalized to '\n'. A4 pages are appended until <c>GetDrawnText</c>
/// has consumed the whole text; one page is created even for an empty file.
/// </remarks>
/// <param name="sourceTextFilename">The filename of source text file.</param>
/// <param name="destPdfFilename">The filename of destination PDF document.</param>
public static void ConvertTextFileToPdfDocument(
    string sourceTextFilename,
    string destPdfFilename)
{
    // name of the TrueType font used for the text
    string textFontName = "Arial";

    // size of the text font, in points
    int textFontSize = 12;

    // free space between page border and text, in points
    Vintasoft.Imaging.PaddingF pagePadding = new Vintasoft.Imaging.PaddingF(30);

    // load the text and convert every kind of line ending to a single '\n'
    string remainingText = System.IO.File.ReadAllText(sourceTextFilename, System.Text.Encoding.UTF8)
        .Replace("\r\n", "\n")
        .Replace("\n\r", "\n")
        .Replace("\r", "\n");

    using (Vintasoft.Imaging.Pdf.PdfDocument pdfDocument =
        new Vintasoft.Imaging.Pdf.PdfDocument(destPdfFilename, Vintasoft.Imaging.Pdf.PdfFormat.Pdf_14))
    {
        // build the PDF font from the TrueType font program
        Vintasoft.Imaging.Pdf.Tree.Fonts.PdfFont pdfFont;
        using (System.IO.Stream ttfStream =
            pdfDocument.FontProgramsController.GetTrueTypeFontProgram(textFontName))
        {
            pdfFont = pdfDocument.FontManager.CreateCIDFontFromTrueTypeFont(ttfStream);
        }

        // one iteration per page; the first page is always created
        while (true)
        {
            // append new A4 page
            Vintasoft.Imaging.Pdf.Tree.PdfPage page =
                new Vintasoft.Imaging.Pdf.Tree.PdfPage(pdfDocument,
                    Vintasoft.Imaging.ImageSize.FromPaperKind(Vintasoft.Imaging.PaperSizeKind.A4));
            pdfDocument.Pages.Add(page);

            using (Vintasoft.Imaging.Pdf.Drawing.PdfGraphics pageGraphics =
                Vintasoft.Imaging.Pdf.Drawing.PdfGraphics.FromPage(page))
            {
                // black brush for the text
                Vintasoft.Imaging.Pdf.Drawing.PdfBrush textBrush =
                    new Vintasoft.Imaging.Pdf.Drawing.PdfBrush(System.Drawing.Color.Black);

                // text area = page media box reduced by padding
                System.Drawing.RectangleF textArea = pagePadding.ApplyTo(page.MediaBox);

                // cut the portion of text for this page; remainingText keeps the rest
                string pageText = GetDrawnText(
                    pageGraphics, pdfFont, textFontSize, textArea.Width, textArea.Height, ref remainingText);

                pageGraphics.DrawString(
                    pageText, pdfFont, textFontSize, textBrush, textArea,
                    Vintasoft.Imaging.Pdf.Drawing.PdfContentAlignment.Left, true);
            }

            // all text is drawn - no more pages are necessary
            if (string.IsNullOrEmpty(remainingText))
                break;
        }

        // subset fonts, then pack the document
        pdfDocument.FontManager.PackAllFonts();
        pdfDocument.Pack();
    }
}

/// <summary>
/// Cuts from the text the lines that can be drawn in rectangle with specified size.
/// </summary>
/// <remarks>
/// Each source line is split into drawn lines by <c>GetDrawnLine</c>; every drawn line is
/// counted as <paramref name="fontSize"/> units of height. On return, <paramref name="text"/>
/// holds the text that is not yet drawn, or null if nothing is left.
/// </remarks>
/// <param name="graphics">The PDF graphics.</param>
/// <param name="font">The text font.</param>
/// <param name="fontSize">The text font size.</param>
/// <param name="maxWidth">The width of text rectangle.</param>
/// <param name="maxHeight">The height of text rectangle.</param>
/// <param name="text">The text; drawn lines are removed from it.</param>
/// <returns>The drawn lines, each one terminated by '\n'.</returns>
private static string GetDrawnText(
    Vintasoft.Imaging.Pdf.Drawing.PdfGraphics graphics,
    Vintasoft.Imaging.Pdf.Tree.Fonts.PdfFont font,
    float fontSize,
    float maxWidth,
    float maxHeight,
    ref string text)
{
    System.Text.StringBuilder pageText = new System.Text.StringBuilder();

    // height of one drawn line is taken equal to the font size
    float lineHeight = fontSize;
    float usedHeight = 0;

    // source line that is currently being split into drawn lines;
    // null means that next source line must be cut from the text
    string pendingLine = null;

    bool isPageFull = false;
    while (!isPageFull && (pendingLine != null || text != null))
    {
        if (pendingLine == null)
            pendingLine = CutTextPart(ref text, '\n');

        // move the part of source line that fits the width to the page text
        string drawnLine = GetDrawnLine(graphics, font, fontSize, maxWidth, ref pendingLine);
        pageText.Append(drawnLine).Append("\n");

        usedHeight += lineHeight;
        // stop when one more line would not fit the rectangle height
        isPageFull = usedHeight + lineHeight > maxHeight;
    }

    // return unfinished source line back to the beginning of the text
    if (pendingLine != null)
        text = pendingLine + '\n' + text;

    return pageText.ToString();
}

/// <summary>
/// Cuts from the line the leading portion whose width does not exceed the specified width.
/// </summary>
/// <remarks>
/// Words (parts separated by ' ') are added one by one while the result fits the width.
/// If even the first word does not fit, the word is shortened character by character and
/// its tail is returned to the line. On return, <paramref name="line"/> holds the part of
/// the line that is not yet drawn, or null if the whole line is consumed.
/// </remarks>
/// <param name="graphics">The PDF graphics.</param>
/// <param name="font">The text font.</param>
/// <param name="fontSize">The text font size.</param>
/// <param name="maxWidth">The width of text rectangle.</param>
/// <param name="line">The text line; the drawn portion is removed from it.</param>
/// <returns>The line portion that must be drawn.</returns>
private static string GetDrawnLine(
    Vintasoft.Imaging.Pdf.Drawing.PdfGraphics graphics,
    Vintasoft.Imaging.Pdf.Tree.Fonts.PdfFont font,
    float fontSize,
    float maxWidth,
    ref string line)
{
    // words that are already accepted, joined by spaces
    string fittedLine = null;
    // word that was cut from the line but is not accepted (yet)
    string pendingWord = null;

    float measuredWidth, measuredHeight;

    while (line != null)
    {
        pendingWord = CutTextPart(ref line, ' ');

        // accepted words plus the new word
        string candidate = fittedLine == null
            ? pendingWord
            : fittedLine + ' ' + pendingWord;

        // measure without width limit and without wrapping
        graphics.MeasureString(candidate, font, fontSize, float.MaxValue, false, out measuredWidth, out measuredHeight);

        if (measuredWidth > maxWidth)
        {
            // single word is wider than the rectangle: keep the longest fitting head of the word
            if (fittedLine == null)
            {
                int headLength = pendingWord.Length;
                do
                {
                    headLength--;
                    graphics.MeasureString(pendingWord.Substring(0, headLength), font, fontSize, float.MaxValue, false, out measuredWidth, out measuredHeight);
                }
                while (measuredWidth > maxWidth);

                fittedLine = pendingWord.Substring(0, headLength);
                pendingWord = pendingWord.Substring(headLength);
            }
            break;
        }

        // candidate fits: accept the word
        fittedLine = candidate;
        pendingWord = null;
    }

    // return the word (or word tail) that is not accepted back to the line
    if (pendingWord != null)
        line = pendingWord + ' ' + line;

    return fittedLine;
}

/// <summary>
/// Cuts the leading part of text, up to the first occurrence of separator.
/// </summary>
/// <remarks>
/// The separator itself is dropped. On return, <paramref name="text"/> holds the text after
/// the separator, or null when the separator is not found or is the last character.
/// </remarks>
/// <param name="text">The source text; must not be null.</param>
/// <param name="separator">The separator.</param>
/// <returns>The text before the first separator, or the whole text if there is no separator.</returns>
private static string CutTextPart(ref string text, char separator)
{
    int separatorIndex = text.IndexOf(separator);

    // no separator: the whole text is the result and nothing remains
    if (separatorIndex < 0)
    {
        string wholeText = text;
        text = null;
        return wholeText;
    }

    string head = text.Substring(0, separatorIndex);

    // keep the text after separator; null if separator is the last character
    int tailStart = separatorIndex + 1;
    text = tailStart < text.Length ? text.Substring(tailStart) : null;

    return head;
}
Best regards, Alexander
Post Reply