BCL easyConverter SDK HTML
easyConverter SDK Usermanual
PDF-to-HTML Programming API  |  Download Free Trial  |  Contact Us to Purchase

ConvertToHTML2 Method

Converts a PDF document to an array containing the HTML stream followed by, for each image referenced from the HTML, its file name stream and its image data stream. This method ignores the AbsolutePositioning property.

byte[][] ConvertToHTML2(string InputFileName,
                        string Password,
                        int From,
                        int To)

Function ConvertToHTML2(InputFileName As String,
                        Password As String,
                        From As Integer,
                        To As Integer)
                        As Byte()()
def ConvertToHTML2(self, strInputFileName, strPassword, intFrom, intTo)




byte[][] ConvertToHTML2(String InputFileName,
                        String Password,
                        int From,
                        int To) throws PDF2HTMLException

function ConvertToHTML2($strInputFileName, $strPassword, $intFrom, $intTo)




BclHthrError ConvertToHTML2(const wchar_t * InputFileName,
                            const wchar_t * Password,
                            int From,
                            int To,
                            unsigned char * * OutStream);
Function ConvertToHTML2(InputFileName As String,
                        [Password] As Variant,
                        [From] As Variant,
                        [To] As Variant)
                        As Variant

Parameters

InputFileName

Input file name in absolute path format.

Password (optional)

Password to open the PDF document if any.

From (optional)

The starting page number to convert.

To (optional)

The ending page number to convert.

Return Values

Array of streams. The array has 1 + 2N elements, where N is the number of images referenced in the HTML document. The first stream contains the HTML code. The second contains the file name of the first image referenced in the HTML (if any), the third contains the image data for that first image, and so on for each further image.

Exception Handling

Please refer to the list of return exceptions.

Example Usage in C# COM object

using BCL.easyConverter.HTML;
...
// Converts pdfFileName in memory, then writes the HTML and every referenced image to disk.
// NOTE(review): assumes htmlFilePath is the directory that contains htmlFileName — confirm.
PDF2HTML pdf2html = new PDF2HTML();
try
{
   // data[0] is the HTML; the remaining entries come in (file name, image bytes) pairs.
   byte[][] data = pdf2html.ConvertToHTML2(pdfFileName);
   System.IO.File.WriteAllBytes(htmlFileName, data[0]);
   for(int i = 1; i < data.Length - 1; i += 2)
   {
      // Image names are ASCII-encoded byte arrays and must be decoded before use.
      string imageName = System.Text.Encoding.ASCII.GetString(data[i]);
      // Path.Combine supplies the directory separator when htmlFilePath has no trailing one,
      // which plain string concatenation would silently get wrong.
      string imagePath = System.IO.Path.Combine(htmlFilePath, imageName);
      System.IO.File.WriteAllBytes(imagePath, data[i + 1]);
   }
}
catch(PDF2HTMLException ex)
{
   Console.WriteLine(ex.Message);
}
finally
{
   // Release the converter whether or not the conversion succeeded.
   pdf2html.Dispose();
}
Imports BCL.easyConverter.HTML
...
' Converts pdfFileName in memory, then writes the HTML and every referenced image to disk.
' NOTE(review): assumes htmlFilePath is the directory that contains htmlFileName - confirm.
Dim pdf2html As New PDF2HTML()
Dim data As Byte()()
Dim i As Integer
Try
   ' data(0) is the HTML; the remaining entries come in (file name, image bytes) pairs.
   data = pdf2html.ConvertToHTML2(pdfFileName)
   System.IO.File.WriteAllBytes(htmlFileName, data(0))
   ' Visits the name entries at 1, 3, 5, ...; the loop is skipped when there are no images.
   For i = 1 To data.Length - 2 Step 2
      ' Image names are ASCII-encoded byte arrays and must be decoded before use.
      Dim imageName As String = System.Text.Encoding.ASCII.GetString(data(i))
      ' Path.Combine supplies the directory separator when htmlFilePath has no trailing one,
      ' which plain string concatenation would silently get wrong.
      System.IO.File.WriteAllBytes(System.IO.Path.Combine(htmlFilePath, imageName), data(i + 1))
   Next
Catch ex As PDF2HTMLException
   System.Console.WriteLine(ex.Message)
Finally
   ' Release the converter whether or not the conversion succeeded.
   pdf2html.Dispose()
End Try
import PDF2HTML
import os.path

def file_put_contents(filename, data):
   """Write the bytes in `data` to `filename`, replacing any existing content.

   The file is opened in binary mode and closed even if the write raises.
   """
   with open(filename, "wb") as handle:
      handle.write(data)

# Convert the PDF in memory, then save the HTML and each referenced image to disk.
pdf2html = PDF2HTML.PDF2HTML()
try:
   inputFilename = "c:\\test\\input.pdf"
   outputFilename = "c:\\test\\output.html"
   output = pdf2html.ConvertToHTML2(inputFilename)

   # Stream 0 is the HTML document itself.
   file_put_contents(outputFilename, output[0])

   # Remaining streams are (image name, image bytes) pairs at indices 1-2, 3-4, ...
   outputDir = os.path.dirname(outputFilename)
   for nameIndex in range(1, len(output) - 1, 2):
      imageName = str(output[nameIndex], "utf-8")
      file_put_contents(outputDir + "\\" + imageName, output[nameIndex + 1])
except PDF2HTML.PDF2HTMLException as ex:
   print(ex)
import com.bcl.easyconverter.html.*;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;

public class TestConverterMem
{
   public static void main(String[] args) throws Exception
   {
      if (args.length == 2)
      {
         File inputFile = new File(args[0]);
         String inputFileName = inputFile.getCanonicalPath();

         File htmlFile = new File(args[1]);
         String htmlFileName = htmlFile.getCanonicalPath();

         IPDF2HTML pdf2html = new IPDF2HTML();

         try
         {
            byte[][] output = pdf2html.ConvertToHTML2(inputFileName, "", -1, -1);
            int outputCount = output.length;
            byte[] htmlStream = output[0];
            FileOutputStream htmlFileStream = new FileOutputStream(htmlFile.getCanonicalPath());
            htmlFileStream.write(htmlStream);
            int imagesCount = (outputCount - 1) / 2;
            System.out.print("Number of images = ");
            System.out.println(imagesCount);
            for(int i = 0; i < imagesCount; ++i)
            {   
               String imageFilename = new String(output[i * 2 + 1], "US-ASCII");
               byte[] imageStream = output[i * 2 + 2];
               File imageFile = new File(new File(htmlFileName).getParent(), imageFilename); // compose path from output directory + image filename
               FileOutputStream imageFileStream = new FileOutputStream(imageFile);
               imageFileStream.write(imageStream);
            }
         }
         finally
         {
            pdf2html.dispose();
         }

      }
      else
      {
         System.out.println("Usage: java TestConverterMem  
"); System.out.println("For example:"); System.out.println("java TestConverterMem c:\\input\\smile.pdf c:\\output\\smile.html"); } } }
<?php
// Load the converter class and set up the input and output locations.
require("PDF2HTML.php");
$pdf2html = new BCL\easyConverter\HTML\PDF2HTML();
$inputFilename = "c:\\test\\input.pdf";
$outputFilename = "c:\\test\\output.html";

// Element 0 of the result is the HTML document.
$output = $pdf2html->ConvertToHTML2($inputFilename);
file_put_contents($outputFilename, $output[0]);

// The remaining elements are (image name, image bytes) pairs at indices 1-2, 3-4, ...
$outputDir = dirname($outputFilename);
$lastIndex = count($output) - 1;
$i = 1;
while ($i < $lastIndex) {
   file_put_contents($outputDir . "\\" . $output[$i], $output[$i + 1]);
   $i += 2;
}
?>

Sample Code Explanation

Here is how the function works. ConvertToHTML2 and ConvertToHTML3 return an array of streams, where each stream is an array of bytes. In other words, the output is an array of byte arrays.

The output array has 1 + 2 * N elements, where N is the number of images. Even when there are no images, there is at least one stream, which contains the HTML output.

Stream 0 is always the HTML content. If other streams are present, stream 1 is the name of the first image file (ASCII encoded). Stream 2 is the first image file. Stream 3 is the name of the second image file. Stream 4 is the second image file. And so on and so forth. There is always exactly one HTML stream, but any number of images may be present (even zero).

Since the image file names are always ASCII byte arrays, they must be converted into a proper Unicode string first. We can consider ASCII to be the same as UTF-8. The fact that the file name is ASCII instead of real UTF-8 doesn't cause any problems, since we only have basic letters, numbers, .jpg and .png in the filename.

Note that the image filename is not a full path, only the name + extension portion, such as "1x1.jpg". The image files must go to the same directory as the HTML file, because that path is hard-coded inside the HTML.

The HTML file's content is not a string, but a byte array, and it should be considered binary, instead of text. The image files are pure binary as well.