BCL easyConverter SDK HTML
easyConverter SDK Usermanual
PDF-to-HTML Programming API  |  Download Free Trial  |  Contact Us to Purchase

ConvertToHTML3 Method

Converts a PDF stream to an array containing the HTML stream, followed by the image file name streams and the image streams as they are referenced from the HTML. This method ignores the AbsolutePositioning property.

byte[][] ConvertToHTML3(byte[] InStream,
                        string Password,
                        int From,
                        int To)

Function ConvertToHTML3(InStream As Byte(),
                        Password As String,
                        From As Integer,
                        To As Integer)
                        As Byte()()
def ConvertToHTML3(self, binaryInStream, strPassword, intFrom, intTo)




byte[][] ConvertToHTML3(byte[] InStream,
                        String Password,
                        int From,
                        int To) throws PDF2HTMLException

function ConvertToHTML3($binaryInStream, $strPassword, $intFrom, $intTo)




BclHthrError ConvertToHTML3(const unsigned char * InStream,
                            const wchar_t * Password,
                            int From,
                            int To,
                            unsigned char * * OutStream);
Function ConvertToHTML3(InStream As Variant,
                        [Password] As Variant,
                        [From] As Variant,
                        [To] As Variant)
                        As Variant

Parameters

InStream

Input PDF stream. This is a variant array of bytes.

Password (optional)

Password used to open the PDF document, if one is required.

From (optional)

The starting page number to convert.

To (optional)

The ending page number to convert.

Return Values

Array of streams (array of arrays of bytes). The array has 1 + 2N elements, where N is the number of images referenced in the HTML document. The first stream in the array contains the HTML code. The second stream contains the name of the first image referenced in the HTML (if one exists), as an ASCII-encoded byte array. The third stream contains the image data for the first image. The image name and data streams keep alternating until all images have been listed.

Exception Handling

Please refer to the list of return exceptions.

Example Usage

using BCL.easyConverter.HTML;
...
// Converts an in-memory PDF and writes the HTML plus every referenced image to disk.
PDF2HTML pdf2html = new PDF2HTML();
byte[] pdfBytes = System.IO.File.ReadAllBytes(pdfFileName);
try
{
   // data[0] is the HTML; the remaining elements are (image name, image data) pairs.
   byte[][] data = pdf2html.ConvertToHTML3(pdfBytes);
   System.IO.File.WriteAllBytes(htmlFileName, data[0]);
   for(int i = 1; i < data.Length - 1; i += 2)
   {
      // Image names are delivered as ASCII-encoded byte arrays.
      string imageName = System.Text.Encoding.ASCII.GetString(data[i]);
      // Path.Combine supplies the directory separator when htmlFilePath has no trailing one.
      System.IO.File.WriteAllBytes(System.IO.Path.Combine(htmlFilePath, imageName), data[i + 1]);
   }
}
catch(PDF2HTMLException ex)
{
   Console.WriteLine(ex.Message);
}
finally
{
   // Release the converter whether or not the conversion succeeded.
   pdf2html.Dispose();
}
Imports BCL.easyConverter.HTML
...
' Converts an in-memory PDF and writes the HTML plus every referenced image to disk.
Dim pdf2html As New PDF2HTML()
Dim pdfBytes As Byte()
Dim data As Byte()()
Dim i As Integer
Dim imageName As String
pdfBytes = System.IO.File.ReadAllBytes(pdfFileName)
Try
   ' data(0) is the HTML; the remaining elements are (image name, image data) pairs.
   data = pdf2html.ConvertToHTML3(pdfBytes)
   System.IO.File.WriteAllBytes(htmlFileName, data(0))
   i = 1
   While i < data.Length - 1
      ' Image names are delivered as ASCII-encoded byte arrays.
      imageName = System.Text.Encoding.ASCII.GetString(data(i))
      ' Path.Combine supplies the directory separator when htmlFilePath has no trailing one.
      System.IO.File.WriteAllBytes(System.IO.Path.Combine(htmlFilePath, imageName), data(i + 1))
      i = i + 2
   End While
Catch ex As PDF2HTMLException
   System.Console.WriteLine(ex.Message)
Finally
   ' Release the converter whether or not the conversion succeeded.
   pdf2html.Dispose()
End Try
import PDF2HTML
import os.path

# reads a file into a binary memory stream
def file_get_contents(filename):
   """Return the entire content of the file ``filename`` as bytes.

   The file is opened in binary mode, so no newline or encoding
   translation is applied to the data.
   """
   # The context manager closes the file even if read() raises.
   with open(filename, "rb") as f:
      return f.read()

# writes a binary memory stream to a file
def file_put_contents(filename, data):
   """Write the bytes ``data`` to the file ``filename``.

   The file is opened in binary write mode, so any existing content
   is replaced.
   """
   # The context manager closes the file even if write() raises.
   with open(filename, "wb") as f:
      f.write(data)

# Convert an in-memory PDF and write the HTML plus every referenced image
# into the directory of the HTML file.
pdf2html = PDF2HTML.PDF2HTML()
try:
   inputFilename = "c:\\test\\input.pdf"
   outputFilename = "c:\\test\\output.html"
   inputStream = file_get_contents(inputFilename)
   output = pdf2html.ConvertToHTML3(inputStream)
   # output[0] is the HTML; the remaining elements are (image name, image data) pairs.
   file_put_contents(outputFilename, output[0])
   outputDir = os.path.dirname(outputFilename)
   for i in range(1, len(output) - 1, 2):
      # Image names arrive as ASCII bytes; decoding them as UTF-8 yields the same text.
      imageName = str(output[i], "utf-8")
      file_put_contents(os.path.join(outputDir, imageName), output[i + 1])
except PDF2HTML.PDF2HTMLException as ex:
   print(ex)
import com.bcl.easyconverter.html.*;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;

public class TestConverterMem
{
   public static void main(String[] args) throws Exception
   {
      if (args.length == 2)
      {
         File inputFile = new File(args[0]);
         String inputFileName = inputFile.getCanonicalPath();

         File htmlFile = new File(args[1]);
         String htmlFileName = htmlFile.getCanonicalPath();

         IPDF2HTML pdf2html = new IPDF2HTML();

         try
         {
            FileInputStream inputFileStream = new FileInputStream(inputFile.getCanonicalPath());
            byte[] inputStream = new byte[(int)inputFile.length()];
            inputFileStream.read(inputStream);
            byte[][] output = pdf2html.ConvertToHTML3(inputStream, "", -1, -1);
            int outputCount = output.length;
            byte[] htmlStream = output[0];
            FileOutputStream htmlFileStream = new FileOutputStream(htmlFile.getCanonicalPath());
            htmlFileStream.write(htmlStream);
            int imagesCount = (outputCount - 1) / 2;
            System.out.print("Number of images = ");
            System.out.println(imagesCount);
            for(int i = 0; i < imagesCount; ++i)
            {   
               String imageFilename = new String(output[i * 2 + 1], "US-ASCII");
               byte[] imageStream = output[i * 2 + 2];
               File imageFile = new File(new File(htmlFileName).getParent(), imageFilename); // compose path from output directory + image filename
               FileOutputStream imageFileStream = new FileOutputStream(imageFile);
               imageFileStream.write(imageStream);
            }
         }
         finally
         {
            pdf2html.dispose();
         }

      }
      else
      {
         System.out.println("Usage: java TestConverterMem  
"); System.out.println("For example:"); System.out.println("java TestConverterMem c:\\input\\smile.pdf c:\\output\\smile.html"); } } }
<?php
// Converts an in-memory PDF to HTML and saves the HTML file together with
// every image it references into the HTML file's directory.
require("PDF2HTML.php");

$pdf2html = new BCL\easyConverter\HTML\PDF2HTML();

$inputFilename = "c:\\test\\input.pdf";
$outputFilename = "c:\\test\\output.html";

$inputStream = file_get_contents($inputFilename);
$output = $pdf2html->ConvertToHTML3($inputStream);

// Element 0 is the HTML document.
file_put_contents($outputFilename, $output[0]);

// The remaining elements alternate: image file name, then image data.
$outputDir = dirname($outputFilename);
$streamCount = count($output);
$i = 1;
while ($i < $streamCount - 1) {
   file_put_contents($outputDir . "\\" . $output[$i], $output[$i + 1]);
   $i += 2;
}
?>

Sample Code Explanation

Here is how the function works. ConvertToHTML2 and ConvertToHTML3 return an array of streams, where each stream is an array of bytes. In other words, the output is an array of arrays of bytes.

The output array has 1 + 2 * N elements, where N is the number of images. Even when there are no images, there is at least one stream, which contains the HTML output.

Stream 0 is always the HTML content. If other streams are present, stream 1 is the name of the first image file (ASCII encoded). Stream 2 is the first image file. Stream 3 is the name of the second image file. Stream 4 is the second image file. And so on and so forth. There is always exactly one HTML stream, but any number of images may be present (even zero).

Since the image file names are always ASCII byte arrays, they must be converted into a proper Unicode string first. Because ASCII is a strict subset of UTF-8, decoding the bytes as UTF-8 gives exactly the same result as decoding them as ASCII. The fact that the file name is ASCII instead of real UTF-8 doesn't cause any problems, since we only have basic letters, numbers, .jpg and .png in the filename.

Note that the image filename is not a full path, only the name + extension portion, such as "1x1.jpg". The image files must go to the same directory as the HTML file, because that path is hard-coded inside the HTML.

The HTML file's content is not a string, but a byte array, and it should be considered binary, instead of text. The image files are pure binary as well.