(Internet-driven) HTML to PDF with Java

Most converters make no sense to me, as they either produce nothing usable or are overloaded.

I just want to convert an HTML page into a PDF. Searching the internet wore me down, as no concrete, easy (and free) solution seems to exist. Or I'm blind and simply cannot see it ;-)

Here is a first attempt at “my” easy workaround for this problem:

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.StringWriter;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.UUID;

import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.xerces.parsers.DOMParser;
import org.cyberneko.html.HTMLConfiguration;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xhtmlrenderer.pdf.ITextRenderer;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

import com.lowagie.text.DocumentException;

public class Html2Pdf {

	public static String source = "http://www.wikipedia.org";

	public static InputSource getRemoteResource(){

		try {
			//Internet
			return new InputSource(new URL(source).openConnection().getInputStream());
			//FileOperation:
			//return new InputSource(new FileInputStream(source));
		} catch (Exception e) {
			throw new RuntimeException(e);
		}
	}

	public static void prepareImageTags(Document document){
		NodeList nodes = document.getElementsByTagName("img");
        for(int i = 0; i < nodes.getLength(); i++){
        	NamedNodeMap attributes = nodes.item(i).getAttributes();
        	Node srcNode = attributes.getNamedItem("src");
        	String nodeValue = srcNode.getNodeValue();

        	if(nodeValue.startsWith("http")){}
        	else if(nodeValue.startsWith(source) != true){
        		if(nodeValue.startsWith("/") != true)
        			srcNode.setNodeValue(nodeValue + "/");

        		srcNode.setNodeValue(source + srcNode.getNodeValue());
        	}
        }
	}

    public static void main(String[] args)
            throws IOException, DocumentException, Exception {

        String outputFile = "output.pdf";
        OutputStream os = new FileOutputStream(outputFile);

        DOMParser parser = new DOMParser(new HTMLConfiguration());
        parser.setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
        parser.setFeature("http://cyberneko.org/html/features/augmentations", true);

        parser.parse( getRemoteResource() );
        Document doc = parser.getDocument();
        prepareImageTags(doc);

        Document document = parser.getDocument();

        StringWriter stringWriter = new StringWriter();
        StreamResult streamResult = new StreamResult(stringWriter);
        TransformerFactory transformerFactory = TransformerFactory.newInstance();
        Transformer transformer = transformerFactory.newTransformer();

        transformer.setOutputProperty(OutputKeys.INDENT, "yes");
        transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2");
        //we need this strictly as jtidy requires this
        transformer.setOutputProperty(OutputKeys.METHOD, "xml");

        //for some encoding issues, we need to set explictly the preferred output encoding
        transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
        //should be omitted the 
        transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, “yes”);
        transformer.transform(new DOMSource(document.getDocumentElement()), streamResult);

        //Creating a tempfile and deleting it

        File tempFile = File.createTempFile(”pre_”+UUID.randomUUID().toString()+”_”, “.html”);
        //tempFile.deleteOnExit();
        OutputStreamWriter  fw = new OutputStreamWriter(new FileOutputStream (tempFile), “UTF-8″);
        fw.write(stringWriter.toString());
        fw.close();

        ITextRenderer renderer = new ITextRenderer();

        renderer.setDocument(tempFile);

        renderer.layout();
        renderer.createPDF(os);
        os.close();
    }
}

Comments

Leave a Reply




Security Code: