Java: Using Apache POI, How to Convert an MS Word File to PDF

Java: Using Apache POI, how do I convert an MS Word file to PDF?

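Got it solved. The code below reads the text of each paragraph from the .doc file with Apache POI and writes it to a PDF with iText (only the text is copied; the original formatting is not preserved):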

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.OutputStream;

import com.lowagie.text.Document;
import com.lowagie.text.DocumentException;
import com.lowagie.text.Paragraph;
import com.lowagie.text.pdf.PdfWriter;


import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.extractor.WordExtractor;

import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;


public class TestCon {

    /**
     * Copies the paragraph text of the Word file {@code D:/Resume.doc} into the
     * PDF file {@code D:/test.pdf}.
     *
     * <p>The text is obtained from {@link WordExtractor#getParagraphText()} and each
     * entry is added to the PDF as one plain paragraph; no character or paragraph
     * formatting is carried over. Any exception is reported on standard output and
     * its stack trace is printed.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        System.out.println("Starting the test");

        // The input stream is scoped so it is released even if parsing fails.
        try (FileInputStream wordInput = new FileInputStream("D:/Resume.doc")) {
            POIFSFileSystem fs = new POIFSFileSystem(wordInput);
            HWPFDocument doc = new HWPFDocument(fs);
            WordExtractor we = new WordExtractor(doc);

            // The output file is created only after the Word file was parsed successfully.
            try (OutputStream file = new FileOutputStream(new File("D:/test.pdf"))) {
                Document document = new Document();
                PdfWriter writer = PdfWriter.getInstance(document, file);
                try {
                    document.open();
                    // NOTE(review): the setPageEmpty/newPage sequence is kept from the
                    // original; it looks redundant - confirm before removing.
                    writer.setPageEmpty(true);
                    document.newPage();
                    writer.setPageEmpty(true);

                    String[] paragraphs = we.getParagraphText();
                    for (int i = 0; i < paragraphs.length; i++) {
                        // Strip the trailing line terminator (optional CR(s) followed by LF).
                        paragraphs[i] = paragraphs[i].replaceAll("\\cM?\r?\n", "");
                        System.out.println("Length:" + paragraphs[i].length());
                        System.out.println("Paragraph" + i + ": " + paragraphs[i]);

                        // add the paragraph to the document
                        document.add(new Paragraph(paragraphs[i]));
                    }
                } finally {
                    // Close the document before the surrounding try closes the stream,
                    // so the PDF content is written out first.
                    document.close();
                }
            }

            System.out.println("Document testing completed");
        } catch (Exception e) {
            System.out.println("Exception during test");
            e.printStackTrace();
        }
    }
}

Converting Word (.docx) to PDF using Apache POI and iText

This approach uses the documents4j library, which needs MS Office (Microsoft Word) installed on the machine, so install it before running. Then:

import com.documents4j.api.DocumentType; 
import com.documents4j.api.IConverter;
import com.documents4j.job.LocalConverter;
import org.apache.commons.io.output.ByteArrayOutputStream;

import java.io.*;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;

public class Converter {

    /**
     * Converts {@code d:\input.docx} to {@code D:\output.pdf} using a documents4j
     * {@link LocalConverter}.
     *
     * <p>The converted PDF is buffered in memory first and written to the output
     * file only after the conversion has finished, so a failed conversion does not
     * leave a partial output file behind.
     *
     * @param args ignored
     * @throws IOException if the input cannot be read, the output cannot be written,
     *         or the conversion reports that it did not succeed
     * @throws ExecutionException if the scheduled conversion fails
     * @throws InterruptedException if the thread is interrupted while waiting for
     *         the conversion to finish
     */
    public static void main(String[] args) throws IOException, ExecutionException, InterruptedException {
        // Both streams are closed automatically, also when the conversion throws.
        try (InputStream in = new BufferedInputStream(new FileInputStream("d:\\input.docx"));
             ByteArrayOutputStream bo = new ByteArrayOutputStream()) {

            IConverter converter = LocalConverter.builder()
                    .baseFolder(new File("D:\\input"))
                    .workerPool(20, 25, 2, TimeUnit.SECONDS)
                    .processTimeout(5, TimeUnit.SECONDS)
                    .build();
            try {
                Future<Boolean> conversion = converter
                        .convert(in).as(DocumentType.MS_WORD)
                        .to(bo).as(DocumentType.PDF)
                        .prioritizeWith(1000) // optional
                        .schedule();

                // Block until the job is done and do not write a PDF for a job that
                // did not report success.
                if (!Boolean.TRUE.equals(conversion.get())) {
                    throw new IOException("Conversion of d:\\input.docx to PDF did not succeed");
                }
            } finally {
                // Release the converter's resources; without this they stay alive
                // after the conversion has finished.
                converter.shutDown();
            }

            // Let an IOException propagate instead of swallowing it: main already
            // declares it, and a failed write should not look like success.
            try (OutputStream outputStream = new FileOutputStream("D:\\output.pdf")) {
                bo.writeTo(outputStream);
            }
        }
    }
}

These are the necessary Maven dependencies:

<dependency>
<groupId>com.documents4j</groupId>
<artifactId>documents4j-api</artifactId>
<version>0.2.1</version>
</dependency>
<dependency>
<groupId>com.documents4j</groupId>
<artifactId>documents4j-util-conversion</artifactId>
<version>0.2.1</version>
</dependency>
<dependency>
<groupId>com.documents4j</groupId>
<artifactId>documents4j-transformer</artifactId>
<version>0.2.1</version>
</dependency>
<dependency>
<groupId>com.documents4j</groupId>
<artifactId>documents4j-util-all</artifactId>
<version>0.2.1</version>
</dependency>
<dependency>
<groupId>com.documents4j</groupId>
<artifactId>documents4j-local</artifactId>
<version>0.2.1</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.8.0-beta2</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-simple</artifactId>
<version>1.8.0-beta2</version>
</dependency>
<dependency>
<groupId>com.documents4j</groupId>
<artifactId>documents4j-util-standalone</artifactId>
<version>1.0.3</version>
</dependency>
<dependency>
<groupId>com.documents4j</groupId>
<artifactId>documents4j-transformer-msoffice-word</artifactId>
<version>1.0.3</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>23.0</version>
</dependency>

Enjoy!



Related Topics



Leave a reply



Submit