admin管理员组文章数量:1130349
添加依赖
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.22</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>5.0.0</version>
</dependency>
代码
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFRun;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
public class PdfToWordConverter {
public static void main(String[] args) throws IOException {
File file = new File("./input.pdf");
convertToWord(file, new File("./output.docx"));
}
public static String extractTextFromPdf(File file) throws IOException {
PDDocument document = PDDocument.load(file);
PDFTextStripper stripper = new PDFTextStripper();
String text = stripper.getText(document);
document.close();
return text;
}
public static void convertToWord(File pdf, File docx) throws IOException {
String text = extractTextFromPdf(pdf);
XWPFDocument document = new XWPFDocument();
XWPFParagraph paragraph = document.createParagraph();
XWPFRun run = paragraph.createRun();
run.setText(text);
FileOutputStream out = new FileOutputStream(docx);
document.write(out);
out.close();
document.close();
}
}
添加依赖
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.22</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>5.0.0</version>
</dependency>
代码
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFRun;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
public class PdfToWordConverter {
public static void main(String[] args) throws IOException {
File file = new File("./input.pdf");
convertToWord(file, new File("./output.docx"));
}
public static String extractTextFromPdf(File file) throws IOException {
PDDocument document = PDDocument.load(file);
PDFTextStripper stripper = new PDFTextStripper();
String text = stripper.getText(document);
document.close();
return text;
}
public static void convertToWord(File pdf, File docx) throws IOException {
String text = extractTextFromPdf(pdf);
XWPFDocument document = new XWPFDocument();
XWPFParagraph paragraph = document.createParagraph();
XWPFRun run = paragraph.createRun();
run.setText(text);
FileOutputStream out = new FileOutputStream(docx);
document.write(out);
out.close();
document.close();
}
}
版权声明:本文标题:使用JAVA将PDF转WORD 内容由热心网友自发贡献,该文观点仅代表作者本人, 转载请联系作者并注明出处:https://it.en369.cn/jiaocheng/1763672538a2953257.html, 本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌抄袭侵权/违法违规的内容,一经查实,本站将立刻删除。


发表评论