python3 PyPDF2分割pdf

网友投稿 249 2022-11-06


python3 PyPDF2分割pdf

首先需要安装 PyPDF2。注意:下文代码使用的 PdfFileReader / PdfFileWriter 等旧接口在 PyPDF2 3.0 中已被移除,因此需要安装 3.0 之前的版本:

pip install "PyPDF2<3.0"

批量切割

然后利用下面的代码:

from PyPDF2 import PdfFileReader, PdfFileWriter


def split_pdf(read_file, out_detail):
    """Split one PDF into several files according to a list of page ranges.

    Args:
        read_file: Path of the PDF file to split.
        out_detail: Path of a UTF-8 text file holding one ``start-end`` page
            range per line. Page numbers are 1-based and both ends are
            included. Blank lines are ignored.

    Each range is written to ``<start>-<end>.pdf`` (the stripped config line
    plus ``.pdf``) relative to the current working directory.

    Errors are reported with ``print`` instead of being raised: a range that
    reaches past the last page is skipped with a message, and any other
    failure (missing file, malformed range line, ...) is printed and stops
    the remaining processing.
    """
    try:
        # ``with`` closes the source even when opening or parsing fails; the
        # previous ``finally: fp_read_file.close()`` raised UnboundLocalError
        # whenever ``open`` itself failed, hiding the real error.
        with open(read_file, 'rb') as fp_read_file:
            pdf_input = PdfFileReader(fp_read_file)
            page_count = pdf_input.getNumPages()
            print(page_count)

            with open(out_detail, 'r', encoding='utf-8') as fp:
                range_specs = [line.strip() for line in fp]

            for pages in range_specs:
                if not pages:
                    # A blank (often trailing) line is not a range.
                    continue

                pdf_file = f'{pages}.pdf'
                start_page, end_page = map(int, pages.split('-'))
                # Config pages are 1-based and inclusive; range() wants a
                # zero-based start and an exclusive end. Keep start_page
                # untouched so the messages show the page the user wrote.
                first_index = start_page - 1

                print(f'开始分割{start_page}页-{end_page}页,保存为{pdf_file}......')
                pdf_output = PdfFileWriter()
                try:
                    for i in range(first_index, end_page):
                        pdf_output.addPage(pdf_input.getPage(i))
                except IndexError:
                    # The range runs past the end of the PDF: skip it
                    # without creating an output file.
                    print(f'分割页数超过了PDF的页数')
                else:
                    with open(pdf_file, 'wb') as sub_fp:
                        pdf_output.write(sub_fp)
                    print(f'完成分割{start_page}页-{end_page}页,保存为{pdf_file}!')
    except Exception as e:
        # Best-effort script: report the problem instead of crashing.
        print(e)


if __name__ == '__main__':
    split_pdf('./高考模拟卷(6套)/试卷/2020模拟卷语文·试卷.pdf', 'config.txt')

config.txt为分割配置文件,每行写一个“起始页-结束页”的页码范围(页码从1开始,起始页和结束页都包含在内),每个范围会被保存为一个单独的pdf。我的config.txt文件为:

1-4
5-8
9-12
13-16
17-20
21-24

切割指定页面的pdf

from PyPDF2 import PdfFileReader, PdfFileWriter


def split_single_pdf(read_file, start_page, end_page, pdf_file):
    """Copy one contiguous run of pages from a PDF into a new PDF file.

    Args:
        read_file: Path of the source PDF.
        start_page: Zero-based index of the first page to copy.
        end_page: Zero-based index one past the last page to copy
            (exclusive, exactly as in ``range(start_page, end_page)``).
        pdf_file: Path of the PDF file to create.

    Raises:
        OSError: If the source cannot be opened or the output cannot be
            written.
        IndexError: Presumably raised by ``getPage`` when the range reaches
            past the last page -- confirm against the installed PyPDF2.
    """
    pdf_output = PdfFileWriter()
    # ``with`` guarantees the source handle is closed; it used to be leaked.
    with open(read_file, 'rb') as fp_read_file:
        pdf_input = PdfFileReader(fp_read_file)
        for i in range(start_page, end_page):
            pdf_output.addPage(pdf_input.getPage(i))
        # Write before leaving the block: the added pages still belong to
        # the reader, which may need the open source stream while writing.
        with open(pdf_file, 'wb') as sub_fp:
            pdf_output.write(sub_fp)
    print(f'完成分割{start_page}页-{end_page}页,保存为{pdf_file}!')


if __name__ == '__main__':
    pdf_name = 'Automatic Chinese Spelling Checking and Correction Based on Character-Based Pre-trained Contextual Representations.pdf'
    split_single_pdf('2019_Book_NaturalLanguageProcessingAndCh.pdf', 569, 579, pdf_name)

参考文献

[1].python分割PDF.


版权声明:本文内容由网络用户投稿,版权归原作者所有,本站不拥有其著作权,亦不承担相应法律责任。如果您发现本站中有涉嫌抄袭或描述失实的内容,请联系我们jiasou666@gmail.com 处理,核实后本网站将在24小时内删除侵权内容。

上一篇:ubuntu 18.04安装向日葵的时候出现缺少libwebkitgtk 3.0
下一篇:Java中Easypoi实现excel多sheet表导入导出功能
相关文章

 发表评论

暂时没有评论,来抢沙发吧~