我和同事分别用 .NET 和 Python 来实现这个功能:把 PDF 的每一页转成图片,再合并成一张长图。
做好后发现 .NET 版本转换很慢,而 Python 版本代码少、速度快,最终采用了我写的 Python 代码。
比较特殊的是需要额外安装 poppler-0.68.0(pdf2image 依赖它来渲染 PDF),安装方法大家可以参考下百度。
具体代码如下:
from pdf2image import convert_from_path
from pathlib import Path
from os import listdir
from PIL import Image
import os,time
from shutil import copyfile
import shutil
def pdf_to_image(pdf_filename):
    """Convert one PDF from the watch directory into a single stitched JPEG.

    Every page is rendered to ``<path_to_watch>/<stem>/<n>.jpg``, the pages
    are merged by ``contact_image`` into ``<path_to_watch>/<stem>.jpg``, and
    the temporary page directory is removed afterwards.

    Args:
        pdf_filename: Bare file name (not a full path) of a PDF located in
            the module-level ``path_to_watch`` directory. Names that do not
            end in ``.pdf`` (case-insensitive) are ignored.
    """
    # Only PDF files are handled.
    if not pdf_filename.upper().endswith(".PDF"):
        return
    print('处理 pdf_filename:', pdf_filename)

    # NOTE: the stem is the text before the FIRST dot. watch() derives the
    # expected output name the same way, so the two must stay in sync.
    filename_withoutext = pdf_filename.split('.')[0]
    if not filename_withoutext:
        # A name such as ".pdf" has an empty stem: the page directory would
        # then be the watch directory itself and the cleanup below would
        # delete it, so refuse to process such a file.
        print('跳过无效文件名:', pdf_filename)
        return
    print('out_path', filename_withoutext)

    out_path_full = os.path.join(path_to_watch, filename_withoutext)
    print('完整路径:', out_path_full)
    if not os.path.exists(out_path_full):
        print('创建目录:', out_path_full)
        os.mkdir(out_path_full)

    print('开始转换')
    pdf_path = os.path.join(path_to_watch, pdf_filename)
    print('filename:', pdf_path)
    pages = convert_from_path(pdf_path, dpi=400, output_folder=None, fmt="JPEG",
                              thread_count=5)

    # Pages are numbered from 1; save() is synchronous, so no delay is
    # needed between pages.
    for page_number, page in enumerate(pages, start=1):
        page.save(os.path.join(out_path_full, str(page_number) + '.jpg'))
    print('转换完成')

    contact_image(out_path_full)
    print('合并完成')

    # Remove exactly the directory created above instead of re-deriving its
    # name from the full path (which breaks if the watch path contains a dot).
    print('删除目录', out_path_full)
    shutil.rmtree(out_path_full)
def watch():
    """Poll ``path_to_watch`` forever and convert every PDF lacking a JPEG.

    Every 3 seconds the directory is listed. For each ``*.pdf`` file
    (case-insensitive) whose ``<stem>.jpg`` does not exist yet in the same
    directory, ``pdf_to_image`` is called. This function never returns.
    """
    while True:
        time.sleep(3)
        # Scan the watch directory for PDF files.
        pdf_files = [f for f in os.listdir(path_to_watch)
                     if f.upper().endswith('.PDF')]
        for f in pdf_files:
            f_full = os.path.join(path_to_watch, f)
            # The stem is the text before the FIRST dot; pdf_to_image names
            # its output the same way, so the two must stay in sync.
            f_jpg_full = os.path.join(path_to_watch, f.split('.')[0] + '.jpg')
            print(f_jpg_full)
            if os.path.exists(f_jpg_full):
                # Already converted.
                continue
            print(f_full)
            # NOTE(review): presumably gives a freshly copied file time to
            # finish being written before conversion starts — confirm.
            time.sleep(1)
            print('文件名:', f_full)
            pdf_to_image(f)
#return
#while 1:
#return
# before = dict([(f, None) for f in os.listdir(path_to_watch)])
# time.sleep(1)
# after = dict([(f,None) for f in os.listdir(path_to_watch)])
# added = [ f for f in after if not f in before]
# removed =[f for f in before if not f in after]
# if added:
# for f_add in added:
# time.sleep(1)
# print(‘文件名:‘,os.path.join(path_to_watch,f_add))
# pdf_to_image(f_add)
# path_file=f_add.split(‘.‘)[0]
# print(‘删除目录‘)
# shutil.rmtree(os.path.join(path_to_watch, path_file))
# if removed:
# for f_r in removed:
# print(‘删除:‘, os.path.join(path_to_watch, f_r))
# before = after
def open_image(out_path_full, fn):
    """Open the image file ``fn`` located in directory ``out_path_full``.

    Args:
        out_path_full: Directory that contains the image.
        fn: File name of the image inside that directory.

    Returns:
        The object returned by ``Image.open``. The caller is responsible for
        closing it (for example by using it in a ``with`` statement).
    """
    image_file = os.path.join(out_path_full, fn)
    print('打开图片路径', image_file)
    return Image.open(image_file)
def contact_image(out_path_full):
    """Stack every ``.jpg`` in ``out_path_full`` into one tall JPEG.

    Each page is resized to a fixed width of 1102 px: pages taller than wide
    become 1102x1564, all others 1102x776. The resized pages are pasted top
    to bottom on a white canvas which is saved as ``out_path_full + ".jpg"``
    (i.e. next to the page directory, not inside it).

    Args:
        out_path_full: Directory holding the page images. Files named
            ``<number>.jpg`` are ordered numerically; any other ``.jpg``
            files follow in alphabetical order.
    """
    print('开始合并')
    print('合并路径:', out_path_full)

    page_width = 1102
    portrait_height = 1564   # 1102x1564 is twice 551x782
    landscape_height = 776

    def page_order(fn):
        # Sort "2.jpg" before "10.jpg"; os.listdir() gives no ordering
        # guarantee and a plain string sort would put "10" before "2".
        stem = fn[:-len('.jpg')]
        if stem.isdecimal():
            return (0, int(stem), fn)
        return (1, 0, fn)

    page_files = sorted(
        (fn for fn in listdir(out_path_full) if fn.endswith('.jpg')),
        key=page_order,
    )
    print('图片数量:', len(page_files))
    if not page_files:
        # Nothing to merge; avoid indexing into an empty list below.
        print('没有可合并的图片')
        return

    images = []
    total_height = 0
    for fn in page_files:
        # The context manager closes the source file once the resized copy
        # has been produced, so the page directory can be deleted afterwards.
        with open_image(out_path_full, fn) as page:
            # Orientation is decided from this page's own dimensions.
            width, height = page.size
            print('width %d,height %d ' % (width, height))
            if height > width:
                target_height = portrait_height
            else:
                target_height = landscape_height
            images.append(
                page.resize((page_width, target_height), Image.BILINEAR))
        total_height += target_height

    result = Image.new(images[0].mode, (page_width, total_height), "white")
    print('total_height:', total_height)
    save_path = out_path_full + ".jpg"
    print('save path:', save_path)

    offset = 0
    for im in images:
        height_im = im.size[1]
        print('height_im %d' % height_im)
        result.paste(im, box=(0, offset))
        offset += height_im

    # Write the stitched image once, after all pages have been pasted.
    # NOTE(review): JPEG limits image dimensions (roughly 65k px), so very
    # long PDFs may fail here — confirm against the Pillow JPEG writer.
    result.save(save_path)
# Directory that watch() polls for incoming PDF files; the stitched JPEGs
# are written into the same directory.
path_to_watch = "D:\\PDFS"
print('监听目录:', path_to_watch)

if __name__ == '__main__':
    watch()
原文:https://www.cnblogs.com/xiaoyichong/p/14301059.html