# 多媒体
# 图片转pdf
from PIL import Image
from PyPDF2 import PdfReader, PdfWriter
import os
import io
from typing import List
import re
# Matches file names shaped like "<name>_<1-3 digits>.jpg"; group 1 captures <name>.
# Written as a raw string so that "\." is a regex escape instead of an invalid
# Python string escape (which emits a warning on recent interpreters).
re_img = re.compile(r"(.*?)_[0-9]{1,3}\.jpg")
def imgs2pdf(image_files: List[str], pdf_name=None) -> None:
    """Merge image files into a single PDF, one page per image.

    Each image is saved by Pillow as a PDF into an in-memory buffer, and the
    first page of that buffer is appended to a PyPDF2 writer, in the order the
    paths are given.

    Args:
        image_files: Paths of the images to merge, in page order.
        pdf_name: Path of the PDF file to write. Required; the ``None``
            default is kept only so the signature stays unchanged.

    Raises:
        TypeError: If ``pdf_name`` is ``None``.
    """
    # Fail before doing any conversion work: open(None, "wb") would raise the
    # same exception type, but only after every image had been processed.
    if pdf_name is None:
        raise TypeError("pdf_name must be a file path, not None")

    pdf_output = PdfWriter()
    for image_file in image_files:
        page_buffer = io.BytesIO()
        # The context manager releases the image's file handle as soon as the
        # page has been rendered, instead of leaking one handle per image.
        with Image.open(image_file) as image:
            image.save(page_buffer, format="pdf")
        # NOTE(review): page_buffer is deliberately left open -- presumably the
        # reader may still read page data from it when the writer serializes;
        # verify against the PyPDF2 version in use before closing it here.
        pdf_output.add_page(PdfReader(page_buffer).pages[0])

    # Write the merged document.
    with open(pdf_name, "wb") as f:
        pdf_output.write(f)
def _natural_key(filename):
    """Return a sort key that orders embedded digit runs by numeric value."""
    # re.split with a capturing group alternates plain text (even indexes) and
    # digit runs (odd indexes), so keys always compare str-to-str and
    # int-to-int and "x_2.jpg" sorts before "x_10.jpg".
    parts = re.split(r"(\d+)", filename)
    return [int(part) if i % 2 else part for i, part in enumerate(parts)]


def get_similar_imgs(path):
    """Yield groups of numbered images that belong to the same document.

    Walks *path* recursively. Within each directory, files whose names match
    the module-level ``re_img`` pattern are grouped by the name captured in
    the pattern's first group. Files that do not match the pattern are
    ignored, so unrelated files that merely contain the name (for example a
    previously generated ``<name>.pdf``) never end up in a group.

    Args:
        path: Root directory to search.

    Yields:
        ``(image_paths, name)`` tuples, where ``image_paths`` is the list of
        full paths in the group, ordered so that numeric suffixes ascend by
        value (``_2`` before ``_10``), and ``name`` is the shared captured
        name.
    """
    for root, _dirs, files in os.walk(path):
        # Insertion-ordered mapping: captured name -> paths in page order.
        groups = {}
        for filename in sorted(files, key=_natural_key):
            matched = re_img.match(filename)
            if matched is None:
                continue
            full_path = os.path.join(root, filename)
            groups.setdefault(matched.group(1), []).append(full_path)
        # Groups never span directories; emit this directory's groups before
        # descending further.
        for name, image_paths in groups.items():
            yield image_paths, name
def qh(root_dir="C:\\Users\\wangl\\Desktop\\全量数据"):
    """Convert every group of numbered images under *root_dir* into a PDF.

    For each group produced by ``get_similar_imgs``, the PDF is written next
    to the group's first image and named ``<name>.pdf``. The target path and
    the source image list are printed before each conversion.

    Args:
        root_dir: Directory to search recursively. Defaults to the location
            that used to be hard-coded, so calling ``qh()`` behaves as before.
    """
    for img_files, _name in get_similar_imgs(root_dir):
        target_dir = os.path.dirname(img_files[0])
        pdf_name = os.path.join(target_dir, _name + ".pdf")
        print("pdf_name: ", pdf_name)
        print("img_files: ", img_files)
        imgs2pdf(image_files=img_files, pdf_name=pdf_name)
# Script entry point: this top-level call runs the conversion as soon as the
# file is executed -- and also whenever it is imported, since it is unguarded.
qh()