# 多媒体

# 图片转pdf

from PIL import Image
from PyPDF2 import PdfReader, PdfWriter

import os
import io
from typing import List
import re

# Pattern for page-image file names: "<name>_<1 to 3 digit index>.jpg".
# Group 1 captures <name> (non-greedy, so it stops at the first "_<digits>.jpg").
# Written as a raw string so that "\." reaches the regex engine as an escaped
# dot instead of being an invalid Python string escape sequence.
re_img = re.compile(r"(.*?)_[0-9]{1,3}\.jpg")


def imgs2pdf(image_files: List[str], pdf_name=None):
    """Merge image files into one PDF, one page per image, in list order.

    Args:
        image_files: Paths of the images to include, in page order.
        pdf_name: Path of the PDF file to write. Effectively required; the
            ``None`` default is rejected.

    Raises:
        TypeError: If ``pdf_name`` is ``None``.
    """
    if pdf_name is None:
        # Fail before any image is converted. Previously the same exception
        # type only surfaced from open(None) after all the conversion work.
        raise TypeError(
            "imgs2pdf() requires pdf_name: the path of the PDF file to write"
        )

    pdf_output = PdfWriter()
    for image_file in image_files:
        # The context manager closes the source image file once its page has
        # been rendered, so a large batch does not accumulate open handles.
        with Image.open(image_file) as image:
            img_io = io.BytesIO()
            # Render the image as a single-page PDF held in memory.
            image.save(img_io, format='pdf')
        # img_io is deliberately left open: the page taken from the reader may
        # still refer to this buffer until the writer serializes the output.
        # NOTE(review): confirm against the PyPDF2 reader's loading behavior.
        pdf_page = PdfReader(img_io).pages[0]
        pdf_output.add_page(pdf_page)

    # Write the merged document.
    with open(pdf_name, "wb") as f:
        pdf_output.write(f)

def get_similar_imgs(path):
    """Yield groups of numbered page images found under ``path``.

    Every directory below ``path`` is scanned independently. Within a
    directory, files whose whole name matches ``re_img``
    (``<name>_<index>.jpg``) are grouped by ``<name>``.

    Only files that match the pattern join a group. Membership is decided by
    the captured name being equal, not by a substring test, so unrelated files
    that merely contain the name (``banana_1.jpg`` for group ``a``, or an
    ``a.pdf`` left by an earlier run) are no longer picked up.

    Pages within a group are ordered by their numeric index, so ``_2`` comes
    before ``_10``.

    Args:
        path: Root directory to walk.

    Yields:
        ``(image_paths, name)`` tuples: the full paths of one group's images
        in page order, and the shared name prefix. Groups are yielded in the
        order their first file appears in the sorted directory listing.
    """
    ext_len = len(".jpg")
    for root, _dirs, files in os.walk(path):
        # prefix -> [(page index, file name), ...]; dicts keep insertion
        # order, which preserves the first-seen order of the groups.
        groups = {}
        for fname in sorted(files):
            m = re_img.fullmatch(fname)
            if m is None:
                continue
            prefix = m.group(1)
            # A full match guarantees fname == prefix + "_" + digits + ".jpg",
            # so the slice below is exactly the digit run.
            page_no = int(fname[len(prefix) + 1:-ext_len])
            groups.setdefault(prefix, []).append((page_no, fname))

        for prefix, pages in groups.items():
            # Numeric order first; the file name breaks ties such as
            # "a_1.jpg" versus "a_01.jpg" deterministically.
            pages.sort()
            yield [os.path.join(root, fname) for _, fname in pages], prefix

def qh(root_dir="C:\\Users\\wangl\\Desktop\\全量数据"):
    """Build one PDF per image group found under ``root_dir``.

    Each group yielded by ``get_similar_imgs`` is written to
    ``<group name>.pdf`` in the directory of the group's first image. The
    target path and the source image list are printed before each conversion.

    Args:
        root_dir: Directory tree to scan. Defaults to the path that used to be
            hard-coded here, so calling ``qh()`` with no argument behaves as
            before.
    """
    for img_files, _name in get_similar_imgs(root_dir):
        pdf_name = os.path.join(os.path.dirname(img_files[0]), _name + ".pdf")
        print("pdf_name: ", pdf_name)
        print("img_files: ", img_files)
        imgs2pdf(image_files=img_files, pdf_name=pdf_name)

# Run the batch conversion. This call is unconditional, so it executes both
# when the file is run as a script and when it is imported as a module.
# NOTE(review): consider an `if __name__ == "__main__":` guard if this module
# is ever meant to be imported without side effects.
qh()