# PDFCropper/main.py

# import argparse
import tkinter as tk
from cropper import crop
import traceback

def str2bool(string):
    """Interpret a textual flag as a boolean.

    The comparison is case-insensitive. Returns True when the input is one
    of the accepted affirmative words listed below and False for any other
    string.
    """
    affirmative = (
        'true',
        't',
        'y',
        'yes',
        '1',
        'correct',
        'accept',
        'positive',
    )
    return string.lower() in affirmative

def str2list(string):
    """Parse a comma-separated string into a list of three floats.

    Empty fields (for example from a trailing or doubled comma) are skipped,
    and any values beyond the third are discarded.

    Args:
        string: text such as ``"255,255,255"``.

    Returns:
        A list holding the first three parsed floats.

    Raises:
        ValueError: if the input is not a string, if any field is not a
            valid number, or if fewer than three numbers are present.
    """
    try:
        values = [float(field) for field in string.split(',') if field != '']
    except (AttributeError, TypeError, ValueError) as err:
        # Only parsing problems are translated; things like KeyboardInterrupt
        # are no longer swallowed as they were with a bare ``except``.
        print('parse list failed.')
        raise ValueError(
            'expected three comma-separated numbers, got {0!r}'.format(string)
        ) from err

    if len(values) < 3:
        print('parse list failed.')
        raise ValueError(
            'expected three comma-separated numbers, got {0!r}'.format(string)
        )
    return values[:3]

def empty_as_none(string):
    """Map the empty string to None; return any other value unchanged."""
    return None if string == '' else string

class Configuration:
    """Bare attribute holder; it defines no fields of its own.

    parse_and_crop fills an instance by updating its ``__dict__`` with the
    parsed options before handing it to ``crop``.
    """

    def __init__(self) -> None:
        """Create an instance with no attributes set."""

def parse_and_crop(conf):
    """Convert raw option values to their typed form and run the cropper.

    Args:
        conf: mapping of option name to its raw value. It is left
            unmodified; conversion happens on a shallow copy so a failed
            conversion cannot leave the caller's dict half-converted.

    Nothing is returned. Every problem (an unparsable value, a missing input
    file, an exception raised by ``crop``) is reported with ``print`` instead
    of being raised.
    """
    # Converter applied to each known option; keys without an entry
    # (e.g. "input") are passed through as-is.
    handlers = {
        "output": empty_as_none,
        "names": empty_as_none,
        "background_color": str2list,
        "border": float,
        "zoom": float,
        "thresh": float,
        "split": str2bool,
        "visual": str2bool,
        "mute": str2bool,
    }

    parsed = dict(conf)
    for key, handler in handlers.items():
        if key not in parsed:
            continue
        try:
            parsed[key] = handler(parsed[key])
        except (AttributeError, TypeError, ValueError):
            # Report bad user input the same way crop() failures are
            # reported below, rather than letting it escape the callback.
            print('Error, invalid value for "{0}": {1!r}'.format(key, parsed[key]))
            return

    print('======= conf =======\n', '\n'.join(['{0}={1}'.format(key, value) for key, value in parsed.items()]))

    # .get() also covers a conf that lacks the "input" key entirely.
    if not parsed.get('input'):
        print('Error, input file must be given.')
        return

    args = Configuration()
    args.__dict__.update(parsed)
    try:
        crop(args)
    except Exception:
        # Top-level boundary: show the full traceback instead of crashing.
        print(traceback.format_exc())


# parser = argparse.ArgumentParser(description="Remove white border in pdf")
# parser.add_argument('--input', '-i', type=str, help='path to the input pdf.')
# parser.add_argument('--output', '-o', type=str, default=None, help='path to output file, default=infile_crop.pdf')
# parser.add_argument('--background_color', '-bgc', type=str2list, default=[255, 255, 255], help='pixels that are considered as background')
# parser.add_argument('--border', '-b', type=float, default=0.0, help='a value in pixel that specifies the border to given.')
# parser.add_argument('--zoom', '-z', type=float, default=1.0, help='bigger is better, however also slower.')
# parser.add_argument('--thresh', '-t', type=float, default=1, help='threshold that a pixel is considered as background')
# parser.add_argument('--split', '-s', action='store_true', default=False, help='auto split the file, default names as out_1.pdf, ...')
# parser.add_argument('--names', '-n', type=str, default=None, help='specify the name of the cropped pdf.')
# parser.add_argument('--visual', '-v', default=False, action='store_true', help='display cropbox.')
# parser.add_argument('--mute', '-m', default=False, action='store_true', help='do not display output file path.')
# args = parser.parse_args()


import os
import json
from ui import Loader
# Built-in default for every option, stored as strings.
defaults = {
    "input": "",
    "output": "",
    "background_color": "255,255,255",
    "border": "0.0",
    "zoom": "1.0",
    "thresh": "1.0",
    "split": "true",
    "names": "",
    "visual": "false",
    "mute": "false"
}
# Optional JSON file (relative to the working directory) that overrides the
# defaults above; it is created on first run when it does not exist yet.
config_path = 'config.json'
if os.path.isfile(config_path):
    try:
        with open(config_path, 'r', encoding='utf-8') as f:
            updater = json.load(f)
    except (OSError, ValueError) as err:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueError
        # subclasses; a broken file must not prevent start-up.
        print('Warning: could not read {0} ({1}); using built-in defaults.'.format(config_path, err))
        updater = {}
    if isinstance(updater, dict):
        # Only known options are merged: an unknown key has no matching
        # widget description and would raise KeyError further down.
        defaults.update({key: value for key, value in updater.items() if key in defaults})
    else:
        print('Warning: {0} does not contain a JSON object; using built-in defaults.'.format(config_path))
else:
    try:
        with open(config_path, 'w', encoding='utf-8') as f:
            json.dump(defaults, f, ensure_ascii=False, indent=4)
    except OSError as err:
        # Writing the file is a convenience only; keep going without it.
        print('Warning: could not write {0} ({1}).'.format(config_path, err))

# Widget description for every option, passed to Loader as ``conf``.
# Each entry carries a display "name" and a widget "type", plus optional
# type-specific extras ("extension", "initial") and a "default" value.
conf = {
    # File pickers.
    "input": {"name": "源文件", "type": "readfile", "extension": ("PDF & PPT", ".pdf .pptx")},
    "output": {"name": "保存路径", "type": "savefile", "initial": "output.pdf"},
    # Colour picker.
    "background_color": {"name": "背景颜色", "type": "color", "default": "255,255,255"},
    # Free-text fields.
    "border": {"name": "留白", "type": "str", "default": "0.0"},
    "zoom": {"name": "缩放等级", "type": "str", "default": "1.0"},
    "thresh": {"name": "阈值", "type": "str", "default": "1.0"},
    "split": {"name": "拆分", "type": "str", "default": "true"},
    "names": {"name": "页名称", "type": "str"},
    "visual": {"name": "显示裁切框", "type": "str", "default": "false"},
    # NOTE(review): the label reads "show saved files" while the key is
    # "mute" - confirm the intended polarity against the cropper.
    "mute": {"name": "显示保存文件", "type": "str", "default": "false"},
}

# Usage notes: a title line followed by one bullet per option.
introduction = [
    "==== 使用说明 ====",
    "- 源文件：输入的文件，可以为PDF或者PPT，如果是PPT，会首先自动调用PowerPoint将给定的PPT转变为PDF然后进行裁剪（PPT裁剪第一次需要调用COM对象因此速度稍慢，建议耐心等待，裁切一次后速度会恢复到正常水平",
    "- 保存路径：保存的文件名，仅适用于非拆分模式。如果留空，则保存路径为源文件所在目录，保存的文件名为\"源文件名_crop.pdf\"",
    "- 背景颜色：哪一种颜色会被认为是背景",
    "- 留白：不紧贴有效内容裁切，预留一个给定大小的白边。",
    "- 缩放等级：裁切是基于视觉裁切的，因此裁切过程中会首先渲染PDF，采用默认值即可，更高的缩放会将PDF渲染为更高分辨率的图片从而提高裁剪精度，但计算时间也会相应增加。",
    "- 阈值：像素值差异多少会被认为是前景。",
    "- 拆分：是否将源文件自动拆分为每页一个的单个文件。默认文件名采取下划线+数字命名。如果源文件是PPT且PPT备注不为空，则首先采用PPT的备注作为当前页保存的文件名。",
    "- 页名称：手动指定拆分模式下每一页的名称，具有最高优先级，用逗号分隔。留空则使用默认的文件名或者PPT中备注的文件名。",
    "- 显示裁切框：在右侧的日志区域输出裁切框坐标",
    "- 显示保存文件：显示保存的文件名。",
]
# Single newline-separated string built from the lines above.
help_msg = '\n'.join(introduction)

# Copy the effective defaults into the widget descriptions. Keys with no
# widget entry are skipped so they cannot raise KeyError.
for key, value in defaults.items():
    if key in conf:
        conf[key]['default'] = value

# Build the main window; parse_and_crop is passed to Loader as the
# ``execution`` callback.
root = tk.Tk()
Loader(master=root, conf=conf, execution=parse_and_crop, title="PDF/PPT自动裁边", help_msg=help_msg)


# NOTE(review): printed after Loader is constructed - presumably Loader
# redirects stdout into its log pane; confirm in ui.Loader.
print(help_msg)
root.mainloop()