PDFCropper/main.py

150 lines
6.0 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# import argparse
import tkinter as tk
from cropper import crop
import traceback
# Spellings (compared case-insensitively) that str2bool treats as True.
_TRUE_STRINGS = frozenset({
    'true',
    't',
    'y',
    'yes',
    '1',
    'correct',
    'accept',
    'positive',
})


def str2bool(string):
    """Interpret a textual flag as a boolean.

    The value is converted with ``str()``, stripped of surrounding
    whitespace and lower-cased before the lookup, so ``' True '`` and a
    real ``True`` both yield ``True``.

    Args:
        string: The value to interpret; normally a ``str``.

    Returns:
        ``True`` if the normalized text is one of the accepted positive
        spellings, ``False`` for anything else.
    """
    return str(string).strip().lower() in _TRUE_STRINGS
def str2list(string):
    """Parse a comma-separated string into a list of three floats.

    Empty or whitespace-only pieces are skipped, so stray commas and a
    trailing ``", "`` are tolerated. If more than three numbers are
    given, only the first three are kept.

    Args:
        string: Text such as ``"255,255,255"``.

    Returns:
        A list with exactly three ``float`` values.

    Raises:
        ValueError: If the input is not a string, a piece is not a valid
            number, or fewer than three numbers are present.
    """
    try:
        values = [float(piece) for piece in string.split(',') if piece.strip()]
    except (AttributeError, TypeError, ValueError) as exc:
        # Only parsing problems are translated; KeyboardInterrupt and
        # SystemExit are deliberately left to propagate.
        print('parse list failed.')
        raise ValueError(
            'cannot parse {0!r} as a list of numbers'.format(string)
        ) from exc
    if len(values) < 3:
        print('parse list failed.')
        raise ValueError(
            'expected at least 3 numbers, got {0}: {1!r}'.format(len(values), string)
        )
    return values[:3]
def empty_as_none(string):
    """Map the empty string to ``None``; return any other value unchanged."""
    return None if string == '' else string
class Configuration:
    """Empty attribute container.

    Instances start with no attributes; parse_and_crop fills the
    instance ``__dict__`` with the parsed options before calling crop.
    """

    def __init__(self) -> None:
        """Create an instance with no attributes set."""
def parse_and_crop(conf):
    """Convert raw option strings to typed values and run the cropper.

    Each option that has a registered handler is converted; all other
    options are passed through unchanged. The parsed options are printed,
    then handed to ``crop`` as attributes of a ``Configuration`` object.
    An exception raised by ``crop`` is caught and its traceback printed.

    Args:
        conf: Mapping of option name to raw value. It is not modified, so
            the same mapping can safely be passed in again.

    Raises:
        ValueError: Propagated from a handler when an option cannot be
            parsed (for example a non-numeric ``border``).
    """
    handlers = {
        "output": empty_as_none,
        "names": empty_as_none,
        "background_color": str2list,
        "border": float,
        "zoom": float,
        "thresh": float,
        "split": str2bool,
        "visual": str2bool,
        "mute": str2bool,
    }
    # Build a new dict instead of converting in place: the caller's mapping
    # keeps its raw strings, so a later call never feeds parsed values back
    # into the string handlers, and a failing handler cannot leave it
    # half-converted.
    parsed = {
        key: handlers[key](value) if key in handlers else value
        for key, value in conf.items()
    }
    print('======= conf =======\n', '\n'.join(['{0}={1}'.format(key, value) for key, value in parsed.items()]))
    # A missing or None input is treated the same as an empty one.
    if not parsed.get('input'):
        print('Error, input file must be given.')
        return
    args = Configuration()
    args.__dict__.update(parsed)
    try:
        crop(args)
    except Exception:
        # Report the failure as text instead of letting it escape to the caller.
        print(traceback.format_exc())
# parser = argparse.ArgumentParser(description="Remove white border in pdf")
# parser.add_argument('--input', '-i', type=str, help='path to the input pdf.')
# parser.add_argument('--output', '-o', type=str, default=None, help='path to output file, default=infile_crop.pdf')
# parser.add_argument('--background_color', '-bgc', type=str2list, default=[255, 255, 255], help='pixels that are considered as background')
# parser.add_argument('--border', '-b', type=float, default=0.0, help='a value in pixel that specifies the border to given.')
# parser.add_argument('--zoom', '-z', type=float, default=1.0, help='bigger is better, however also slower.')
# parser.add_argument('--thresh', '-t', type=float, default=1, help='threshold that a pixel is considered as background')
# parser.add_argument('--split', '-s', action='store_true', default=False, help='auto split the file, default names as out_1.pdf, ...')
# parser.add_argument('--names', '-n', type=str, default=None, help='specify the name of the cropped pdf.')
# parser.add_argument('--visual', '-v', default=False, action='store_true', help='display cropbox.')
# parser.add_argument('--mute', '-m', default=False, action='store_true', help='do not display output file path.')
# args = parser.parse_args()
import os
import json
from ui import Loader
# Built-in option values, all kept as strings; values found in config.json
# (if the file exists) override them.
defaults = {
    "input": "",
    "output": "",
    "background_color": "255,255,255",
    "border": "0.0",
    "zoom": "1.0",
    "thresh": "1.0",
    "split": "true",
    "names": "",
    "visual": "false",
    "mute": "false"
}
config_path = 'config.json'
if os.path.isfile(config_path):
    try:
        with open(config_path, 'r', encoding='utf-8') as f:
            updater = json.load(f)
    except (OSError, ValueError) as exc:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueError
        # subclasses; a broken config file must not prevent startup.
        print('Warning, cannot read {0}: {1}. Using built-in defaults.'.format(config_path, exc))
        updater = {}
    if isinstance(updater, dict):
        defaults.update(updater)
    else:
        print('Warning, {0} does not contain a JSON object. Using built-in defaults.'.format(config_path))
else:
    # First run: write a template the user can edit. This is best effort,
    # so an unwritable working directory only produces a warning.
    try:
        with open(config_path, 'w', encoding='utf-8') as f:
            json.dump(defaults, f, ensure_ascii=False, indent=4)
    except OSError as exc:
        print('Warning, cannot write {0}: {1}'.format(config_path, exc))
# Per-option field descriptors passed to ui.Loader as its `conf` argument.
# The keys are the same option names used by `defaults` and by the handlers
# in parse_and_crop. Each entry carries a Chinese display label ("name"), a
# field kind ("type") and optionally "default" / "initial" / "extension".
# How Loader interprets "type", "initial" and "extension" is not visible in
# this file -- see ui.Loader.
# Every "default" here is overwritten by the loop over `defaults` further
# down, which also adds a "default" to the entries that have none.
# Labels in English: input = source file, output = save path,
# background_color = background colour, border = margin, zoom = zoom level,
# thresh = threshold, split = split, names = page names,
# visual = show crop box, mute = show saved file.
# NOTE(review): the label for "mute" reads "show saved file", while the
# commented-out argparse help describes --mute as "do not display output
# file path" -- the label looks inverted; confirm against cropper.crop.
conf = {
"input": {"name": "源文件", "type": "readfile", "extension": ("PDF & PPT", ".pdf .pptx")},
"output": {"name": "保存路径", "type": "savefile", "initial": "output.pdf"},
"background_color": {"name": "背景颜色", "type": "color", "default": "255,255,255"},
"border": {"name": "留白", "type": "str", "default": "0.0"},
"zoom": {"name": "缩放等级", "type": "str", "default": "1.0"},
"thresh": {"name": "阈值", "type": "str", "default": "1.0"},
"split": {"name": "拆分", "type": "str", "default": "true"},
"names": {"name": "页名称", "type": "str"},
"visual": {"name": "显示裁切框", "type": "str", "default": "false"},
"mute": {"name": "显示保存文件", "type": "str", "default": "false"},
}
# User-facing help text (Chinese): a "usage" header followed by one bullet
# per option, in the same order as the entries of `conf`.
# NOTE(review): some entries (source file, zoom level, page names) contain
# no punctuation at all, unlike the others -- it may have been lost in an
# encoding or extraction step; verify against the repository copy.
introduction = [
"==== 使用说明 ====",
"- 源文件输入的文件可以为PDF或者PPT如果是PPT会首先自动调用PowerPoint将给定的PPT转变为PDF然后进行裁剪PPT裁剪第一次需要调用COM对象因此速度稍慢建议耐心等待裁切一次后速度会恢复到正常水平",
"- 保存路径:保存的文件名,仅适用于非拆分模式。如果留空,则保存路径为源文件所在目录,保存的文件名为\"源文件名_crop.pdf\"",
"- 背景颜色:哪一种颜色会被认为是背景",
"- 留白:不紧贴有效内容裁切,预留一个给定大小的白边。",
"- 缩放等级裁切是基于视觉裁切的因此裁切过程中会首先渲染PDF采用默认值即可更高的缩放会将PDF渲染为更高分辨率的图片从而提高裁剪精度但计算时间也会相应增加。",
"- 阈值:像素值差异多少会被认为是前景。",
"- 拆分:是否将源文件自动拆分为每页一个的单个文件。默认文件名采取下划线+数字命名。如果源文件是PPT且PPT备注不为空则首先采用PPT的备注作为当前页保存的文件名。",
"- 页名称手动指定拆分模式下每一页的名称具有最高优先级用逗号分隔。留空则使用默认的文件名或者PPT中备注的文件名。",
"- 显示裁切框:在右侧的日志区域输出裁切框坐标",
"- 显示保存文件:显示保存的文件名。",
]
# Single newline-joined string; passed to Loader as help_msg and also
# printed once at startup.
help_msg = '\n'.join(introduction)
# Copy the effective defaults (built-in values overridden by config.json)
# into the field descriptors. A key that has no descriptor -- e.g. a typo or
# an obsolete option left in config.json -- is reported and skipped instead
# of aborting startup with a KeyError.
for key, value in defaults.items():
    if key in conf:
        conf[key]['default'] = value
    else:
        print('Warning, ignoring unknown option: {0}'.format(key))
# Build the window, show the help text once, and hand control to Tk.
root = tk.Tk()
Loader(master=root, conf=conf, execution=parse_and_crop, title="PDF/PPT自动裁边", help_msg=help_msg)
print(help_msg)
root.mainloop()