update
This commit is contained in:
parent
3b0af93d14
commit
4646ce09cc
141
cropper.py
Normal file
141
cropper.py
Normal file
@ -0,0 +1,141 @@
|
|||||||
|
import os
|
||||||
|
import fitz
|
||||||
|
import numpy as np
|
||||||
|
import argparse
|
||||||
|
import traceback
|
||||||
|
import io
|
||||||
|
from PIL import Image
|
||||||
|
|
||||||
|
|
||||||
|
def clip(value, min_value, max_value):
    """Clamp ``value`` to the range bounded by ``min_value`` and ``max_value``.

    Either bound may be ``None``, in which case that side is not enforced.
    The lower bound is applied first, then the upper bound.
    """
    below_floor = min_value is not None and value < min_value
    if below_floor:
        value = min_value
    above_ceiling = max_value is not None and value > max_value
    if above_ceiling:
        value = max_value
    return value
|
||||||
|
|
||||||
|
def crop(args):
    """Crop the background margins from every page of a PDF or PowerPoint file.

    ``args`` is any object exposing these attributes:

    * ``input`` -- path of the source file (``.pdf``, ``.ppt`` or ``.pptx``);
      a path with any other extension gets ``.pdf`` appended.
    * ``output`` -- path of the cropped PDF, or ``None`` for
      ``<input without extension>_crop.pdf``.
    * ``background_color`` -- colour treated as background; it is broadcast
      against the rendered page image.
    * ``thresh`` -- a pixel counts as content when its mean absolute
      difference from the background exceeds this value.
    * ``zoom`` -- scale factor used when rendering a page for analysis.
    * ``border`` -- margin added around the detected content box.
    * ``visual`` -- print the computed crop box of each page.
    * ``split`` -- save every page to its own file instead of one PDF.
    * ``names`` -- page file names for split mode: either the name of a
      ``.txt`` file in the input's directory (one name per line) or a
      comma-separated list; ``None`` to use the defaults.
    * ``mute`` -- suppress the "saving cropped page" messages in split mode.

    PowerPoint input is first exported to PDF through PowerPoint COM
    automation, and the slide notes are used as the default page names.
    """
    infile = args.input
    # Make the path absolute and backslash-separated; this is the path that
    # is later handed to PowerPoint.
    infile = os.path.join(os.getcwd(), infile)
    infile = '\\'.join(infile.split('/'))
    inbase = infile.rsplit('.', maxsplit=1)[0]

    outfile = args.output
    if outfile is None:
        outfile = inbase + '_crop.pdf'
    outbase = outfile.rsplit('.', maxsplit=1)[0]
    names = None
    thresh = args.thresh

    # Directory of the input file; relative page names are resolved here.
    workdir = '/'.join('/'.join(infile.split('\\')).split('/')[:-1])
    if workdir == '':
        workdir = '.'

    if infile.split('.')[-1].lower() in ['ppt', 'pptx']:
        # Imported lazily: only needed (and only available) for PowerPoint.
        import comtypes.client
        from pptx import Presentation
        # Export the presentation to PDF (32 is the format code passed to SaveAs).
        powerpoint = comtypes.client.CreateObject("Powerpoint.Application")
        powerpoint.Visible = 1
        slides = powerpoint.Presentations.Open(infile)
        print('convert {0} to {1}'.format(infile, inbase + '.pdf'))
        slides.SaveAs(inbase + '.pdf', 32)
        slides.close()
        powerpoint.Quit()
        # Use each slide's notes text as the name of the matching page.
        ppt = Presentation(infile)
        names = []
        for i, slide in enumerate(ppt.slides):
            anno = slide.notes_slide.notes_text_frame.text.strip()
            if anno == '':
                anno = '{0}_{1}.pdf'.format(outbase, i+1)
            names.append(anno)
        names = [_with_pdf_suffix(n) for n in names]
        infile = inbase + '.pdf'

    if infile.split('.')[-1].lower() != 'pdf':
        infile += '.pdf'

    pdf = fitz.open(infile)
    try:
        zoom = args.zoom
        border = args.border
        for i, page in enumerate(pdf):
            try:
                # Reset any previous crop so the whole media box is analysed.
                page.setCropBox(page.MediaBox)
                # Render the page to an image array.
                mat = fitz.Matrix(zoom, zoom)
                pixmap = page.getPixmap(matrix=mat)
                img = Image.open(io.BytesIO(pixmap.tobytes()))
                img = np.asarray(img, dtype=np.float32)

                # Locate every pixel that differs from the background.
                background = np.broadcast_to(args.background_color, img.shape)
                diff = np.abs(img - background).mean(axis=2)
                points = np.argwhere(diff > thresh)
                if points.size == 0:
                    # Nothing but background: keep the full page.
                    print('page', i+1, 'has no content, left uncropped.')
                    continue
                # argwhere yields (row, column) pairs, i.e. (y, x).
                top, left = points.min(axis=0)
                bottom, right = points.max(axis=0) + 1

                # Convert image pixels back to page coordinates.
                ltrb = [coord/zoom for coord in (left, top, right, bottom)]

                # Reserve the requested border around the content.
                left, top, right, bottom = [
                    edge + delta
                    for edge, delta in zip(ltrb, [-border, -border, border, border])
                ]

                # Keep the box inside the page.
                page_width = img.shape[1]/zoom
                page_height = img.shape[0]/zoom
                left = clip(left, 0, page_width)
                top = clip(top, 0, page_height)
                right = clip(right, 0, page_width)
                bottom = clip(bottom, 0, page_height)
                if args.visual:
                    print('cropbox =', (left, top), (right, bottom))
                page.setCropBox(fitz.Rect(left, top, right, bottom))
            except KeyboardInterrupt:
                print('Operation Cancled.')
                break
            except Exception:
                # A failure on one page must not abort the remaining pages.
                print('Error happened while cropint page', i+1)
                print(traceback.format_exc())

        if not args.split:
            pdf.save(outfile)
        else:
            if names is None:
                names = ['{0}_{1}.pdf'.format(outbase, i+1) for i in range(len(pdf))]
            if args.names is not None:
                names = args.names
                namesfile = os.path.join(workdir, names)
                if os.path.isfile(namesfile) and names.split('.')[-1] == 'txt':
                    with open(namesfile, 'r', encoding='utf-8') as f:
                        names = f.read()
                    names = [n.strip() for n in names.split('\n')]
                    names = [n for n in names if n != '']
                else:
                    names = names.split(',')
                names = [_with_pdf_suffix(n) for n in names]
                # Fall back to the default name for pages without an explicit one.
                if len(names) < len(pdf):
                    names += ['{0}_{1}.pdf'.format(outbase, i+1) for i in range(len(names), len(pdf))]
            names = [os.path.join(workdir, name) for name in names]
            for i, name in enumerate(names[:len(pdf)]):
                page_pdf = fitz.Document()
                page_pdf.insertPDF(pdf, from_page=i, to_page=i)
                page_pdf.save(name)
                if not args.mute:
                    print('saving cropped page to', name)
    finally:
        pdf.close()


def _with_pdf_suffix(name):
    """Return ``name`` unchanged if it already ends in ``.pdf``, else append it."""
    return name if name.endswith('.pdf') else name + '.pdf'
|
||||||
104
main.py
Normal file
104
main.py
Normal file
@ -0,0 +1,104 @@
|
|||||||
|
# import argparse
|
||||||
|
import tkinter as tk
|
||||||
|
from cropper import crop
|
||||||
|
import traceback
|
||||||
|
|
||||||
|
def str2bool(string):
    """Interpret ``string`` as a boolean flag.

    Returns ``True`` when the lower-cased text is one of the accepted
    affirmative words, ``False`` for anything else.
    """
    affirmative = ('true', 't', 'y', 'yes', '1', 'correct', 'accept', 'positive')
    return string.lower() in affirmative
|
||||||
|
|
||||||
|
def str2list(string):
    """Parse a comma-separated string into a list of three floats.

    Empty fields are ignored and only the first three numbers are kept.

    Raises:
        ValueError: if ``string`` cannot be split, a field is not a number,
            or fewer than three numbers are present. ``'parse list failed.'``
            is printed before the error is raised.
    """
    try:
        values = [float(part) for part in string.split(',') if part != ''][:3]
        if len(values) < 3:
            raise ValueError('expected 3 numbers, got {0}'.format(len(values)))
        return values
    except Exception as err:
        # Any failure (including non-string input) is reported as ValueError.
        print('parse list failed.')
        raise ValueError('cannot parse {0!r} as three numbers'.format(string)) from err
|
||||||
|
|
||||||
|
def empty_as_none(string):
    """Return ``None`` for the empty string and ``string`` itself otherwise."""
    return None if string == '' else string
|
||||||
|
|
||||||
|
class Configuration:
    """Empty attribute container; options are attached to instances after creation."""

    def __init__(self) -> None:
        """Create a container with no attributes set."""
|
||||||
|
|
||||||
|
def parse_and_crop(conf):
    """Convert the raw form values in ``conf`` and run the cropper.

    ``conf`` maps option names to strings. Values with a registered converter
    are replaced in place by their parsed form, the resulting settings are
    printed, and ``crop`` is called with an object carrying them as
    attributes. An empty ``input`` only prints an error. Exceptions raised by
    ``crop`` are printed as a traceback rather than propagated.
    """
    converters = {
        "output": empty_as_none,
        "names": empty_as_none,
        "background_color": str2list,
        "border": float,
        "zoom": float,
        "thresh": float,
        "split": str2bool,
        "visual": str2bool,
        "mute": str2bool,
    }
    for option in conf:
        convert = converters.get(option)
        if convert is not None:
            conf[option] = convert(conf[option])

    summary = '\n'.join(
        '{0}={1}'.format(option, setting) for option, setting in conf.items()
    )
    print('======= conf =======\n', summary)

    if conf['input'] == '':
        print('Error, input file must be given.')
        return

    args = Configuration()
    vars(args).update(conf)
    try:
        crop(args)
    except Exception:
        print(traceback.format_exc())
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# parser = argparse.ArgumentParser(description="Remove white border in pdf")
|
||||||
|
# parser.add_argument('--input', '-i', type=str, help='path to the input pdf.')
|
||||||
|
# parser.add_argument('--output', '-o', type=str, default=None, help='path to output file, default=infile_crop.pdf')
|
||||||
|
# parser.add_argument('--background_color', '-bgc', type=str2list, default=[255, 255, 255], help='pixels that are considered as background')
|
||||||
|
# parser.add_argument('--border', '-b', type=float, default=0.0, help='a value in pixels that specifies the border to keep.')
|
||||||
|
# parser.add_argument('--zoom', '-z', type=float, default=1.0, help='bigger is better, however also slower.')
|
||||||
|
# parser.add_argument('--thresh', '-t', type=float, default=1, help='threshold that a pixel is considered as background')
|
||||||
|
# parser.add_argument('--split', '-s', action='store_true', default=False, help='auto split the file, default names as out_1.pdf, ...')
|
||||||
|
# parser.add_argument('--names', '-n', type=str, default=None, help='specify the name of the cropped pdf.')
|
||||||
|
# parser.add_argument('--visual', '-v', default=False, action='store_true', help='display cropbox.')
|
||||||
|
# parser.add_argument('--mute', '-m', default=False, action='store_true', help='do not display output file path.')
|
||||||
|
# args = parser.parse_args()
|
||||||
|
|
||||||
|
from ui import Loader

# Description of the form shown by Loader: one entry per option, giving the
# label text, the kind of input widget and, optionally, a default value.
# NOTE(review): the "mute" label reads "show saved files" although the option
# suppresses that output when true -- confirm the intended wording.
conf = {
    "input": {
        "name": "源文件",
        "type": "readfile",
        "extension": ("PDF & PPT", ".pdf .pptx"),
    },
    "output": {
        "name": "保存路径",
        "type": "savefile",
    },
    "background_color": {
        "name": "背景颜色",
        "type": "color",
        "default": "255,255,255",
    },
    "border": {
        "name": "留白",
        "type": "str",
        "default": "0.0",
    },
    "zoom": {
        "name": "缩放等级",
        "type": "str",
        "default": "1.0",
    },
    "thresh": {
        "name": "阈值",
        "type": "str",
        "default": "1.0",
    },
    "split": {
        "name": "拆分",
        "type": "str",
        "default": "true",
    },
    "names": {
        "name": "页名称",
        "type": "str",
    },
    "visual": {
        "name": "显示裁切框",
        "type": "str",
        "default": "false",
    },
    "mute": {
        "name": "显示保存文件",
        "type": "str",
        "default": "false",
    },
}

# Build the window and hand control to the Tk event loop.
root = tk.Tk()
Loader(
    master=root,
    conf=conf,
    execution=parse_and_crop,
    title="PDF/PPT自动裁边",
)
root.mainloop()
|
||||||
213
ui.py
Normal file
213
ui.py
Normal file
@ -0,0 +1,213 @@
|
|||||||
|
import tkinter as tk
|
||||||
|
from tkinter import Text, ttk
|
||||||
|
from tkinter.constants import DISABLED, RIGHT, NORMAL, END
|
||||||
|
import tkinter.filedialog
|
||||||
|
import tkinter.font as tkFont
|
||||||
|
from PIL import Image, ImageTk, ImageFont, ImageDraw
|
||||||
|
from tkinter.colorchooser import askcolor
|
||||||
|
import sys
|
||||||
|
|
||||||
|
class StdSimulator:
    """File-like writer that forwards text to a text-box widget."""

    def __init__(self, box) -> None:
        """Remember the target ``box`` and the current ``sys.stdout``/``sys.stderr``."""
        self.box = box
        self.stdout, self.stderr = sys.stdout, sys.stderr

    def write(self, msg):
        """Append ``msg`` at the end of the box and scroll so the end is visible."""
        target = self.box
        target.insert(END, msg)
        target.see(END)

    def flush(self):
        """Do nothing; present so the object can stand in for a stream."""
        return None
|
||||||
|
|
||||||
|
def text_on_image(img, text, pos, size=16, color='#000000'):
    """Draw ``text`` on ``img`` centred at ``pos`` and return the image.

    Args:
        img: image to draw on; it is modified in place.
        text: string to render.
        pos: ``(x, y)`` coordinates of the point the text is centred on.
        size: font size passed to the font loader.
        color: fill colour of the text.

    The font file ``msyh.ttc`` is loaded by name, so it must be resolvable by
    the imaging library.
    """
    imgdraw = ImageDraw.Draw(img)
    imgfont = ImageFont.truetype("msyh.ttc", size=size)
    if hasattr(imgdraw, 'textsize'):
        textw, texth = imgdraw.textsize(text, font=imgfont)
    else:
        # textsize() was removed in Pillow 10; textbbox() is its replacement.
        box_left, box_top, box_right, box_bottom = imgdraw.textbbox(
            (0, 0), text, font=imgfont
        )
        textw, texth = box_right - box_left, box_bottom - box_top
    # Shift from the centre point to the top-left corner of the text.
    left_top = (pos[0]-int(textw/2), pos[1]-int(texth/2))
    imgdraw.text(left_top, text, fill=color, font=imgfont)
    return img
|
||||||
|
|
||||||
|
class Loader(tk.Frame):
    """Form window generated from a configuration dictionary.

    For every entry of ``conf`` a label and a text entry are created; file and
    colour options additionally get a chooser button. A final button collects
    all entry values into a dict and passes it to the ``execution`` callback.
    A text box on the right receives everything written to ``sys.stdout`` and
    ``sys.stderr`` once the widgets exist.
    """

    def __init__(self, master=None, conf=None, execution=None, title=None):
        """Build the form.

        Args:
            master: parent widget; ``None`` uses the default root.
            conf: mapping of option key to a dict with ``name`` (label text),
                ``type`` (``readfile``, ``savefile``, ``directory``, ``color``
                or anything else for plain text) and optional ``default`` and
                ``extension`` entries. ``None`` builds an empty form.
            execution: callable invoked with the collected ``{key: text}`` dict.
            title: window title, or ``None`` to leave it untouched.
        """
        # The base class stores the effective master (the default root when
        # ``master`` is None), so it is not overwritten here.
        super().__init__(master)
        self.conf = {} if conf is None else conf
        self.execution = execution
        self.title = title

        self.pack(fill='both', expand=1)
        self.resources = {}
        self.fontStyle = tkFont.Font(size=10, family='微软雅黑')
        self.fontStyle_anchor = tkFont.Font(size=16)
        self.create_widgets()
        if self.title is not None:
            self.master.title(self.title)

    def add_resource(self, obj, name):
        """Register ``obj`` under ``name``; raise ``ValueError`` if the name is taken."""
        if name in self.resources:
            print('Name [{0}] Repeated.'.format(name))
            print('Current used name:', list(self.resources.keys()))
            raise ValueError
        self.resources[name] = obj

    def add_entry(self, default, width, layout, name, master=None):
        """Create a text entry, pre-filled with ``default`` when it is a string."""
        if master is None:
            master = self
        resource = tk.Entry(master, width=width)
        if isinstance(default, str):
            resource.insert(0, default)
        resource.grid(**layout)
        self.add_resource(resource, name)

    def add_label(self, text, layout, name, master=None):
        """Create a label showing ``text`` and place it with ``layout``."""
        if master is None:
            master = self
        resource = tk.Label(master, text=text, font=self.fontStyle)
        resource.grid(**layout)
        self.add_resource(resource, name)

    def add_button(self, text, command, layout, size, name, image=None, master=None):
        """Create a button, optionally rendered from ``image`` with ``text`` drawn on it.

        The registered resource is a dict holding the button under ``'button'``
        and, for image buttons, the photo image under ``'img'`` so that a
        reference to it is kept.
        """
        if master is None:
            master = self
        resource = {}
        if image is not None:
            width, height = size
            image = image.resize((width, height))
            if text is not None:
                image = text_on_image(image, text, (int(width/2), int(height/2)), size=16, color='#000000')
            img = ImageTk.PhotoImage(image)
            resource['img'] = img
            resource['button'] = tk.Button(master, text=text, command=command, borderwidth=0)
            resource['button'].config(image=img)
        else:
            resource['button'] = tk.Button(master, text=text, command=command)
        resource['button'].grid(**layout)
        self.add_resource(resource, name)

    def add_textbox(self, width, layout, name, master=None):
        """Create an editable multi-line text box."""
        if master is None:
            master = self
        resource = tk.Text(master, width=width, state=NORMAL)
        resource.grid(**layout)
        self.add_resource(resource, name)

    def choose_file(self, dst, choose_type='readfile', extension=None, initial_file=None):
        """Open a file or directory dialog and copy the selection into entry ``dst``.

        Args:
            dst: resource name of the entry that receives the chosen path.
            choose_type: ``'readfile'``, ``'savefile'`` or ``'directory'``.
            extension: optional ``(label, patterns)`` file-type filter.
            initial_file: initial file name (initial directory for directories).

        The entry is left unchanged when the dialog is cancelled.
        """
        default_extension = ("所有文件", '.*')
        args = {"filetypes": []}
        if extension is not None:
            args['filetypes'].append(extension)
        args['filetypes'].append(default_extension)
        if initial_file is not None:
            args['initialfile'] = initial_file

        filename = ''
        if choose_type == 'readfile':
            filename = tkinter.filedialog.askopenfilename(**args)
        elif choose_type == 'savefile':
            filename = tkinter.filedialog.asksaveasfilename(**args)
        elif choose_type == 'directory':
            filename = tkinter.filedialog.askdirectory(initialdir=initial_file)
        # A cancelled dialog yields an empty value (string or tuple).
        if filename:
            self.resources[dst].delete(0, 'end')
            self.resources[dst].insert(0, filename)

    def choose_color(self, dst):
        """Open a colour dialog and write the result as ``r,g,b`` into entry ``dst``."""
        color = askcolor()[0]
        if color is not None:
            color = ','.join(str(c) for c in color)
            self.resources[dst].delete(0, 'end')
            self.resources[dst].insert(0, color)

    def execute(self):
        """Collect the text of every ``entry_*`` resource and call ``execution``."""
        conf = {}
        for key, resource in self.resources.items():
            kind, _, option = key.partition('_')
            if kind == 'entry':
                conf[option] = resource.get()
        if self.execution is None:
            print('No execution callback configured.')
            return
        self.execution(conf)

    def create_widgets(self):
        """Create one form row per option, the run button and the log box."""
        self.rowcount = 0
        for key, value in self.conf.items():
            name = value['name']
            dtype = value['type']
            default = value.get('default', '')
            self.add_label(name, {"row": self.rowcount, "column": 0, "padx": 10, "pady": 5}, name="label_" + key)
            self.add_entry(
                default=default,
                width=None,
                layout={"row": self.rowcount, "column": 1, "padx": 0, "sticky": "we"},
                name='entry_' + key
            )
            destination = 'entry_' + key
            # NOTE(review): chooser buttons are registered under the label
            # text, so two options sharing a label would collide.
            if dtype in ['readfile', 'savefile', 'directory']:
                args = {
                    "dst": destination,
                    "choose_type": dtype,
                    "extension": value.get('extension', None),
                    "initial_file": default
                }
                self.add_button(
                    text="选择文件",
                    # Bind args as a default so each button keeps its own copy.
                    command=lambda args=args: self.choose_file(**args),
                    size=(16, 16),
                    layout={"row": self.rowcount, "column": 2, "padx": 5},
                    name='{0}_choose'.format(name),
                    image=None
                )
                self.conf[key]['type'] = 'str'
            elif dtype == 'color':
                self.add_button(
                    text="选择颜色",
                    command=lambda dst=destination: self.choose_color(dst),
                    size=(16, 16),
                    layout={"row": self.rowcount, "column": 2, "padx": 5},
                    name='{0}_choose'.format(name),
                    image=None
                )

            self.rowcount += 1
        self.add_button(
            text=" 裁剪 ",
            command=self.execute,
            size=(16, 16),
            layout={"row": self.rowcount, "column": 0, "columnspan": 2, "pady": 5},
            name='execute_final'
        )
        self.add_textbox(
            width=40,
            # rowspan must be positive even when the form has no rows.
            layout={"row": 0, "column": 3, "rowspan": max(self.rowcount, 1), "padx": 5, "pady": 0},
            name="textbox_log"
        )
        # From here on, printed output and tracebacks appear in the log box.
        sys.stdout = StdSimulator(self.resources['textbox_log'])
        sys.stderr = StdSimulator(self.resources['textbox_log'])
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Stand-alone demo: show the form built from a sample configuration.
    # No execution callback is passed to Loader here.
    conf = {
        "input": {
            "name": "源文件",
            "type": "readfile",
            "extension": ("PDF & PPT", ".pdf .pptx"),
        },
        "output": {
            "name": "保存路径",
            "type": "savefile",
        },
        "background_color": {
            "name": "背景颜色",
            "type": "color",
            "default": "255,255,255",
        },
        "border": {
            "name": "留白",
            "type": "str",
            "default": "0.0",
        },
        "zoom": {
            "name": "缩放等级",
            "type": "str",
            "default": "1.0",
        },
        "thresh": {
            "name": "阈值",
            "type": "str",
            "default": "1.0",
        },
        "split": {
            "name": "拆分",
            "type": "str",
            "default": "false",
        },
        "names": {
            "name": "页名称",
            "type": "str",
        },
        "visual": {
            "name": "显示裁切框",
            "type": "str",
            "default": "false",
        },
        "mute": {
            "name": "显示保存文件",
            "type": "str",
            "default": "false",
        },
    }
    root = tk.Tk()
    Loader(master=root, conf=conf)
    root.mainloop()
|
||||||
Loading…
Reference in New Issue
Block a user