update
This commit is contained in:
parent
3b0af93d14
commit
4646ce09cc
141
cropper.py
Normal file
141
cropper.py
Normal file
@ -0,0 +1,141 @@
|
||||
import os
|
||||
import fitz
|
||||
import numpy as np
|
||||
import argparse
|
||||
import traceback
|
||||
import io
|
||||
from PIL import Image
|
||||
|
||||
|
||||
def clip(value, min_value, max_value):
    """Clamp ``value`` into the range ``[min_value, max_value]``.

    Either bound may be ``None``, which leaves that side unbounded.  The
    lower bound is applied first and the upper bound second, so when the
    bounds are inverted the upper bound wins.
    """
    clamped = value
    if min_value is not None:
        clamped = min_value if clamped < min_value else clamped
    if max_value is not None:
        clamped = max_value if clamped > max_value else clamped
    return clamped
|
||||
|
||||
def _with_pdf_suffix(name):
    """Return ``name`` unchanged if it ends with ``.pdf``, else with ``.pdf`` appended."""
    return name if name[-4:] == '.pdf' else name + '.pdf'


def _convert_ppt(infile, inbase, outbase):
    """Export a PowerPoint file to ``inbase + '.pdf'`` and name pages from slide notes.

    PowerPoint is driven through COM to write the PDF, then the presentation
    is read with python-pptx.  Returns one file name per slide: the stripped
    notes text, or ``'<outbase>_<n>.pdf'`` when a slide has no notes text.
    """
    # Import both optional dependencies up front so that a missing package is
    # reported before PowerPoint is launched.
    import comtypes.client
    from pptx import Presentation

    pdf_path = inbase + '.pdf'
    powerpoint = comtypes.client.CreateObject("Powerpoint.Application")
    try:
        powerpoint.Visible = 1
        slides = powerpoint.Presentations.Open(infile)
        try:
            print('convert {0} to {1}'.format(infile, pdf_path))
            # 32 is the SaveAs file-format code for PDF (ppSaveAsPDF).
            slides.SaveAs(pdf_path, 32)
        finally:
            slides.close()
    finally:
        # Always shut the application down, even if the export failed.
        powerpoint.Quit()

    # read comments.
    names = []
    for i, slide in enumerate(Presentation(infile).slides):
        # notes_text_frame is None when the notes slide has no notes placeholder.
        frame = slide.notes_slide.notes_text_frame
        anno = frame.text.strip() if frame is not None else ''
        if anno == '':
            anno = '{0}_{1}.pdf'.format(outbase, i+1)
        names.append(_with_pdf_suffix(anno))
    return names


def _content_cropbox(page, zoom, background_color, thresh, border):
    """Return the bounding box of the non-background content of ``page``.

    The page is rendered at ``zoom``; a pixel counts as content when its mean
    absolute difference from ``background_color`` exceeds ``thresh``.  The
    result is ``(left, top, right, bottom)`` scaled back by ``zoom``, grown by
    ``border`` on every side and clipped to the rendered page size.  Returns
    ``None`` when no pixel differs from the background.
    """
    # render page as img
    img = page.getPixmap(matrix=fitz.Matrix(zoom, zoom))
    # convert the img to nparray.
    img = Image.open(io.BytesIO(img.tobytes()))
    img = np.asarray(img, dtype=np.float32)

    background = np.broadcast_to(background_color, img.shape)
    diff = np.abs(img - background).mean(axis=2)
    points = np.argwhere(diff > thresh)  # rows of (y, x)
    if points.size == 0:
        return None
    top, left = points.min(axis=0)
    # +1 so the box ends after the last content pixel.
    bottom, right = points.max(axis=0) + 1

    page_width = img.shape[1] / zoom
    page_height = img.shape[0] / zoom
    # Scale to page units, reserve the border, then keep the box on the page.
    left = clip(float(left) / zoom - border, 0, page_width)
    top = clip(float(top) / zoom - border, 0, page_height)
    right = clip(float(right) / zoom + border, 0, page_width)
    bottom = clip(float(bottom) / zoom + border, 0, page_height)
    return left, top, right, bottom


def _split_names(requested, slide_names, workdir, outbase, page_count):
    """Build the output path of every page for split mode.

    ``requested`` (the ``names`` option) takes priority: if it names an
    existing ``.txt`` file inside ``workdir`` the non-empty stripped lines of
    that file are used, otherwise it is split on commas.  Without it the
    names derived from slide notes (``slide_names``) are used, if any.
    Missing names are filled with ``'<outbase>_<n>.pdf'`` and every name is
    joined onto ``workdir``.
    """
    if requested is not None:
        namesfile = os.path.join(workdir, requested)
        if os.path.isfile(namesfile) and requested.split('.')[-1] == 'txt':
            with open(namesfile, 'r', encoding='utf-8') as f:
                names = [line.strip() for line in f.read().split('\n')]
            names = [n for n in names if n != '']
        else:
            names = requested.split(',')
    elif slide_names is not None:
        names = list(slide_names)
    else:
        names = []
    names = [_with_pdf_suffix(n) for n in names]
    names += ['{0}_{1}.pdf'.format(outbase, i+1) for i in range(len(names), page_count)]
    return [os.path.join(workdir, name) for name in names]


def crop(args):
    """Crop the background margin off every page of a PDF or PowerPoint file.

    ``args`` is any object exposing these attributes:

    * ``input`` -- path of the source file; ``.ppt``/``.pptx`` files are first
      exported to PDF next to the source, and ``.pdf`` is appended to any
      other non-PDF path.
    * ``output`` -- path of the result, or ``None`` for ``<input>_crop.pdf``.
    * ``background_color`` -- color compared against every rendered pixel.
    * ``thresh`` -- difference above which a pixel counts as content.
    * ``zoom`` -- rendering scale used to detect the content.
    * ``border`` -- margin kept around the detected content.
    * ``visual`` -- print the crop box of every page.
    * ``split`` -- save one PDF per page instead of a single file.
    * ``names`` -- comma-separated page file names, or the name of a ``.txt``
      file (one name per line) in the input directory; ``None`` to skip.
    * ``mute`` -- do not print the path of every page saved in split mode.

    A failure on one page is reported and the remaining pages are still
    processed; pages containing only background are left uncropped.
    """
    # process filepath
    infile = os.path.abspath(args.input)
    inbase, extension = os.path.splitext(infile)
    # parse working dir.
    workdir = os.path.dirname(infile)

    outfile = args.output
    if outfile is None:
        outfile = inbase + '_crop.pdf'
    outbase = os.path.splitext(outfile)[0]
    names = None

    if extension.lower() in ('.ppt', '.pptx'):
        # lets process pptx.
        names = _convert_ppt(infile, inbase, outbase)
        infile = inbase + '.pdf'
    elif extension.lower() != '.pdf':
        infile += '.pdf'

    pdf = fitz.open(infile)
    try:
        for i, page in enumerate(pdf):
            try:
                # recover crop box
                page.setCropBox(page.MediaBox)
                box = _content_cropbox(
                    page, args.zoom, args.background_color, args.thresh, args.border
                )
                if box is None:
                    print('page', i+1, 'contains only background, left uncropped.')
                    continue
                left, top, right, bottom = box
                if args.visual:
                    print('cropbox =', (left, top), (right, bottom))
                page.setCropBox(fitz.Rect(left, top, right, bottom))
            except KeyboardInterrupt:
                print('Operation Cancled.')
                break
            except Exception:
                # Report and carry on with the next page.
                print('Error happened while cropint page', i+1)
                print(traceback.format_exc())

        if not args.split:
            pdf.save(outfile)
        else:
            page_count = len(pdf)
            names = _split_names(args.names, names, workdir, outbase, page_count)
            for i, name in enumerate(names[:page_count]):
                page_pdf = fitz.Document()
                try:
                    page_pdf.insertPDF(pdf, from_page=i, to_page=i)
                    page_pdf.save(name)
                finally:
                    page_pdf.close()
                if not args.mute:
                    print('saving cropped page to', name)
    finally:
        pdf.close()
|
||||
104
main.py
Normal file
104
main.py
Normal file
@ -0,0 +1,104 @@
|
||||
# import argparse
|
||||
import tkinter as tk
|
||||
from cropper import crop
|
||||
import traceback
|
||||
|
||||
def str2bool(string):
    """Interpret ``string`` as a boolean flag.

    Returns ``True`` when the lower-cased text is one of the accepted
    affirmative words and ``False`` for anything else.
    """
    return string.lower() in (
        'true',
        't',
        'y',
        'yes',
        '1',
        'correct',
        'accept',
        'positive',
    )
|
||||
|
||||
def str2list(string):
    """Parse a comma-separated string into a list of exactly three floats.

    Empty or blank components are skipped and only the first three numbers
    are kept, e.g. ``"255,255,255"`` -> ``[255.0, 255.0, 255.0]``.

    Raises:
        ValueError: if a component is not a number, if fewer than three
            numbers are present, or if ``string`` cannot be split at all.
    """
    try:
        values = [float(part) for part in string.split(',') if part.strip() != '']
    except Exception as err:
        # Report any parsing problem as ValueError, keeping the real cause
        # attached instead of discarding it.
        print('parse list failed.')
        raise ValueError(
            'cannot parse {0!r} as a comma-separated list of numbers'.format(string)
        ) from err
    values = values[:3]
    if len(values) < 3:
        print('parse list failed.')
        raise ValueError(
            'expected at least 3 numbers, got {0}'.format(len(values))
        )
    return values
|
||||
|
||||
def empty_as_none(string):
    """Map the empty string to ``None``; return any other value unchanged."""
    return None if string == '' else string
|
||||
|
||||
class Configuration:
    """Empty attribute container; settings are attached to an instance after it is created."""

    def __init__(self) -> None:
        """Create an instance with no attributes set."""
|
||||
|
||||
def parse_and_crop(conf):
    """Convert the raw form strings in ``conf`` to typed values and run the cropper.

    ``conf`` maps option names to strings.  Every key that has a converter is
    replaced in place by its converted value, the resulting settings are
    printed, and ``crop`` is called with an object carrying them as
    attributes.  Nothing is cropped when ``conf['input']`` is empty; an
    exception raised by ``crop`` is printed as a traceback.
    """
    converters = {
        "output": empty_as_none,
        "names": empty_as_none,
        "background_color": str2list,
        "border": float,
        "zoom": float,
        "thresh": float,
        "split": str2bool,
        "visual": str2bool,
        "mute": str2bool,
    }
    for key in conf:
        convert = converters.get(key)
        if convert is not None:
            conf[key] = convert(conf[key])

    summary = '\n'.join('{0}={1}'.format(key, value) for key, value in conf.items())
    print('======= conf =======\n', summary)

    if conf['input'] == '':
        print('Error, input file must be given.')
        return

    args = Configuration()
    vars(args).update(conf)
    try:
        crop(args)
    except Exception:
        print(traceback.format_exc())
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# parser = argparse.ArgumentParser(description="Remove white border in pdf")
|
||||
# parser.add_argument('--input', '-i', type=str, help='path to the input pdf.')
|
||||
# parser.add_argument('--output', '-o', type=str, default=None, help='path to output file, default=infile_crop.pdf')
|
||||
# parser.add_argument('--background_color', '-bgc', type=str2list, default=[255, 255, 255], help='pixels that are considered as background')
|
||||
# parser.add_argument('--border', '-b', type=float, default=0.0, help='a value in pixel that specifies the border to given.')
|
||||
# parser.add_argument('--zoom', '-z', type=float, default=1.0, help='bigger is better, however also slower.')
|
||||
# parser.add_argument('--thresh', '-t', type=float, default=1, help='threshold that a pixel is considered as background')
|
||||
# parser.add_argument('--split', '-s', action='store_true', default=False, help='auto split the file, default names as out_1.pdf, ...')
|
||||
# parser.add_argument('--names', '-n', type=str, default=None, help='specify the name of the cropped pdf.')
|
||||
# parser.add_argument('--visual', '-v', default=False, action='store_true', help='display cropbox.')
|
||||
# parser.add_argument('--mute', '-m', default=False, action='store_true', help='do not display output file path.')
|
||||
# args = parser.parse_args()
|
||||
|
||||
# Entry script: describe the form, build the window and start the Tk event
# loop.  These statements run at module level, i.e. whenever this file is
# executed or imported.
from ui import Loader

# One entry per form row, keyed by the option name handed to parse_and_crop.
#   "name"      -- label text shown next to the entry field
#   "type"      -- "readfile"/"savefile" add a file chooser button, "color"
#                  adds a color chooser button, "str" is a plain text entry
#   "default"   -- initial text of the entry field (empty when omitted)
#   "extension" -- (description, patterns) file filter for the file chooser
conf = {
    "input": {"name": "源文件", "type": "readfile", "extension": ("PDF & PPT", ".pdf .pptx")},  # label: source file
    "output": {"name": "保存路径", "type": "savefile"},  # label: save path
    "background_color": {"name": "背景颜色", "type": "color", "default": "255,255,255"},  # label: background color
    "border": {"name": "留白", "type": "str", "default": "0.0"},  # label: margin to keep
    "zoom": {"name": "缩放等级", "type": "str", "default": "1.0"},  # label: zoom level
    "thresh": {"name": "阈值", "type": "str", "default": "1.0"},  # label: threshold
    "split": {"name": "拆分", "type": "str", "default": "true"},  # label: split
    "names": {"name": "页名称", "type": "str"},  # label: page names
    "visual": {"name": "显示裁切框", "type": "str", "default": "false"},  # label: show crop box
    # NOTE(review): this label reads "show saved files", yet a true "mute"
    # value suppresses the per-page message in crop() -- confirm the wording.
    "mute": {"name": "显示保存文件", "type": "str", "default": "false"},
}
root = tk.Tk()
# The window title reads "PDF/PPT automatic margin cropping".
Loader(master=root, conf=conf, execution=parse_and_crop, title="PDF/PPT自动裁边")
# Blocks until the window is closed.
root.mainloop()
|
||||
213
ui.py
Normal file
213
ui.py
Normal file
@ -0,0 +1,213 @@
|
||||
import tkinter as tk
|
||||
from tkinter import Text, ttk
|
||||
from tkinter.constants import DISABLED, RIGHT, NORMAL, END
|
||||
import tkinter.filedialog
|
||||
import tkinter.font as tkFont
|
||||
from PIL import Image, ImageTk, ImageFont, ImageDraw
|
||||
from tkinter.colorchooser import askcolor
|
||||
import sys
|
||||
|
||||
class StdSimulator:
    """File-like writer that appends everything written to a text widget.

    The ``sys.stdout`` and ``sys.stderr`` objects current at construction
    time are kept on the instance as ``stdout`` and ``stderr``.
    """

    def __init__(self, box) -> None:
        """Remember the target widget ``box`` and the current standard streams."""
        self.stdout, self.stderr = sys.stdout, sys.stderr
        self.box = box

    def write(self, msg):
        """Append ``msg`` at the end of the widget and scroll it into view."""
        widget = self.box
        widget.insert(END, msg)
        widget.see(END)

    def flush(self):
        """Do nothing; present so the object can stand in for a stream."""
        return None
|
||||
|
||||
def text_on_image(img, text, pos, size=16, color='#000000'):
    """Draw ``text`` centred on ``pos`` onto ``img`` and return the image.

    Args:
        img: PIL image to draw on; it is modified in place.
        text: the string to draw.
        pos: ``(x, y)`` point the text is centred on.
        size: font size passed to the TrueType loader.
        color: fill color of the text.

    Returns:
        The same ``img`` object.
    """
    imgdraw = ImageDraw.Draw(img)
    try:
        imgfont = ImageFont.truetype("msyh.ttc", size=size)
    except OSError:
        # The named font file is not available on this machine; draw with
        # Pillow's built-in font rather than failing.
        imgfont = ImageFont.load_default()
    if hasattr(imgdraw, 'textbbox'):
        # ImageDraw.textsize() was removed in Pillow 10.  The right/bottom
        # edges of the box anchored at the origin are used as the extent.
        _, _, textw, texth = imgdraw.textbbox((0, 0), text, font=imgfont)
    else:
        textw, texth = imgdraw.textsize(text, font=imgfont)
    left_top = (pos[0]-int(textw/2), pos[1]-int(texth/2))
    imgdraw.text(left_top, text, fill=color, font=imgfont)
    return img
|
||||
|
||||
class Loader(tk.Frame):
    """Form frame generated from a configuration dictionary.

    Every entry of ``conf`` becomes one row holding a label, a text entry and,
    depending on its ``type``, a file or color chooser button.  Below the rows
    sits a run button and beside them a text box; creating the widgets
    redirects ``sys.stdout`` and ``sys.stderr`` into that text box.

    All widgets are kept in ``self.resources`` under unique names of the form
    ``'<kind>_<key>'`` (for example ``'entry_input'``).
    """

    def __init__(self, master=None, conf=None, execution=None, title=None):
        """Build the form inside ``master``.

        Args:
            master: parent widget; ``None`` lets tkinter pick its default root.
            conf: mapping ``key -> {"name", "type", "default", "extension"}``
                describing the rows; ``None`` means no rows.
            execution: callable invoked with a ``{key: entry text}`` dict when
                the run button is pressed.
            title: window title applied to the master, if given.
        """
        super().__init__(master)
        # tk.Frame has already stored the effective master (tkinter's default
        # root when ``master`` is None); overwriting it with None would break
        # the title call below.
        if master is not None:
            self.master = master
        # A fresh dict per instance instead of one shared mutable default.
        self.conf = {} if conf is None else conf
        self.execution = execution
        self.title = title

        self.pack(fill='both', expand=1)
        self.resources = {}
        self.fontStyle = tkFont.Font(size=10, family='微软雅黑')
        self.fontStyle_anchor = tkFont.Font(size=16)
        self.create_widgets()
        if self.title is not None:
            self.master.title(self.title)

    def add_resource(self, obj, name):
        """Register ``obj`` under ``name``.

        Raises:
            ValueError: if ``name`` is already registered.
        """
        if name in self.resources:
            print('Name [{0}] Repeated.'.format(name))
            print('Current used name:', list(self.resources.keys()))
            raise ValueError('resource name already in use: {0}'.format(name))
        self.resources[name] = obj

    def add_entry(self, default, width, layout, name, master=None):
        """Create a text entry, pre-fill it with ``default`` if that is a string,
        place it with the grid options in ``layout`` and register it as ``name``."""
        if master is None:
            master = self
        resource = tk.Entry(master, width=width)
        if isinstance(default, str):
            resource.insert(0, default)
        resource.grid(**layout)
        self.add_resource(resource, name)

    def add_label(self, text, layout, name, master=None):
        """Create a label showing ``text``, grid it and register it as ``name``."""
        if master is None:
            master = self
        resource = tk.Label(master, text=text, font=self.fontStyle)
        resource.grid(**layout)
        self.add_resource(resource, name)

    def add_button(self, text, command, layout, size, name, image=None, master=None):
        """Create a button and register it as ``name``.

        Without ``image`` a plain text button is created.  With ``image`` (a
        PIL image) the image is resized to ``size``, ``text`` is drawn onto it
        when given, and the result is used as a borderless button face.

        The registered resource is a dict with key ``'button'`` and, for image
        buttons, ``'img'``.
        """
        if master is None:
            master = self
        resource = {}
        if image is not None:
            width, height = size
            image = image.resize((width, height))
            if text is not None:
                image = text_on_image(image, text, (int(width/2), int(height/2)), size=16, color='#000000')
            img = ImageTk.PhotoImage(image)
            # Stored next to the button so a reference to the image stays
            # alive for as long as the resource is registered.
            resource['img'] = img
            resource['button'] = tk.Button(master, text=text, command=command, borderwidth=0)
            resource['button'].config(image=img)
        else:
            resource['button'] = tk.Button(master, text=text, command=command)
        resource['button'].grid(**layout)
        self.add_resource(resource, name)

    def add_textbox(self, width, layout, name, master=None):
        """Create an editable multi-line text box, grid it and register it as ``name``."""
        if master is None:
            master = self
        resource = tk.Text(master, width=width, state=NORMAL)
        resource.grid(**layout)
        self.add_resource(resource, name)

    def choose_file(self, dst, choose_type='readfile', extension=None, initial_file=None):
        """Open a file dialog and copy the chosen path into entry ``dst``.

        Args:
            dst: resource name of the entry that receives the path.
            choose_type: ``'readfile'``, ``'savefile'`` or ``'directory'``.
            extension: optional ``(description, patterns)`` file filter listed
                before the catch-all filter.
            initial_file: initial file name, or the initial directory for
                ``'directory'``.

        The entry is left untouched when the dialog returns an empty string.
        """
        default_extension = ("所有文件", '.*')
        args = {"filetypes": []}
        if extension is not None:
            args['filetypes'].append(extension)
        args['filetypes'].append(default_extension)
        if initial_file is not None:
            args['initialfile'] = initial_file

        filename = ''
        if choose_type == 'readfile':
            filename = tkinter.filedialog.askopenfilename(**args)
        elif choose_type == 'savefile':
            filename = tkinter.filedialog.asksaveasfilename(**args)
        elif choose_type == 'directory':
            # The directory dialog is given only the start directory.
            args = {
                'initialdir': initial_file
            }
            filename = tkinter.filedialog.askdirectory(**args)
        if filename != '':
            self.resources[dst].delete(0, 'end')
            self.resources[dst].insert(0, filename)

    def choose_color(self, dst):
        """Open the color chooser and write the picked color, as comma-separated
        components, into entry ``dst``; do nothing when no color is returned."""
        color = askcolor()[0]
        if color is not None:
            color = [str(c) for c in color]
            color = ','.join(color)
            self.resources[dst].delete(0, 'end')
            self.resources[dst].insert(0, color)

    def execute(self):
        """Collect the text of every ``entry_*`` resource and pass it to ``execution``.

        The dict handed over maps the part of the resource name after
        ``'entry_'`` to the entry's current text.  When no callback was
        configured a notice is printed instead of calling ``None``.
        """
        conf = {}
        for key in self.resources:
            dtype, name = key.split('_', maxsplit=1)
            if dtype == 'entry':
                conf[name] = self.resources[key].get()
        if self.execution is None:
            print('No execution callback configured.')
            return
        self.execution(conf)

    def create_widgets(self):
        """Create one form row per ``conf`` entry, the run button and the log box."""
        self.rowcount = 0
        for key, value in self.conf.items():
            name = value['name']
            dtype = value['type']
            default = value.get('default', '')
            self.add_label(name, {"row": self.rowcount, "column": 0, "padx": 10, "pady": 5}, name="label_" + key)
            self.add_entry(
                default=default,
                width=None,
                layout={"row": self.rowcount, "column": 1, "padx": 0, "sticky": "we"},
                name='entry_' + key
            )
            destination = 'entry_' + key
            if dtype in ['readfile', 'savefile', 'directory']:
                extension = value.get('extension', None)
                initialfile = default
                args = {
                    "dst": destination,
                    "choose_type": dtype,
                    "extension": extension,
                    "initial_file": initialfile
                }
                self.add_button(
                    text="选择文件",
                    # Bound as a default argument so each button keeps the
                    # arguments of its own row.
                    command=lambda args=args: self.choose_file(**args),
                    size=(16, 16),
                    layout={"row": self.rowcount, "column": 2, "padx": 5},
                    name='{0}_choose'.format(name),
                    image=None
                )
                # Note: this rewrites the 'type' field of the caller's dict.
                self.conf[key]['type'] = 'str'
            elif dtype == 'color':
                self.add_button(
                    text="选择颜色",
                    command=lambda dst=destination: self.choose_color(dst),
                    size=(16, 16),
                    layout={"row": self.rowcount, "column": 2, "padx": 5},
                    name='{0}_choose'.format(name),
                    image=None
                )

            self.rowcount += 1
        self.add_button(
            text=" 裁剪 ",
            command=self.execute,
            size=(16, 16),
            layout={"row": self.rowcount, "column": 0, "columnspan": 2, "pady": 5},
            name='execute_final'
        )
        self.add_textbox(
            width=40,
            layout={"row": 0, "column": 3, "rowspan": self.rowcount, "padx": 5, "pady": 0},
            name="textbox_log"
        )
        # From here on everything printed ends up in the log text box.
        sys.stdout = StdSimulator(self.resources['textbox_log'])
        sys.stderr = StdSimulator(self.resources['textbox_log'])
|
||||
|
||||
# Stand-alone entry point: shows the form when this module is run directly.
if __name__ == '__main__':
    # One entry per form row; "name" is the label text, "type" selects the
    # chooser button ("readfile"/"savefile" -> file dialog, "color" -> color
    # dialog, "str" -> none) and "default" is the initial entry text.
    conf = {
        "input": {"name": "源文件", "type": "readfile", "extension": ("PDF & PPT", ".pdf .pptx")},  # label: source file
        "output": {"name": "保存路径", "type": "savefile"},  # label: save path
        "background_color": {"name": "背景颜色", "type": "color", "default": "255,255,255"},  # label: background color
        "border": {"name": "留白", "type": "str", "default": "0.0"},  # label: margin to keep
        "zoom": {"name": "缩放等级", "type": "str", "default": "1.0"},  # label: zoom level
        "thresh": {"name": "阈值", "type": "str", "default": "1.0"},  # label: threshold
        "split": {"name": "拆分", "type": "str", "default": "false"},  # label: split
        "names": {"name": "页名称", "type": "str"},  # label: page names
        "visual": {"name": "显示裁切框", "type": "str", "default": "false"},  # label: show crop box
        "mute": {"name": "显示保存文件", "type": "str", "default": "false"},  # label: show saved files
    }
    root = tk.Tk()
    # NOTE(review): no `execution` callback is passed here, so this looks like
    # a layout preview only -- confirm.
    Loader(master=root, conf=conf)
    # Blocks until the window is closed.
    root.mainloop()
|
||||
Loading…
Reference in New Issue
Block a user