Untitled
unknown
python
2 years ago
1.2 kB
15
Indexable
#!/usr/bin/python3
import argparse
import pdf2image
import numpy as np
import scipy.ndimage
from scipy.special import comb
from PIL import Image
# Resolution, in dots per inch, used both to rasterise the input PDF pages
# and as the resolution recorded when the output PDF is saved.
DPI = 200
# Sigma handed to the Gaussian filter that estimates the page background;
# larger values average the brightness over a wider neighbourhood.
S = 60
# Width, in pixels, of the frame along every page edge that is forced to white.
B = 20
def smoothstep(x):
    """Evaluate the cubic smoothstep polynomial ``3t^2 - 2t^3``.

    The input is first clamped to the closed interval [0, 1], so values
    at or below 0 map to 0 and values at or above 1 map to 1.

    Args:
        x: A scalar or array-like of numbers.

    Returns:
        The smoothstep of the clamped input, with the same shape as ``x``.
    """
    clamped = np.clip(x, 0, 1)
    squared = clamped * clamped
    # Same evaluation order as 3*t^2 - 2*t^3 so results stay bit-identical.
    return 3 * squared - 2 * squared * clamped
def _clean_page(img):
    """Flatten the background of one rendered page and whiten its border.

    Each pixel is divided by a heavily blurred copy of the page's luminance,
    the ratio is passed through ``smoothstep`` and scaled to 0..255, and a
    frame of ``B`` pixels along every edge is set to white.

    Args:
        img: A PIL image of one page.
            NOTE(review): the broadcasting below assumes the image has a
            channel axis (e.g. RGB) -- confirm if other modes are ever passed.

    Returns:
        A new PIL image built from the cleaned 8-bit pixel data.
    """
    im = np.array(img)
    gray = np.array(img.convert("L"))
    # A strongly blurred luminance image serves as the local background level.
    blur = scipy.ndimage.gaussian_filter(gray, S)
    # Fully dark regions can blur to 0; dividing by that yields inf/NaN and an
    # undefined float -> uint8 cast, so clamp the divisor to at least 1.
    blur = np.maximum(blur, 1)
    q = im.astype(np.float32) / blur[..., None]
    im = smoothstep(q) * 255
    # white borders
    border_mask = np.ones(gray.shape, np.bool_)
    border_mask[B:-B, B:-B] = False
    im[border_mask] = 255
    return Image.fromarray(im.astype(np.uint8))


def main():
    """Command-line entry point: clean up every page of a scanned PDF.

    Reads the PDF named on the command line, renders each page at ``DPI``,
    cleans it with ``_clean_page`` and writes all pages to the file given by
    ``-o`` (default ``out.pdf``).

    Exits through ``parser.error`` (status 2) when the input yields no pages,
    instead of failing with an ``IndexError``.
    """
    parser = argparse.ArgumentParser(description="clean up PDF scans")
    parser.add_argument("filename", help="input PDF file")
    parser.add_argument("-o", default="out.pdf", help="specify output PDF file name")
    args = parser.parse_args()

    images = [
        _clean_page(img)
        for img in pdf2image.convert_from_path(args.filename, DPI)
    ]
    if not images:
        parser.error("no pages found in {!r}".format(args.filename))

    # The first page drives the save; the remaining pages are appended to it.
    first, *rest = images
    first.save(args.o, quality=70, save_all=True, append_images=rest, resolution=DPI)
# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Editor is loading...