Untitled
python
2 months ago
1.2 kB
6
Indexable
Never
#!/usr/bin/python3
"""Clean up scanned PDFs by flattening uneven page brightness.

Every page is rasterized at ``DPI``, divided by a heavily blurred grayscale
copy of itself (a per-pixel estimate of the local background brightness),
passed through a smoothstep contrast curve, given a white margin, and written
back out as one multi-page PDF.
"""
import argparse

import numpy as np
import scipy.ndimage
from scipy.special import comb  # noqa: F401  (unused; kept from the original import list)

DPI = 200  # resolution used both to rasterize the input and to tag the output
S = 60     # sigma handed to the Gaussian blur that estimates the background
B = 20     # width, in pixels, of the page margin that is forced to white


def smoothstep(x):
    """Return the cubic smoothstep ``3x^2 - 2x^3`` of *x* clamped to [0, 1].

    Works element-wise on NumPy arrays (and on plain scalars).
    """
    x = np.clip(x, 0, 1)
    x2 = x * x
    return 3 * x2 - 2 * x2 * x


def clean_page(rgb, gray, sigma=S, border=B):
    """Return the cleaned-up version of one page as a ``uint8`` array.

    Args:
        rgb: Page pixels, either ``(H, W, C)`` or ``(H, W)``.
        gray: Grayscale version of the same page, shape ``(H, W)``.
        sigma: Standard deviation of the Gaussian background blur.
        border: Width in pixels of the margin painted white; ``0`` disables it.

    Returns:
        Array with the same shape as *rgb* and dtype ``uint8``.
    """
    background = scipy.ndimage.gaussian_filter(gray, sigma)
    # The blur keeps gray's dtype, so for uint8 input it is exactly 0 across
    # large dark areas. Clamp to 1 so the division below can never produce
    # inf/NaN (NaN survives np.clip and has no defined uint8 value).
    background = np.maximum(background, 1)
    if rgb.ndim == 3:
        # Add a channel axis so the background broadcasts over the colours.
        background = background[..., None]

    ratio = rgb.astype(np.float32) / background
    out = smoothstep(ratio) * 255

    # Paint the outer margin white. Explicit end indices (instead of -border)
    # keep the slice correct when border == 0.
    height, width = gray.shape
    margin = np.ones(gray.shape, np.bool_)
    margin[border:height - border, border:width - border] = False
    out[margin] = 255

    return out.astype(np.uint8)


def main():
    """Parse the command line, clean every page, and save the result as a PDF.

    Raises:
        SystemExit: If the input PDF yields no pages.
    """
    # Imported here rather than at module level so the NumPy-only helpers
    # above stay importable on machines without pdf2image / Pillow.
    import pdf2image
    from PIL import Image

    parser = argparse.ArgumentParser(description="clean up PDF scans")
    parser.add_argument("filename", help="input PDF file")
    parser.add_argument("-o", default="out.pdf", help="specify output PDF file name")
    args = parser.parse_args()

    pages = [
        Image.fromarray(clean_page(np.array(page), np.array(page.convert("L"))))
        for page in pdf2image.convert_from_path(args.filename, DPI)
    ]
    if not pages:
        # Previously this surfaced as a bare IndexError from list.pop(0).
        raise SystemExit("{}: no pages found".format(args.filename))

    first, *rest = pages
    first.save(args.o, quality=70, save_all=True, append_images=rest, resolution=DPI)


if __name__ == "__main__":
    main()