""" Tries to remove unnecessary black borders around the images, and "trim" the images to they take up the entirety of the image. It's hacky & not very nice but it works :)) """ import os import numpy as np from PIL import Image import warnings from multiprocessing import Pool from tqdm import tqdm import cv2 def trim(im): """ Converts image to grayscale using cv2, then computes binary matrix of the pixels that are above a certain threshold, then takes out the first row where a certain percetage of the pixels are above the threshold will be the first clip point. Same idea for col, max row, max col. """ percentage = 0.02 img = np.array(im) img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) im = img_gray > 0.1 * np.mean(img_gray[img_gray != 0]) row_sums = np.sum(im, axis=1) col_sums = np.sum(im, axis=0) rows = np.where(row_sums > img.shape[1] * percentage)[0] cols = np.where(col_sums > img.shape[0] * percentage)[0] min_row, min_col = np.min(rows), np.min(cols) max_row, max_col = np.max(rows), np.max(cols) im_crop = img[min_row : max_row + 1, min_col : max_col + 1] return Image.fromarray(im_crop) def resize_maintain_aspect(image, desired_size): """ Stole this from some stackoverflow post but can't remember which, this will add padding to maintain the aspect ratio. """ old_size = image.size # old_size[0] is in (width, height) format ratio = float(desired_size) / max(old_size) new_size = tuple([int(x * ratio) for x in old_size]) im = image.resize(new_size, Image.ANTIALIAS) new_im = Image.new("RGB", (desired_size, desired_size)) new_im.paste(im, ((desired_size - new_size[0]) // 2, (desired_size - new_size[1]) // 2)) return new_im def save_single(args): img_file, input_path_folder, output_path_folder, output_size = args image_original = Image.open(os.path.join(input_path_folder, img_file)) image = trim(image_original) image = resize_maintain_aspect(image, desired_size=output_size[0]) image.save(os.path.join(output_path_folder + img_file)) def fast_image_resize(input_path_folder, output_path_folder, output_size=None): """ Uses multiprocessing to make it fast """ if not output_size: warnings.warn("Need to specify output_size! For example: output_size=100") exit() if not os.path.exists(output_path_folder): os.makedirs(output_path_folder) jobs = [ (file, input_path_folder, output_path_folder, output_size) for file in os.listdir(input_path_folder) ] with Pool() as p: list(tqdm(p.imap_unordered(save_single, jobs), total=len(jobs))) if __name__ == "__main__": fast_image_resize("../train/images/", "../train/images_resized_150/", output_size=(150, 150)) fast_image_resize("../test/images/", "../test/images_resized_150/", output_size=(150, 150))