Files
Aladdin Persson 8136ee169f DR kaggle
2021-05-30 16:24:52 +02:00

82 lines
2.8 KiB
Python

"""
Tries to remove unnecessary black borders around the images, and
"trim" the images so they take up the entirety of the image.
It's hacky & not very nice but it works :))
"""
import os
import numpy as np
from PIL import Image
import warnings
from multiprocessing import Pool
from tqdm import tqdm
import cv2
def trim(im, percentage=0.02):
    """
    Crop away the dark border surrounding the content of an image.

    The image is converted to grayscale with cv2 and thresholded at 10% of
    the mean of its non-zero pixels. A row (column) counts as content when
    more than ``percentage`` of its pixels are above that threshold, and the
    image is cropped to the first and last such row and column.

    Args:
        im: Image to trim. Assumed to be a PIL image in RGB(A) mode, i.e.
            ``np.array(im)`` yields an H x W x C array in RGB channel order.
        percentage: Fraction of a row's (column's) pixels that must be above
            the threshold for that row (column) to count as content.

    Returns:
        A new PIL image holding the cropped region. When nothing qualifies
        as content (e.g. an all-black image) the image is returned uncropped
        instead of raising.
    """
    img = np.array(im)

    # PIL hands us RGB channel order, so the RGB (not BGR) conversion is the
    # one that applies the luminance weights to the right channels.
    img_gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

    nonzero_pixels = img_gray[img_gray != 0]
    if nonzero_pixels.size == 0:
        # Entirely black: the mean below would be nan and no crop is possible.
        return Image.fromarray(img)

    mask = img_gray > 0.1 * np.mean(nonzero_pixels)
    row_sums = np.sum(mask, axis=1)
    col_sums = np.sum(mask, axis=0)

    # A row holds img.shape[1] pixels (the width); a column holds img.shape[0].
    rows = np.where(row_sums > img.shape[1] * percentage)[0]
    cols = np.where(col_sums > img.shape[0] * percentage)[0]
    if rows.size == 0 or cols.size == 0:
        # No row/column is bright enough; np.min on an empty array would raise.
        return Image.fromarray(img)

    min_row, min_col = np.min(rows), np.min(cols)
    max_row, max_col = np.max(rows), np.max(cols)
    im_crop = img[min_row : max_row + 1, min_col : max_col + 1]
    return Image.fromarray(im_crop)
def resize_maintain_aspect(image, desired_size):
    """
    Resize an image to fit a square canvas, padding to keep its aspect ratio.

    The longer side is scaled to ``desired_size`` and the shorter side by the
    same factor; the result is pasted centered onto a new
    ``desired_size`` x ``desired_size`` RGB canvas, so the leftover area is
    padding in the canvas' default fill color.

    Args:
        image: PIL image to resize.
        desired_size: Side length, in pixels, of the square output.

    Returns:
        A new RGB PIL image of size ``(desired_size, desired_size)``.
    """
    old_size = image.size  # PIL reports size as (width, height)
    ratio = float(desired_size) / max(old_size)

    # Clamp to at least 1 px: for extremely elongated images the short side
    # would otherwise truncate to 0 and make resize() fail.
    new_size = tuple(max(1, int(side * ratio)) for side in old_size)

    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
    im = image.resize(new_size, Image.LANCZOS)

    new_im = Image.new("RGB", (desired_size, desired_size))
    offset = (
        (desired_size - new_size[0]) // 2,
        (desired_size - new_size[1]) // 2,
    )
    new_im.paste(im, offset)
    return new_im
def save_single(args):
    """
    Trim, resize and save a single image.

    Takes one packed tuple (rather than separate parameters) so it can be
    mapped directly over a list of jobs by a multiprocessing pool.

    Args:
        args: Tuple ``(img_file, input_path_folder, output_path_folder,
            output_size)``. ``output_size`` is either an int or a sequence
            whose first element is the side length of the square output.
    """
    img_file, input_path_folder, output_path_folder, output_size = args
    side = output_size if isinstance(output_size, int) else output_size[0]

    # Close the source file as soon as its pixels have been consumed; trim()
    # returns a fresh image that no longer depends on the open file.
    with Image.open(os.path.join(input_path_folder, img_file)) as image_original:
        image = trim(image_original)

    image = resize_maintain_aspect(image, desired_size=side)

    # Join folder and file name as separate components so the output lands
    # inside the folder even when the folder path has no trailing separator.
    image.save(os.path.join(output_path_folder, img_file))
def fast_image_resize(input_path_folder, output_path_folder, output_size=None):
    """
    Trim and resize every file of a folder in parallel using multiprocessing.

    Each regular file in ``input_path_folder`` is handed to ``save_single``
    in a worker process, and a tqdm progress bar tracks completion. The
    output folder is created if it does not exist.

    Args:
        input_path_folder: Folder containing the source images.
        output_path_folder: Folder the processed images are written to.
        output_size: Side length of the square output, either as an int
            (e.g. ``100``) or as a tuple such as ``(100, 100)``.

    Raises:
        ValueError: If ``output_size`` is missing or empty.
    """
    if not output_size:
        # Fail loudly: the previous warn-and-exit() ended the whole
        # interpreter with a success status.
        raise ValueError("Need to specify output_size! For example: output_size=100")

    # Workers index into output_size, so normalize the int form to a tuple.
    if isinstance(output_size, int):
        output_size = (output_size, output_size)

    os.makedirs(output_path_folder, exist_ok=True)

    # Only regular files are queued; a sub-directory would crash its worker.
    jobs = [
        (file, input_path_folder, output_path_folder, output_size)
        for file in os.listdir(input_path_folder)
        if os.path.isfile(os.path.join(input_path_folder, file))
    ]

    with Pool() as p:
        # imap_unordered is lazy; list() drains it so every job actually runs
        # while tqdm counts the finished ones.
        list(tqdm(p.imap_unordered(save_single, jobs), total=len(jobs)))
if __name__ == "__main__":
    # Produce 150x150 versions of the training images, then the test images.
    target_size = (150, 150)
    folder_pairs = (
        ("../train/images/", "../train/images_resized_150/"),
        ("../test/images/", "../test/images_resized_150/"),
    )
    for source_dir, destination_dir in folder_pairs:
        fast_image_resize(source_dir, destination_dir, output_size=target_size)