import zipfile
from zipfile import ZipFile
import math
import PIL
from PIL import Image
from PIL import ImageDraw
from PIL import ImageFont
from PIL import ImageOps
import pytesseract
import cv2 as cv
import numpy as np
from [Link] import display
import inspect
# Load the pre-trained frontal-face Haar cascade used for face detection.
# NOTE(review): the constructor name was garbled to "[Link]" in this copy of
# the file; cv.CascadeClassifier is the reconstruction -- confirm.
cascade_path = 'readonly/haarcascade_frontalface_default.xml'
face_cascade = cv.CascadeClassifier(cascade_path)
# Fail here with a clear message instead of at the first detection call if
# the cascade could not be loaded.
if face_cascade.empty():
    raise RuntimeError('Could not load Haar cascade from {}'.format(cascade_path))
# the rest is up to you!
# Per-page records keyed by the page's file name inside the archive.
# Each record starts as {'pil_img': <RGB PIL image>}.
parsed_img_src = {}
# Read every image in the zip archive and store its RGB-converted copy.
# NOTE(review): the archive file name and the attribute names below
# (infolist, open, Image.open, filename) were garbled to "[Link]" in this copy
# of the file. The calls are reconstructed; the archive name could not be
# recovered and must be restored before running.
archive = ZipFile('readonly/[Link]','r')
with archive as myzip:
    for entry in myzip.infolist():
        # Directory entries carry no image data (their names end with '/').
        if entry.filename.endswith('/'):
            continue
        with myzip.open(entry) as file:
            img = Image.open(file).convert('RGB')
        parsed_img_src[entry.filename] = {'pil_img': img}
# Run OCR on every page and keep the recognised text next to its image
# under the 'text' key.
for img_name, page in parsed_img_src.items():
    text = pytesseract.image_to_string(page['pil_img'])
    page['text'] = text
# Detect faces on every page and store the cropped face images under 'faces'.
# NOTE(review): np.array and cv.cvtColor were garbled to "[Link]" in this copy
# of the file and are reconstructed here -- confirm.
for img_name in parsed_img_src:
    page_img = parsed_img_src[img_name]['pil_img']
    open_cv_image = np.array(page_img)
    # The array comes from a PIL 'RGB' image, so its channels are in RGB
    # order; COLOR_BGR2GRAY would apply the red/blue weights the wrong way
    # round.
    img_g = cv.cvtColor(open_cv_image, cv.COLOR_RGB2GRAY)
    # Positional arguments: scaleFactor=1.3, minNeighbors=5.
    faces_bounding_boxes = face_cascade.detectMultiScale(img_g, 1.3, 5)
    # Each box is (x, y, width, height); crop() wants (left, top, right, bottom).
    parsed_img_src[img_name]['faces'] = [
        page_img.crop((x, y, x + w, y + h))
        for x, y, w, h in faces_bounding_boxes
    ]
# Shrink every cropped face in place so it fits within 100x100 pixels.
# thumbnail() keeps the aspect ratio and never enlarges an image.
# NOTE(review): the method and resampling-filter names were garbled to
# "[Link]" in this copy of the file. Image.LANCZOS is used here (it is the
# current name of the filter formerly exposed as Image.ANTIALIAS, which
# Pillow 10 removed) -- confirm against the original.
for page in parsed_img_src.values():
    for face in page['faces']:
        face.thumbnail((100, 100), Image.LANCZOS)
def _make_contact_sheet(faces, columns=5, cell=100):
    """Paste *faces* row by row onto a new RGB sheet and return it.

    The sheet is ``columns * cell`` pixels wide and has as many ``cell``-high
    rows as are needed to hold every face. Each face is pasted at the
    top-left corner of its grid cell.
    """
    # NOTE(review): math.ceil and Image.new were garbled to "[Link]" in this
    # copy of the file and are reconstructed here -- confirm.
    rows = math.ceil(len(faces) / columns)
    sheet = Image.new('RGB', (columns * cell, rows * cell))
    for index, face in enumerate(faces):
        row, col = divmod(index, columns)
        sheet.paste(face, (col * cell, row * cell))
    return sheet


def lookfor(keyword):
    """Show the faces found on every page whose text contains *keyword*.

    The match is a case-sensitive substring test against each page's 'text'
    entry in ``parsed_img_src``. For every matching page a message naming
    the file is printed; if the page has faces, they are displayed as a
    contact sheet five thumbnails wide, otherwise the message says that no
    faces were found. Pages without a match produce no output.

    Returns None.
    """
    for img_name, page in parsed_img_src.items():
        if keyword not in page['text']:
            continue
        faces = page['faces']
        if not faces:
            print('Result found in file {} \nBut there were no faces in that file\n\n'.format(img_name))
            continue
        print('Result found in file {}'.format(img_name))
        display(_make_contact_sheet(faces))
# Demo searches: look up each keyword in turn; results are printed and
# displayed by lookfor itself.
for keyword in ('Christopher', 'Pizza', 'Mark'):
    lookfor(keyword)