Required XML cascade file for face detection: haarcascade_frontalface_default.xml (download: [Link])
Geometric transformations
import cv2
import numpy as np

# Load the input image
image = cv2.imread("E:\\ComputerVisionPractical\\PracticalNO1\\[Link]")

# Perform linear and cubic scaling
image_re_linear = cv2.resize(image, None, fx=2.5, fy=3.5, interpolation=cv2.INTER_LINEAR)
image_re_cubic = cv2.resize(image, None, fx=2.5, fy=3.5, interpolation=cv2.INTER_CUBIC)

# Perform the translation (shift by 80 pixels along x and y)
matrix = np.float32([[1, 0, 80], [0, 1, 80]])
translated = cv2.warpAffine(image, matrix, (image.shape[1], image.shape[0]))

# Rotation by 45 degrees about the image centre
height, width = image.shape[:2]
matrix1 = cv2.getRotationMatrix2D((width / 2, height / 2), 45, 1)
rotated = cv2.warpAffine(image, matrix1, (width, height))

# Display the original and transformed images
cv2.imshow('Original', image)
cv2.imshow('Rotation', rotated)
cv2.imshow('Translation', translated)
cv2.imshow('Cubic', image_re_cubic)
cv2.imshow('Linear', image_re_linear)
cv2.waitKey(50000)  # wait up to 50 s for a key press
cv2.destroyAllWindows()
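
The same image can also be given a perspective (projective) warp; a minimal sketch, where the four destination corner coordinates are illustrative assumptions rather than values from the practical:

import cv2
import numpy as np

image = cv2.imread("E:\\ComputerVisionPractical\\PracticalNO1\\[Link]")
height, width = image.shape[:2]

# Map the four source corners to four (assumed) destination corners
src_pts = np.float32([[0, 0], [width - 1, 0], [0, height - 1], [width - 1, height - 1]])
dst_pts = np.float32([[50, 50], [width - 50, 30], [30, height - 50], [width - 80, height - 80]])

perspective_matrix = cv2.getPerspectiveTransform(src_pts, dst_pts)
warped = cv2.warpPerspective(image, perspective_matrix, (width, height))

cv2.imshow('Perspective', warped)
cv2.waitKey(0)
cv2.destroyAllWindows()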
Image Stitching
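A minimal sketch of panorama stitching with OpenCV's high-level Stitcher API; the input file names left.jpg and right.jpg are assumptions, not files from the original practical:

import cv2

# Read two overlapping input images (file names are placeholders)
img_left = cv2.imread('left.jpg')
img_right = cv2.imread('right.jpg')

# Create a Stitcher and build the panorama
stitcher = cv2.Stitcher_create()
status, panorama = stitcher.stitch([img_left, img_right])

if status == cv2.Stitcher_OK:
    cv2.imshow('Panorama', panorama)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
else:
    print("Stitching failed with status", status)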
Camera Calibration
import cv2
import numpy as np
import os
import glob

# Inner-corner grid of the checkerboard and corner-refinement termination criteria
CHECKERBOARD = (6, 9)
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001)

objpoints = []  # 3D points in world coordinates
imgpoints = []  # 2D points in image coordinates

objp = np.zeros((1, CHECKERBOARD[0] * CHECKERBOARD[1], 3), np.float32)
objp[0, :, :2] = np.mgrid[0:CHECKERBOARD[0], 0:CHECKERBOARD[1]].T.reshape(-1, 2)
prev_img_shape = None

images = glob.glob('*.jpg')
for fname in images:
    img = cv2.imread(fname)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    ret, corners = cv2.findChessboardCorners(
        gray, CHECKERBOARD,
        cv2.CALIB_CB_ADAPTIVE_THRESH + cv2.CALIB_CB_FAST_CHECK + cv2.CALIB_CB_NORMALIZE_IMAGE)
    if ret:
        objpoints.append(objp)
        # Refine corner locations to sub-pixel accuracy
        corners2 = cv2.cornerSubPix(gray, corners, (11, 11), (-1, -1), criteria)
        imgpoints.append(corners2)
        img = cv2.drawChessboardCorners(img, CHECKERBOARD, corners2, ret)
    cv2.imshow('img', img)
    cv2.waitKey(0)
cv2.destroyAllWindows()

h, w = img.shape[:2]
ret, mtx, dist, rvecs, tvecs = cv2.calibrateCamera(objpoints, imgpoints, gray.shape[::-1], None, None)
print("Camera matrix:\n", mtx)
print("Distortion coefficients:\n", dist)
print("Rotation vectors:\n", rvecs)
print("Translation vectors:\n", tvecs)
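
The calibration results can be checked by undistorting one of the input images; a minimal sketch, assuming img still holds the last image read in the loop above:

# Refine the camera matrix for this image size, then remove lens distortion
h, w = img.shape[:2]
new_mtx, roi = cv2.getOptimalNewCameraMatrix(mtx, dist, (w, h), 1, (w, h))
undistorted = cv2.undistort(img, mtx, dist, None, new_mtx)

# Crop to the valid region of interest returned above
x, y, w, h = roi
undistorted = undistorted[y:y + h, x:x + w]
cv2.imshow('Undistorted', undistorted)
cv2.waitKey(0)
cv2.destroyAllWindows()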
Face detection
import cv2

# Load the pre-trained face detector
face_detector = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')

# Read the input image
image = cv2.imread('[Link]')

# Convert the image to grayscale
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Detect faces in the image
faces = face_detector.detectMultiScale(gray)

# Draw bounding boxes around the detected faces
for (x, y, w, h) in faces:
    cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)

# Display the output image
cv2.imshow('Face Detection', image)
cv2.waitKey(0)
cv2.destroyAllWindows()
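
If the default detectMultiScale call misses faces or reports false positives, its parameters can be tuned; the values below are common starting points, not ones taken from the original practical:

# Tighter detection: smaller scale steps, more neighbour confirmations, minimum face size
faces = face_detector.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))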
Object detection
import cv2
from matplotlib import pyplot as plt

# Paths
image_path = "[Link]"
cascade_path = 'stop_data.xml'

# Load image
img = cv2.imread(image_path)
if img is None:
    raise FileNotFoundError("Image file could not be loaded. Check the path.")

# Convert image: grayscale for detection, RGB for matplotlib display
img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

# Load cascade
stop_data = cv2.CascadeClassifier(cascade_path)
if stop_data.empty():
    raise FileNotFoundError("Cascade file could not be loaded. Check the path.")

# Detect stop signs and draw rectangles
for (x, y, w, h) in stop_data.detectMultiScale(img_gray, minSize=(20, 20)):
    cv2.rectangle(img_rgb, (x, y), (x + w, y + h), (0, 255, 0), 3)

# Display image
plt.imshow(img_rgb)
plt.show()
Pedestrian detection
# pip install imutils
import cv2
import imutils

# Initializing the HOG person detector
hog = cv2.HOGDescriptor()
hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())

# Video capture from file
cap = cv2.VideoCapture('PD1.mp4')

while cap.isOpened():
    ret, image = cap.read()
    if not ret:
        break
    # Resize the frame to speed up detection
    image = imutils.resize(image, width=min(400, image.shape[1]))
    # Detect people in the frame
    regions, _ = hog.detectMultiScale(image, winStride=(4, 4), padding=(4, 4), scale=1.05)
    for (x, y, w, h) in regions:
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
    cv2.imshow("Image", image)
    if cv2.waitKey(25) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
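
Overlapping HOG detections are usually merged with non-maximum suppression before drawing; a minimal sketch using imutils, meant as a drop-in replacement for the drawing loop above (it assumes regions and image from that loop):

import numpy as np
from imutils.object_detection import non_max_suppression

# Convert (x, y, w, h) boxes to (x1, y1, x2, y2) and suppress overlapping detections
rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in regions])
picked = non_max_suppression(rects, probs=None, overlapThresh=0.65)
for (x1, y1, x2, y2) in picked:
    cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)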
Face recognition from video
# pip install imutils
import cv2
import imutils

# HOG person detector applied frame by frame
hog = cv2.HOGDescriptor()
hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())

cap = cv2.VideoCapture('fac.mp4')

while cap.isOpened():
    ret, image = cap.read()
    if not ret:
        break
    image = imutils.resize(image, width=min(400, image.shape[1]))
    regions, _ = hog.detectMultiScale(image, winStride=(4, 4), padding=(4, 4), scale=1.05)
    for (x, y, w, h) in regions:
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
    cv2.imshow("Image", image)
    if cv2.waitKey(25) & 0xFF == ord('q'):
        break

# Release video capture and close all windows
cap.release()
cv2.destroyAllWindows()
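
The script above applies the HOG person detector to each frame; to look for faces specifically, the Haar cascade from the face-detection practical can be applied per frame instead. A minimal sketch, assuming the same fac.mp4 input:

import cv2

face_detector = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
cap = cv2.VideoCapture('fac.mp4')

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = face_detector.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5)
    for (x, y, w, h) in faces:
        cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)
    cv2.imshow("Faces", frame)
    if cv2.waitKey(25) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()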
Construct 3D model from images
from PIL import Image
import numpy as np

def shift_image(img, depth_img, shift_amount=10):
    # Ensure base image has alpha
    img = img.convert("RGBA")
    data = np.array(img)
    depth_img = depth_img.convert("L")
    depth_data = np.array(depth_img)
    # Per-pixel horizontal shift proportional to depth
    deltas = ((depth_data / 255.0) * float(shift_amount)).astype(int)
    shifted_data = np.zeros_like(data)
    height, width, _ = data.shape
    for y, row in enumerate(deltas):
        for x, dx in enumerate(row):
            if 0 <= x + dx < width:
                shifted_data[y, x + dx] = data[y, x]
    shifted_image = Image.fromarray(shifted_data.astype(np.uint8))
    return shifted_image

img = Image.open("[Link]")
depth_img = Image.open("[Link]")
shifted_img = shift_image(img, depth_img, shift_amount=10)
shifted_img.show()
Object detection and tracking from video
import cv2
import numpy as np

# Load pre-trained YOLO model for object detection (weights file, config file)
net = cv2.dnn.readNet("[Link]", "[Link]")
classes = []
with open("[Link]", "r") as f:
    classes = [line.strip() for line in f.readlines()]
layer_names = net.getLayerNames()
output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]
colors = [(0, 255, 0)]  # Green colour for bounding boxes

# Initialize video capture
cap = cv2.VideoCapture("AtalSetu.mp4")

# Object tracking variables
tracked_objects = {}
next_id = 1

while True:
    ret, frame = cap.read()
    if not ret:
        break
    height, width, channels = frame.shape

    # Object detection
    blob = cv2.dnn.blobFromImage(frame, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
    net.setInput(blob)
    outs = net.forward(output_layers)

    boxes = []
    confidences = []
    class_ids = []
    for out in outs:
        for detection in out:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > 0.5:
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)
                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)

    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)

    # Object tracking and drawing bounding boxes
    if len(indexes) > 0:
        for i in np.array(indexes).flatten():
            x, y, w, h = boxes[i]
            label = str(classes[class_ids[i]])
            color = colors[0]
            # Check if object is already tracked (previous box within 50 px)
            matched_id = None
            for obj_id, tracked_box in tracked_objects.items():
                if abs(x - tracked_box[0]) < 50 and abs(y - tracked_box[1]) < 50:
                    matched_id = obj_id
                    break
            # Update or create a new tracked object
            if matched_id is not None:
                tracked_objects[matched_id] = (x, y)
                cv2.putText(frame, f"{label} {matched_id}", (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
                cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
            else:
                tracked_objects[next_id] = (x, y)
                cv2.putText(frame, f"{label} {next_id}", (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
                cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
                next_id += 1

    cv2.imshow("Frame", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
Feature extraction using RANSAC
import cv2
import numpy as np

def extract_features_and_match(img1, img2):
    # SIFT keypoints and descriptors
    sift = cv2.SIFT_create()
    kp1, des1 = sift.detectAndCompute(img1, None)
    kp2, des2 = sift.detectAndCompute(img2, None)
    # Brute-force matching with Lowe's ratio test
    bf = cv2.BFMatcher()
    matches = bf.knnMatch(des1, des2, k=2)
    good_matches = []
    for m, n in matches:
        if m.distance < 0.75 * n.distance:
            good_matches.append(m)
    return kp1, kp2, good_matches

def apply_ransac(kp1, kp2, matches):
    src_pts = np.float32([kp1[m.queryIdx].pt for m in matches]).reshape(-1, 1, 2)
    dst_pts = np.float32([kp2[m.trainIdx].pt for m in matches]).reshape(-1, 1, 2)
    # Homography estimation with RANSAC; mask marks the inlier matches
    M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
    return M, mask

def draw_matches_with_mask(img1, kp1, img2, kp2, matches, mask):
    matches_mask = mask.ravel().tolist()
    draw_params = dict(matchColor=(0, 255, 0),
                       singlePointColor=None,
                       matchesMask=matches_mask,
                       flags=2)
    img_matches = cv2.drawMatches(img1, kp1, img2, kp2, matches, None, **draw_params)
    return img_matches

img1 = cv2.imread('[Link]', cv2.IMREAD_GRAYSCALE)
img2 = cv2.imread('[Link]', cv2.IMREAD_GRAYSCALE)
kp1, kp2, matches = extract_features_and_match(img1, img2)
M, mask = apply_ransac(kp1, kp2, matches)
img_matches = draw_matches_with_mask(img1, kp1, img2, kp2, matches, mask)
cv2.imshow("Matches with RANSAC", img_matches)
cv2.waitKey(0)
cv2.destroyAllWindows()
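
The homography M estimated with RANSAC can also be used to align the two images; a minimal usage sketch, assuming img1, img2 and M from the script above:

# Warp img1 into the coordinate frame of img2 using the RANSAC homography
h, w = img2.shape[:2]
aligned = cv2.warpPerspective(img1, M, (w, h))
cv2.imshow("img1 warped onto img2", aligned)
cv2.waitKey(0)
cv2.destroyAllWindows()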
Colorization
import numpy as np
import cv2
from cv2 import dnn

# Model file paths
proto_file = 'colorization_deploy_v2.prototxt'
model_file = 'colorization_release_v2.caffemodel'
hull_pts = 'pts_in_hull.npy'
img_path = '[Link]'

img = cv2.imread(img_path)
if img is None:
    print(f"Error: Unable to load image at {img_path}")
    exit()

# Convert the image to Lab colour space
scaled = img.astype("float32") / 255.0
lab_img = cv2.cvtColor(scaled, cv2.COLOR_BGR2LAB)

# Load the network and the cluster centres for the ab channels
net = dnn.readNetFromCaffe(proto_file, model_file)
kernel = np.load(hull_pts)

class8 = net.getLayerId("class8_ab")
conv8 = net.getLayerId("conv8_313_rh")
pts = kernel.transpose().reshape(2, 313, 1, 1)
net.getLayer(class8).blobs = [pts.astype("float32")]
net.getLayer(conv8).blobs = [np.full([1, 313], 2.606, dtype="float32")]

# Feed the L channel of the resized image to the network
resized = cv2.resize(lab_img, (224, 224))
L = cv2.split(resized)[0]
L -= 50
net.setInput(dnn.blobFromImage(L))

# Predicted ab channels, resized back to the original image size
ab_channel = net.forward()[0, :, :, :].transpose((1, 2, 0))
ab_channel = cv2.resize(ab_channel, (img.shape[1], img.shape[0]))

# Convert to colorized image in BGR
L = cv2.split(lab_img)[0]
colorized = cv2.cvtColor(np.concatenate((L[:, :, np.newaxis], ab_channel), axis=2), cv2.COLOR_LAB2BGR)
colorized = np.clip(colorized, 0, 1)
colorized = (255 * colorized).astype("uint8")

# Resize images for display
img_display = cv2.resize(img, (640, 640))
colorized_display = cv2.resize(colorized, (640, 640))

# Concatenate and display images
result = cv2.hconcat([img_display, colorized_display])
cv2.imshow("Grayscale -> Colour", result)
cv2.waitKey(0)
cv2.destroyAllWindows()
Text detection and recognition
import cv2
import pytesseract
import os

# Point pytesseract at the local Tesseract installation
os.environ['TESSDATA_PREFIX'] = r'C:\Users\Vivek\AppData\Local\Programs\Tesseract-OCR\tessdata'
pytesseract.pytesseract.tesseract_cmd = r'C:\Users\Vivek\AppData\Local\Programs\Tesseract-OCR\tesseract.exe'

image_path = '[Link]'
img = cv2.imread(image_path)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Otsu threshold (inverted) so that text becomes foreground
ret, thresh1 = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV)

# Dilate to merge characters into text blocks
rect_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (18, 18))
dilation = cv2.dilate(thresh1, rect_kernel, iterations=1)

# Find the text blocks
contours, hierarchy = cv2.findContours(dilation, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)

im2 = img.copy()
output_path = '[Link]'
with open(output_path, "w+") as file:
    for cnt in contours:
        x, y, w, h = cv2.boundingRect(cnt)
        cropped = im2[y:y + h, x:x + w]
        text = pytesseract.image_to_string(cropped)
        print(f"Recognized text: {text}")
        file.write(text + "\n")
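
To visualise which regions were passed to the OCR engine, the bounding boxes can be drawn on the working copy; a small optional sketch that runs after the loop above:

# Draw the detected text regions on the working copy and display them
for cnt in contours:
    x, y, w, h = cv2.boundingRect(cnt)
    cv2.rectangle(im2, (x, y), (x + w, y + h), (0, 255, 0), 2)
cv2.imshow("Detected text regions", im2)
cv2.waitKey(0)
cv2.destroyAllWindows()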
Image matting and Compositing
import matplotlib.pyplot as plt
from PIL import Image
import requests

# Load the image and trimap from their URLs
url = "[Link]"
image = Image.open(requests.get(url, stream=True).raw).convert("RGB")
url = "[Link]"
trimap = Image.open(requests.get(url, stream=True).raw)

# Display the image and trimap side by side
plt.figure(figsize=(15, 15))
plt.subplot(1, 2, 1)
plt.imshow(image)
plt.subplot(1, 2, 2)
plt.imshow(trimap)
plt.show()
Image Compositing
from PIL import Image

# Load images and convert to mode 'L' (single-channel grayscale)
im1 = Image.open('[Link]').convert('L')
im2 = Image.open('[Link]').convert('L')
mask = Image.open('[Link]').convert('L')

# Ensure all images are the same size
size = im1.size
im2 = im2.resize(size)
mask = mask.resize(size)

# Composite the images: the mask selects im1 where it is white and im2 where it is black
im3 = Image.composite(im1, im2, mask)

# Show the result
im3.show()