I'm working on code to track, count, and estimate the speed of vehicles, but after running it, the output video it writes won't open.
%% [code]
import os
HOME = os.getcwd()
print(HOME)
%% [code]
!pip install -q gdown
%cd {HOME}
!gdown '1pz68D1Gsx80MoPg-_q-IbEdESEmyVLm-'
%% [code]
SOURCE_VIDEO_PATH = f"{HOME}/vehicle-counting.mp4"
%% [code]
!pip install ultralytics
from IPython import display
display.clear_output()
import ultralytics
ultralytics.checks()
%% [code]
!pip install supervision
from IPython import display
display.clear_output()
import supervision as sv
print("supervision.__version__:", sv.__version__)
%% [code]
import cv2
import numpy as np
import supervision as sv
from tqdm import tqdm
from ultralytics import YOLO
from supervision.assets import VideoAssets, download_assets
from collections import defaultdict, deque
%% [code]
SOURCE_VIDEO_PATH = f"{HOME}/vehicle-counting.mp4"
TARGET_VIDEO_PATH = f"{HOME}/vehicle-counting-result-with-counter.mp4"
CONFIDENCE_THRESHOLD = 0.3
IOU_THRESHOLD = 0.5
MODEL_NAME = "yolov8x.pt"
MODEL_RESOLUTION = 1280
%% [code]
from ultralytics import YOLO
model = YOLO(MODEL_NAME)
model.fuse()
%% [code]
SOURCE = np.array([
    [1252, 787],
    [2298, 803],
    [5039, 2159],
    [-550, 2159]
])

TARGET_WIDTH = 25
TARGET_HEIGHT = 250

TARGET = np.array([
    [0, 0],
    [TARGET_WIDTH - 1, 0],
    [TARGET_WIDTH - 1, TARGET_HEIGHT - 1],
    [0, TARGET_HEIGHT - 1],
])
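%% [markdown]
SOURCE is the road region in pixel coordinates; TARGET is that same region unwrapped into a 25 x 250 rectangle. Assuming the target units are meters (which the km/h conversion later in the notebook implies), distances measured after the perspective transform are real-world distances, so speed = distance / time * 3.6 converts m/s to km/h. For example, a car covering 10 m of transformed distance in 0.5 s is doing 20 m/s, i.e. 72 km/h.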
%% [markdown]
Predict a single frame
%% [code]
# dict mapping class_id to class_name
CLASS_NAMES_DICT = model.model.names
# class_ids of interest - car, motorcycle, bus and truck
selected_classes = [2, 3, 5, 7]
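%% [markdown]
A quick sanity check that those ids map to the intended classes (assuming the COCO-pretrained yolov8x weights):
%% [code]
# should print {2: 'car', 3: 'motorcycle', 5: 'bus', 7: 'truck'}
print({class_id: CLASS_NAMES_DICT[class_id] for class_id in selected_classes})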
%% [code]
import supervision as sv
import numpy as np
%% [code]
generator = sv.get_video_frames_generator(SOURCE_VIDEO_PATH)
# create instance of BoxAnnotator
box_annotator = sv.BoxAnnotator(thickness=4, text_thickness=4, text_scale=2)
# acquire first video frame
iterator = iter(generator)
frame = next(iterator)
# model prediction on single frame
results = model(frame, verbose=False)[0]
# convert to supervision Detections
detections = sv.Detections.from_ultralytics(results)
# only consider class ids from selected_classes defined above
detections = detections[np.isin(detections.class_id, selected_classes)]
# format custom labels
labels = [
    f"{CLASS_NAMES_DICT[class_id]} {confidence:0.2f}"
    for confidence, class_id in zip(detections.confidence, detections.class_id)
]
# annotate and display frame
annotated_frame = box_annotator.annotate(scene=frame, detections=detections, labels=labels)
%matplotlib inline
sv.plot_image(annotated_frame, (16, 16))
%% [code]
class ViewTransformer:
    def __init__(self, source: np.ndarray, target: np.ndarray) -> None:
        source = source.astype(np.float32)
        target = target.astype(np.float32)
        self.m = cv2.getPerspectiveTransform(source, target)

    def transform_points(self, points: np.ndarray) -> np.ndarray:
        if points.size == 0:
            return points
        reshaped_points = points.reshape(-1, 1, 2).astype(np.float32)
        transformed_points = cv2.perspectiveTransform(reshaped_points, self.m)
        return transformed_points.reshape(-1, 2)
%% [code]
view_transformer = ViewTransformer(source=SOURCE, target=TARGET)
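%% [markdown]
A minimal sanity check of the homography: the four SOURCE corners should land, up to float error, on the four TARGET corners.
%% [code]
# expected output, approximately: [[0, 0], [24, 0], [24, 249], [0, 249]]
print(view_transformer.transform_points(points=SOURCE))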
%% [markdown]
Track, count and estimate speed
%% [code]
# settings
LINE_START = sv.Point(50, 1500)
LINE_END = sv.Point(3840-50, 1500)
TARGET_VIDEO_PATH = f"{HOME}/vehicle-counting-result-with-counter.mp4"
%% [code]
sv.VideoInfo.from_video_path(SOURCE_VIDEO_PATH)
%% [code]
model = YOLO(MODEL_NAME)

# create VideoInfo instance
video_info = sv.VideoInfo.from_video_path(video_path=SOURCE_VIDEO_PATH)

# tracker initialization - create ByteTrack instance
# byte_tracker = sv.ByteTrack(track_thresh=0.25, track_buffer=30, match_thresh=0.8, frame_rate=30)
byte_track = sv.ByteTrack(
    frame_rate=video_info.fps, track_thresh=CONFIDENCE_THRESHOLD
)

# create frame generator
frame_generator = sv.get_video_frames_generator(SOURCE_VIDEO_PATH)

# create LineZone instance (previously called LineCounter)
line_zone = sv.LineZone(start=LINE_START, end=LINE_END)

# annotators configuration
thickness = sv.calculate_dynamic_line_thickness(
    resolution_wh=video_info.resolution_wh
)
text_scale = sv.calculate_dynamic_text_scale(
    resolution_wh=video_info.resolution_wh
)
bounding_box_annotator = sv.BoundingBoxAnnotator(
    thickness=thickness
)
label_annotator = sv.LabelAnnotator(
    text_scale=text_scale,
    text_thickness=thickness,
    text_position=sv.Position.BOTTOM_CENTER
)
trace_annotator = sv.TraceAnnotator(
    thickness=thickness,
    trace_length=video_info.fps * 2,
    position=sv.Position.BOTTOM_CENTER
)

# zone used both to filter detections and as the speed-measurement region
polygon_zone = sv.PolygonZone(
    polygon=SOURCE,
    frame_resolution_wh=video_info.resolution_wh
)

# per-tracker history of y positions in the transformed (target) space
coordinates = defaultdict(lambda: deque(maxlen=video_info.fps))

# create LineZoneAnnotator instance (previously called LineCounterAnnotator)
line_zone_annotator = sv.LineZoneAnnotator(thickness=4, text_thickness=4, text_scale=2)
# open target video and loop over source video frames
with sv.VideoSink(TARGET_VIDEO_PATH, video_info) as sink:
    for frame in tqdm(frame_generator, total=video_info.total_frames):
        # model prediction on a single frame and conversion to supervision Detections
        results = model(frame, imgsz=MODEL_RESOLUTION, verbose=False)[0]
        detections = sv.Detections.from_ultralytics(results)
        # only consider class ids from selected_classes defined above
        detections = detections[np.isin(detections.class_id, selected_classes)]
        # filter out low-confidence detections
        detections = detections[detections.confidence > CONFIDENCE_THRESHOLD]
        # filter out detections outside the zone
        detections = detections[polygon_zone.trigger(detections)]
        # refine detections using non-max suppression
        detections = detections.with_nms(IOU_THRESHOLD)
        # pass detections through the tracker
        detections = byte_track.update_with_detections(detections=detections)
        # calculate the detections' positions inside the target RoI
        points = detections.get_anchors_coordinates(
            anchor=sv.Position.BOTTOM_CENTER
        )
        points = view_transformer.transform_points(points=points).astype(int)
        # store detection positions
        for tracker_id, [_, y] in zip(detections.tracker_id, points):
            coordinates[tracker_id].append(y)
        # format labels
        labels = []
        for confidence, class_id, tracker_id in zip(
            detections.confidence, detections.class_id, detections.tracker_id
        ):
            if len(coordinates[tracker_id]) < video_info.fps / 2:
                labels.append(f"#{tracker_id} {model.model.names[class_id]} {confidence:0.2f}")
            else:
                # calculate speed: distance covered in target units (meters)
                # over elapsed time in seconds, * 3.6 to convert m/s to km/h
                coordinate_start = coordinates[tracker_id][-1]
                coordinate_end = coordinates[tracker_id][0]
                distance = abs(coordinate_start - coordinate_end)
                time = len(coordinates[tracker_id]) / video_info.fps
                speed = distance / time * 3.6
                labels.append(f"#{tracker_id} {model.model.names[class_id]} {confidence:0.2f} {int(speed)} km/h")
        # annotate frame with traces, boxes and labels
        annotated_frame = frame.copy()
        annotated_frame = trace_annotator.annotate(
            scene=annotated_frame, detections=detections
        )
        annotated_frame = bounding_box_annotator.annotate(
            scene=annotated_frame, detections=detections
        )
        annotated_frame = label_annotator.annotate(
            scene=annotated_frame, detections=detections, labels=labels
        )
        # update the line counter and draw it on the frame
        line_zone.trigger(detections)
        annotated_frame = line_zone_annotator.annotate(annotated_frame, line_counter=line_zone)
        # add frame to target video
        sink.write_frame(annotated_frame)

# alternative: sv.process_video(source_path=SOURCE_VIDEO_PATH, target_path=TARGET_VIDEO_PATH, callback=callback)
# works too, but only if the callback performs all of the steps above and
# returns the fully annotated frame; a callback that returns None produces
# an unreadable output file
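%% [markdown]
If the result file is written but still won't open in Chrome or the Colab file preview, the codec is the usual culprit: OpenCV-backed writers produce mp4 files with the mp4v codec, which browsers generally can't play. Re-encoding to H.264 with ffmpeg (preinstalled on Colab) typically fixes playback; the output filename below is just an arbitrary choice.
%% [code]
!ffmpeg -y -i {TARGET_VIDEO_PATH} -vcodec libx264 "{HOME}/vehicle-counting-result-h264.mp4"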
Project Type: detection, classification and speed estimation
Operating System & Browser: Google Chrome
Project Universe Link or Workspace/Project ID: Google Colab