Hello
I’m new to computer vision with YOLO. I observed that when I start video detection on YOLOv8 with an RTSP stream from IP cameras, displayed via OpenCV imshow, my code (which is multi-process) starts a process that takes about 700 MB of RAM during inference and keeps growing until the computer runs out of memory and exits.
I don’t know how to resolve this issue, or how people handle it in real production use, since I intend to build a smart-detection surveillance camera running 24/7.
I even injected code to delete the result and detections and run the garbage collector every two minutes to free up RAM, but it didn’t help.
Can anybody show me how to resolve this issue?
Here is my code.
import gc
import subprocess
import time
from multiprocessing import Process

import cv2
import numpy as np
import supervision as sv
import torch
from ultralytics import YOLO
# Vertical tripwire for LineZone crossing detection: from (250, 150) down to (250, 550)
# in pixel coordinates of the (resized) inference frame.
LINE_START = sv.Point(250, 150)
LINE_END = sv.Point(250, 550)
# NOTE(review): dead code below — an experimental polygon zone; `video_info` is not
# defined anywhere in this file, so the zone cannot be constructed as written.
#polygon = np.array([
# [1725, 1550],
# [2725, 1550],
# [3500, 2160],
# [1250, 2160]
#])
# zone = sv.PolygonZone(polygon=polygon, frame_resolution_wh=video_info.resolution_wh)
def beep():
    """Play the macOS system 'Glass' alert sound and wait for it to finish."""
    cmd = ["afplay", "/System/Library/Sounds/Glass.aiff"]
    subprocess.run(cmd, check=False)
def detect_objects(model, video_source):
    """Run streaming YOLO inference on one video source and display annotated frames.

    Draws boxes, labels, an FPS overlay and a line-crossing counter, beeps on a
    line trigger, and periodically flushes the MPS allocator cache to bound RAM
    growth. Runs until the stream ends or the user presses ESC in the window.

    Args:
        model: a loaded ultralytics YOLO model.
        video_source: anything `model.predict(source=...)` accepts (e.g. an RTSP URL).
    """
    line_counter = sv.LineZone(start=LINE_START, end=LINE_END)
    line_annotator = sv.LineZoneAnnotator(thickness=1, text_thickness=1, text_scale=0.5)
    box_annotator = sv.BoxAnnotator(
        thickness=1,
        text_thickness=1,
        text_scale=0.5,
    )
    fps_start_time = time.time()
    fps = 0
    start_time = time.time()  # timer for the periodic cache flush below

    # stream=True makes predict() a generator, yielding one result at a time
    # instead of accumulating every frame's result in a list.
    for result in model.predict(source=video_source,
                                device="mps",
                                cache=False,
                                save=False,
                                visualize=False,
                                half=True,
                                max_det=20,
                                stream=True,
                                imgsz=320,
                                agnostic_nms=True):
        frame = result.orig_img
        detections = sv.Detections.from_yolov8(result)

        # FPS from the wall-clock delta between consecutive results.
        fps_end_time = time.time()
        fps_diff_time = fps_end_time - fps_start_time
        # Guard against a zero delta (timer resolution) — original divided unconditionally.
        fps = 1 / fps_diff_time if fps_diff_time > 0 else 0.0
        fps_start_time = fps_end_time
        fps_text = "INFERENCE-FPS:{:.0f}".format(fps)
        cv2.putText(frame, fps_text, (5, 30), cv2.FONT_HERSHEY_COMPLEX, 0.5, (255, 255, 255), 1)

        # Tracker IDs are only present when predict() runs with tracking enabled.
        if result.boxes.id is not None:
            detections.tracker_id = result.boxes.id.cpu().numpy().astype(int)

        labels = [
            # Fixed format spec: '.0f' — uppercase 'F' is not a valid format code
            # and raised ValueError on the first detection.
            f"{model.model.names[int(class_id)]} {confidence*100:0.0f}%"
            for _, _, confidence, class_id, _ in detections
        ]
        frame = box_annotator.annotate(
            scene=frame,
            detections=detections,
            labels=labels,
        )

        # NOTE(review): LineZone.trigger's return value differs across supervision
        # versions (None vs. a tuple of crossing arrays) — verify this condition is
        # truthy only on an actual crossing for the installed version.
        if line_counter.trigger(detections=detections):
            beep()
        line_annotator.annotate(frame=frame, line_counter=line_counter)

        cv2.imshow(f"AI Camera Detection system ", frame)
        if cv2.waitKey(30) == 27:  # ESC
            break

        # Every 2 minutes, drop per-frame references and flush the MPS cache.
        # Python-level `del` + gc alone does not return GPU allocator memory.
        if time.time() - start_time > 120:
            print(f"Release memory for {video_source}")
            beep()
            del result, frame, detections, fps_text
            gc.collect()
            # Public API; the original `torch._C._mps_emptyCache()` is private and
            # `torch` was never imported, so that line raised NameError.
            torch.mps.empty_cache()
            start_time = time.time()

    cv2.destroyAllWindows()
def main():
    """Spawn one detection process per camera stream and wait for all of them."""
    # RTSP camera URLs (scheme fixed: 'rtsp', not 'rstp' — the streams could
    # never have opened with the misspelled scheme).
    video_sources = [
        "rtsp://IP1", "rtsp://IP2", "rtsp://ip3",
    ]
    model = YOLO("yolov8n.pt")
    processes = []
    for video_source in video_sources:
        p = Process(target=detect_objects, args=(model, video_source))
        p.start()
        processes.append(p)
    # Wait for every worker. The original loop mutated `processes` while
    # iterating it and only joined workers that had already exited, so it
    # neither waited for the children nor reliably drained the list.
    for p in processes:
        p.join()

if __name__ == "__main__":
    main()
Thank you