Video stream from ESP32 cam to Browser Model (RoboFlow.js)

How can I use a video stream from an ESP32-CAM (via its stream URL) for in-browser (on-device) inference, instead of the webcam built into my computer?

@Mohamed can you please help?

I can look closer at the code later today. This is probably because we use the browser function MediaDevices.getUserMedia() to access local hardware through the web browser.

I’m not sure that this function supports IP cameras. There is, however, an OpenCV.js package that does support RTSP / IP camera input.

My first attempt at fixing this problem would be to integrate OpenCV.js into Roboflow.js OR find another way to use an IP camera as input.

Hi @TylerOdenthal, did you work on a solution?

Yes, we have built a couple of solutions into the Python package since then.

You should be able to send RTSP frames directly to the Roboflow python package and it should work.

There is also this repository that we created for IP cameras: GitHub - roboflow/rtsp-server: RTSP streaming using GStreamer

Hi @TylerOdenthal, your solution uses a Python script, but I am looking to use the API in JavaScript and, instead of a webcam, use a streaming URL (i.e. my ESP32-CAM stream).

I have edited the code from EgoHands V9, but the video stream is not visible. Can you please check?

/*jshint esversion:6*/

// Page-ready handler (presumably jQuery's `$(fn)` shorthand — confirm `$` is jQuery).
// Everything in the snippet below is declared inside this callback.
$(function () {
    // First <video> element matched in the page; used later for canvas sizing.
    const video = $("video")[0];
    // Holds the loaded model; assigned inside loadModelPromise's `.then` callback.
    var model;
    // In the visible code this is only referenced by the commented-out getUserMedia constraints.
    var cameraMode = "environment"; // or "user"

 /* const startVideoStreamPromise = navigator.mediaDevices
        audio: false,
        video: {
            facingMode: cameraMode
    .then(function (stream) {
        return new Promise(function (resolve) {
            video.srcObject = stream;
            video.onloadeddata = function () {

    // Replacement for the commented-out getUserMedia flow: wraps the loading of a
    // newly created <video> element in a promise.
    // NOTE(review): the rest of the executor (the loadedmetadata handler body and the
    // closing braces) is missing from this paste.
    const startVideoStreamPromise = new Promise(function (resolve, reject) {
        // NOTE(review): this `const video` shadows the outer `video` declared at the top
        // of the handler. The visible code never inserts this new element into the
        // document, while the sizing/drawing code outside this executor still reads the
        // outer page element — a likely reason no stream is visible. Confirm.
        const video = document.createElement("video");
        // NOTE(review): the source is an empty string here; the camera stream URL
        // presumably belongs in this assignment. Also verify that the browser's <video>
        // element can actually play the format the ESP32-CAM serves — TODO confirm.
        video.src = "";
        // Attributes set as empty-valued (boolean-style) HTML attributes.
        video.setAttribute("autoplay", "");
        video.setAttribute("muted", "");
        video.setAttribute("playsinline", "");
        // Handler body is not present in the paste; presumably it resolves the promise.
        video.addEventListener("loadedmetadata", () => {

    // Key and model selection used when loading the model.
    var publishable_key = "rf_vcBl5cLwmKPjYKLcQzeuVA8RTa62";
    // Model identifier and version to load.
    // NOTE(review): the closing brace of this object literal is missing from the paste.
    var toLoad = {
        model: "damage-detection-0otvb",
        version: 5

    // Promise for the model load.
    // NOTE(review): the call that receives `publishable_key` and `toLoad` is missing
    // from the paste; only one of its arguments and the `.then` callback remain.
    const loadModelPromise = new Promise(function (resolve, reject) {
                publishable_key: publishable_key
            .then(function (m) {
                // Store the loaded model in the handler-level `model` variable.
                model = m;

  // Continues once both the video-stream promise and the model promise have resolved.
  // NOTE(review): the callback body is missing from this paste.
  Promise.all([startVideoStreamPromise, loadModelPromise]).then(function () {

    // Overlay canvas (jQuery-wrapped) and its 2D context; both are assigned in resizeCanvas.
    var canvas, ctx;
    // Label font; the drawing code also parses its leading number as the label text height.
    const font = "16px sans-serif";

    /*function videoDimensions(video) {
        // Ratio of the video's intrisic dimensions
        var videoRatio = video.videoWidth / video.videoHeight;

        // The width and height of the video element
        var width = video.offsetWidth,
            height = video.offsetHeight;

        // The ratio of the element's width to its height
        var elementRatio = width / height;

        // If the video element is short and wide
        if (elementRatio > videoRatio) {
            width = height * videoRatio;
        } else {
            // It must be tall and thin, or exactly equal to the original ratio
            height = width / videoRatio;

        return {
            width: width,
            height: height

    // Computes a { width, height } pair for the given video element.
    // NOTE(review): unlike the commented-out version above, which read offsetWidth /
    // offsetHeight, this version takes width and height from the intrinsic
    // videoWidth / videoHeight. elementRatio is therefore computed from the same two
    // numbers as videoRatio, so the `if` branch below cannot be taken and the `else`
    // branch recomputes height from width — confirm this is intended.
    // NOTE(review): the closing braces of the if/else, the returned object and the
    // function itself are missing from this paste.
    function videoDimensions(video) {
        // Ratio of the video's intrinsic dimensions
        var videoRatio = video.videoWidth / video.videoHeight;
        // Intrinsic width and height of the video (not the element's layout size)
        var width = video.videoWidth,
          height = video.videoHeight;
        // Ratio of the width and height chosen above
        var elementRatio = width / height;
        // If the video element is short and wide
        if (elementRatio > videoRatio) {
          width = height * videoRatio;
        } else {
          // It must be tall and thin, or exactly equal to the original ratio
          height = width / videoRatio;
        return {
          width: width,
          height: height,

    // Window resize hook.
    // NOTE(review): its handler body is missing from this paste.
    $(window).resize(function () {

    // Creates the overlay canvas and sizes it from the video.
    const resizeCanvas = function () {

        // New, detached <canvas> element wrapped by jQuery.
        canvas = $("<canvas/>");

        // 2D drawing context used by renderPredictions.
        ctx = canvas[0].getContext("2d");

        var dimensions = videoDimensions(video);


        // The canvas drawing surface matches the video's intrinsic pixel size.
        canvas[0].width = video.videoWidth;
        canvas[0].height = video.videoHeight;

        // NOTE(review): the call that consumes the object below is missing from the
        // paste; presumably these are CSS properties applied to the canvas. The
        // left/top expressions centre a box of `dimensions` size within the window.
            width: dimensions.width,
            height: dimensions.height,
            left: ($(window).width() - dimensions.width) / 2,
            top: ($(window).height() - dimensions.height) / 2


    // Clears the overlay canvas and draws each prediction's box, label background and
    // label text. Each prediction is read for `bbox` ({x, y, width, height}), `color`
    // and `class`.
    // NOTE(review): the canvas draw calls themselves (only their argument lists
    // remain) and the closing braces are missing from this paste.
    const renderPredictions = function (predictions) {
        // Computed but not used in the visible lines of this function.
        var dimensions = videoDimensions(video);

        // With scale fixed at 1, the divisions below leave coordinates unchanged.
        var scale = 1;

        // Wipe the previous frame's drawings.
        ctx.clearRect(0, 0, ctx.canvas.width, ctx.canvas.height);

        // First pass: boxes and label backgrounds.
        predictions.forEach(function (prediction) {
            const x = prediction.bbox.x;
            const y = prediction.bbox.y;

            const width = prediction.bbox.width;
            const height = prediction.bbox.height;

            // Draw the bounding box.
            // Half the width/height is subtracted below, i.e. bbox x/y is treated as
            // the box centre and converted to a top-left corner.
            ctx.strokeStyle = prediction.color;
            ctx.lineWidth = 4;
                (x - width / 2) / scale,
                (y - height / 2) / scale,
                width / scale,
                height / scale

            // Draw the label background.
            ctx.fillStyle = prediction.color;
            const textWidth = ctx.measureText(prediction.class).width;
            // parseInt reads the leading number of the font string ("16px ..." -> 16).
            const textHeight = parseInt(font, 10); // base 10
                (x - width / 2) / scale,
                (y - height / 2) / scale,
                textWidth + 8,
                textHeight + 4

        // Second pass: label text, drawn after all boxes and backgrounds.
        predictions.forEach(function (prediction) {
            const x = prediction.bbox.x;
            const y = prediction.bbox.y;

            const width = prediction.bbox.width;
            const height = prediction.bbox.height;

            // Draw the text last to ensure it's on top.
            ctx.font = font;
            ctx.textBaseline = "top";
            ctx.fillStyle = "#000000";
            // Text position: the box's top-left corner, inset by 4px / 1px.
                (x - width / 2) / scale + 4,
                (y - height / 2) / scale + 1

    // Frame-timing state: time of the previous frame, and a window of recent
    // per-frame durations used to compute a frames-per-second figure.
    var prevTime;
    var pastFrameTimes = [];
    // Per-frame detection step.
    // NOTE(review): the model call preceding `.then`, the closing braces, and
    // presumably the re-scheduling of the next frame are missing from this paste.
    const detectFrame = function () {
        // Model not loaded yet: try again on the next animation frame.
        if (!model) return requestAnimationFrame(detectFrame);

            .then(function (predictions) {

                if (prevTime) {
                    // NOTE(review): the left operand is missing here and in the
                    // `prevTime =` assignment below — presumably a current-timestamp
                    // call (e.g. Date.now()) lost in the paste. Confirm.
                    pastFrameTimes.push( - prevTime);
                    // Keep at most the 30 most recent frame durations.
                    if (pastFrameTimes.length > 30) pastFrameTimes.shift();

                    // Sum the durations, dividing by 1000 (presumably ms -> seconds).
                    // `_` is presumably lodash/underscore — confirm it is loaded.
                    var total = 0;
                    _.each(pastFrameTimes, function (t) {
                        total += t / 1000;

                    // Frames divided by total elapsed time over the window.
                    var fps = pastFrameTimes.length / total;
                prevTime =;
            .catch(function (e) {
                // Errors from the detection promise chain are logged to the console.
                console.log("CAUGHT", e);