Video inference using roboflow.js

Hi,
I am trying to use roboflow.js library to do inference on my browser. I am able to get results for an image. But, when I pass a video tag (using a video uploaded from the laptop), it infers only the current frame. What is the best way to infer a video fully? Do you have any code sample that I can try?

Hi @nithish - thanks for posting!

If you don’t need live results, I suggest our Video Inference API.

You can self-host inference on real-time video streams with our inference repo.

Thanks Jacob. I am currently using the hosted video inference but I need some inference on a more real-time basis in the user's browser. So, I was looking into this: https://docs.roboflow.com/deploy/sdks/web-browser/documentation-roboflow.js

I’m interested in this part where you say it is possible to continuously infer a video: * Note: when a video element is passed to the model, it infers on the frame that the video element is on at the moment it’s called. It does not automatically continuously infer on a video, but it is possible to do so.

I have not been able to get it to work reliably. So curious if you have some example code for that.

The inference repo link I shared above should work well!

But that one is in python right? I want to be able to run it on the client side using Javascript.

  1. Load roboflow.js on client side
  2. Ask client to upload video. Create a video element and assign video to it.
  3. Use the model.detect() on the video to infer every frame from it.

@nithish - I hope this helps:

  1. if you’re using React/Next.js, here’s a component that could be helpful to you. It’s not perfect, but it’s a start. If anyone has corrections or additions, I’d appreciate them.

add this to any component:

import Script from "next/script"
import Roboflow from "@/components/roboflow.js"

    <>
      <Script
        src="https://cdn.roboflow.com/0.2.25/roboflow.js"
        strategy="beforeInteractive"
      />
      <Roboflow modelName="microsoft-coco" modelVersion="9" />
    </>

roboflow.js:

import { useEffect, useRef } from "react"

const Roboflow = (props) => {
  const videoRef = useRef(null)
  const canvasRef = useRef(null)
  var inferRunning
  let frameCount = 0
  const frameSkip = 2 // Update to detect every 2 frames

  const startInfer = () => {
    inferRunning = true
    window.roboflow
      .auth({
        publishable_key: "YOUR_PUBLISHABLE_KEY",
      })
      .load({
        model: props.modelName,
        version: props.modelVersion,
        onMetadata: function (m) {
          console.log("model loaded")
        },
      })
      .then((model) => {
        function repeatDetection() {
          if (inferRunning) {
            if (frameCount % frameSkip === 0) {
              detect(model)
            }
            frameCount++
            requestAnimationFrame(repeatDetection)
          }
        }
        requestAnimationFrame(repeatDetection)
      })
  }

  useEffect(startInfer, [])

  const detect = async (model) => {
    // Check data is available
    if (
      typeof videoRef.current !== "undefined" &&
      videoRef.current !== null &&
      videoRef.current.readyState === 4
    ) {
      const videoWidth = videoRef.current.videoWidth
      const videoHeight = videoRef.current.videoHeight

      videoRef.current.width = videoWidth
      videoRef.current.height = videoHeight

      adjustCanvas(videoWidth, videoHeight)

      const detections = await model.detect(videoRef.current)

      const ctx = canvasRef.current.getContext("2d")
      drawBoxes(detections, ctx)
    }
  }

  const adjustCanvas = (w, h) => {
    canvasRef.current.width = w * window.devicePixelRatio
    canvasRef.current.height = h * window.devicePixelRatio

    canvasRef.current.style.width = w + "px"
    canvasRef.current.style.height = h + "px"

    canvasRef.current
      .getContext("2d")
      .scale(window.devicePixelRatio, window.devicePixelRatio)
  }

  const drawBoxes = (detections, ctx) => {
    ctx.clearRect(0, 0, canvasRef.current.width, canvasRef.current.height)
    detections.forEach((row) => {
      if (true) {
        // Simplify this condition if not needed
        var temp = row.bbox
        temp.class = row.class
        temp.color = row.color
        temp.confidence = row.confidence
        row = temp
      }

      if (row.confidence < 0) return

      var x = row.x - row.width / 2
      var y = row.y - row.height / 2
      var w = row.width
      var h = row.height

      ctx.beginPath()
      ctx.lineWidth = 1
      ctx.strokeStyle = row.color
      ctx.rect(x, y, w, h)
      ctx.stroke()

      ctx.fillStyle = "black"
      ctx.globalAlpha = 0.2
      ctx.fillRect(x, y, w, h)
      ctx.globalAlpha = 1.0

      var fontColor = "black"
      var fontSize = 12
      ctx.font = `${fontSize}px monospace`
      ctx.textAlign = "center"
      var classTxt = row.class
      var confTxt = (row.confidence * 100).toFixed().toString() + "%"
      var msgTxt = classTxt + " " + confTxt
      const textHeight = fontSize
      var textWidth = ctx.measureText(msgTxt).width

      if (textHeight <= h && textWidth <= w) {
        ctx.fillStyle = row.color
        ctx.fillRect(
          x - ctx.lineWidth / 2,
          y - textHeight - ctx.lineWidth,
          textWidth + 2,
          textHeight + 1
        )
        ctx.fillStyle = fontColor
        ctx.fillText(msgTxt, x + textWidth / 2 + 1, y - 1)
      } else {
        textWidth = ctx.measureText(confTxt).width
        ctx.fillStyle = row.color
        ctx.fillRect(
          x - ctx.lineWidth / 2,
          y - textHeight - ctx.lineWidth,
          textWidth + 2,
          textHeight + 1
        )
        ctx.fillStyle = fontColor
        ctx.fillText(confTxt, x + textWidth / 2 + 1, y - 1)
      }
    })
  }

  return (
    <>
      <video
        ref={videoRef}
        className="absolute left-0 right-0 z-10 mx-auto text-center"
        controls
        muted
        autoPlay
        crossOrigin="anonymous"
      >
        <source src="/video/myvideo.mp4" type="video/mp4" />
        Your browser does not support the video tag does not support the video
        tag.
      </video>
      <canvas
        ref={canvasRef}
        className="absolute left-0 right-0 z-20 mx-auto text-center"
      />
    </>
  )
}

export default Roboflow
  1. If you’re using vanilla javascript – you can start with this file:
    https://github.com/roboflow/homepage-demo/blob/main/script.js

And replace the webcamInference() function with one that uses a video tag rather than a webcam input

1 Like