WebCodecs Example
Examples for using predict_batch with the WebCodecs API.
If your browser supports the WebCodecs API, you can create efficient video processing pipelines with DeGirumJS.
The WebCodecs API provides low-level access to the individual frames of a video stream. This allows for highly efficient and flexible video processing pipelines directly in the browser. When combined with DeGirumJS's predict_batch()
method, you can perform real-time AI inference on a live webcam stream with minimal latency.
The core components of this pipeline are:
- MediaStreamTrackProcessor: Takes a MediaStreamTrack (like from a webcam) and exposes its frames as a ReadableStream of VideoFrame objects.
- predict_batch(): The DeGirumJS method that can directly consume a ReadableStream of VideoFrame objects and efficiently process them for inference.
- MediaStreamTrackGenerator: Takes a stream of processed VideoFrame objects and exposes them as a new MediaStreamTrack, which can be displayed in a <video> element.
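Before diving into the full examples, here is a minimal passthrough sketch showing how these three pieces connect without any inference. The element id and variable names here are illustrative assumptions, not part of the examples below:
<video id="passthroughVideo" autoplay muted></video>
<script type="module">
    // Webcam -> processor -> identity transform -> generator -> <video>.
    const stream = await navigator.mediaDevices.getUserMedia({ video: true });
    const processor = new MediaStreamTrackProcessor({ track: stream.getVideoTracks()[0] });
    const generator = new MediaStreamTrackGenerator({ kind: 'video' });
    document.getElementById('passthroughVideo').srcObject = new MediaStream([generator]);
    // Each VideoFrame is passed through untouched; a real pipeline would process it here.
    await processor.readable
        .pipeThrough(new TransformStream({ transform: (frame, controller) => controller.enqueue(frame) }))
        .pipeTo(generator.writable);
</script>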
Here are some examples demonstrating how to build pipelines using these components:
Example 1: ReadableStream as Input
This example demonstrates the most direct way to perform inference on a video stream. We will take the ReadableStream provided by the MediaStreamTrackProcessor and feed it directly into model.predict_batch().
How it works:
1. Get a videoTrack from the webcam using navigator.mediaDevices.getUserMedia.
2. Create a MediaStreamTrackProcessor to get a ReadableStream of VideoFrame objects.
3. Pass this readableStream directly as the data source to model.predict_batch().
4. Display the results in a <canvas>.
<p>Inference results from a direct video stream:</p>
<canvas id="outputCanvas"></canvas>
<script src="https://assets.degirum.com/degirumjs/0.1.4/degirum-js.min.obf.js"></script>
<script type="module">
// --- Model Setup ---
const dg = new dg_sdk();
const secretToken = localStorage.getItem('secretToken') || prompt('Enter secret token:');
localStorage.setItem('secretToken', secretToken);
const MODEL_NAME = 'yolov8n_relu6_coco--640x640_quant_n2x_orca1_1';
const ZOO_IP = 'https://cs.degirum.com/degirum/public';
const zoo = await dg.connect('cloud', ZOO_IP, secretToken);
const model = await zoo.loadModel(MODEL_NAME);
// 1. Get video stream from webcam
const mediaStream = await navigator.mediaDevices.getUserMedia({ video: true });
const videoTrack = mediaStream.getVideoTracks()[0];
// 2. Create a processor to get a readable stream of frames
const processor = new MediaStreamTrackProcessor({ track: videoTrack });
const readableStream = processor.readable;
// 3. Feed the stream to predict_batch and loop through results
for await (const result of model.predict_batch(readableStream)) {
    // Display the result on the canvas
    await model.displayResultToCanvas(result, 'outputCanvas');
    // IMPORTANT: Close the frame to release memory.
    // The SDK does not close frames when you provide a raw stream.
    result.imageFrame.close();
}
</script>
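The loop above runs until the input stream ends. One way to stop it cleanly is to stop the webcam track, which closes the processor's ReadableStream and should let the for await loop over predict_batch() drain and exit on its own. A sketch, assuming the snippet is added to the same module as the code above and that a button with id stopButton exists (both are assumptions):
// Hypothetical stop control, added to the same <script type="module"> as above.
// Stopping the track ends processor.readable, so predict_batch() can finish.
document.getElementById('stopButton')?.addEventListener('click', () => {
    videoTrack.stop();
});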
Example 2: Real-Time Inference with Display in a <video> Element
While the first example is simple, you might want to output the processed video (with results drawn) into a <video> element, for example for further processing or for use by other libraries in your code. This example uses WebCodecs to re-encode the processed frames back into a video track.
We use a TransformStream to orchestrate the work and a MediaStreamTrackGenerator to create the final output video track. This pattern is more robust and flexible for building complex applications.
How it works:
1. A MediaStreamTrackProcessor creates a ReadableStream from the webcam.
2. This stream is piped through a TransformStream. Inside the transform function, for each frame:
   - We run inference on the frame using model.predict().
   - We draw the original frame onto an OffscreenCanvas.
   - We use model.displayResultToCanvas() to overlay the inference results on that same canvas.
   - We enqueue a new VideoFrame created from the canvas to the stream's controller.
   - We close the original frame to free up memory.
3. The output of the TransformStream is piped to the writable side of a MediaStreamTrackGenerator.
4. The MediaStreamTrackGenerator's track is then attached to a <video> element's srcObject.
<p>Inference results inside a video element</p>
<video id="outputVideo" width="640" height="480" autoplay muted></video>
<script src="https://assets.degirum.com/degirumjs/0.1.4/degirum-js.min.obf.js"></script>
<script type="module">
const outputVideo = document.getElementById('outputVideo');
// --- Model Setup ---
const dg = new dg_sdk();
const secretToken = localStorage.getItem('secretToken') || prompt('Enter secret token:');
localStorage.setItem('secretToken', secretToken);
const MODEL_NAME = 'yolov8n_relu6_coco--640x640_quant_n2x_orca1_1';
const ZOO_IP = 'https://cs.degirum.com/degirum/public';
const zoo = await dg.connect('cloud', ZOO_IP, secretToken);
const model = await zoo.loadModel(MODEL_NAME);
// Use an OffscreenCanvas for efficient background rendering
const canvas = new OffscreenCanvas(640, 480);
const ctx = canvas.getContext('2d');
const stream = await navigator.mediaDevices.getUserMedia({ video: true });
const videoTrack = stream.getVideoTracks()[0];
const trackProcessor = new MediaStreamTrackProcessor({ track: videoTrack });
const trackGenerator = new MediaStreamTrackGenerator({ kind: "video" });
outputVideo.srcObject = new MediaStream([trackGenerator]);
// Define the transformation logic
const transform = async (frame, controller) => {
    // Run inference on the current frame.
    // Note: We use predict() here, not predict_batch(), as we process one frame at a time.
    const result = await model.predict(frame);
    // If we have a valid result, draw it on top
    if (result.result) {
        await model.displayResultToCanvas(result, canvas);
    } else {
        // Draw the original frame onto our offscreen canvas
        ctx.drawImage(frame, 0, 0);
    }
    // Create a new frame from the canvas and pass it down the pipeline
    controller.enqueue(new VideoFrame(canvas, { timestamp: frame.timestamp }));
    // IMPORTANT: Close the original frame to release its resources.
    frame.close();
};
// Construct the full pipeline!
trackProcessor.readable
.pipeThrough(new TransformStream({ transform }))
.pipeTo(trackGenerator.writable);
</script>
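The transform above assumes the webcam delivers 640x480 frames, matching the fixed-size OffscreenCanvas. If the camera may negotiate a different resolution, one possible adjustment (a sketch, not part of the original example) is to resize the canvas from each frame's display dimensions before drawing:
// Variation on the transform above: keep the canvas in sync with the incoming frame size.
const transform = async (frame, controller) => {
    if (canvas.width !== frame.displayWidth || canvas.height !== frame.displayHeight) {
        canvas.width = frame.displayWidth;   // resizing also clears the canvas
        canvas.height = frame.displayHeight;
    }
    const result = await model.predict(frame);
    if (result.result) {
        await model.displayResultToCanvas(result, canvas);
    } else {
        ctx.drawImage(frame, 0, 0);
    }
    controller.enqueue(new VideoFrame(canvas, { timestamp: frame.timestamp }));
    frame.close();
};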
Example 3: Parallel Inference on Four Video Streams
The WebCodecs API and DeGirumJS can handle multiple independent video pipelines at once. This example demonstrates four processed video streams displayed in a 2x2 grid.
This architecture is highly scalable. While we use a cloned track here, you could just as easily use four different video sources (e.g., multiple cameras or video files).
How it works:
1. Grab a single webcam track (mainVideoTrack).
2. Clone the track four times so each pipeline gets its own independent MediaStreamTrack.
3. For each pipeline:
   - Load a separate model instance.
   - Create a MediaStreamTrackProcessor for the cloned track to get a ReadableStream of VideoFrame objects.
   - Pass the stream directly to model.predict_batch().
   - For each inference result, render detections onto the assigned <canvas> element using model.displayResultToCanvas().
   - Close the frame after processing to release memory.
<!DOCTYPE html>
<html>
<head>
<title>DeGirumJS four-canvas parallel demo</title>
<style>
html,
body {
    margin: 0;
    height: 100%;
}
#canvas-grid {
    display: grid;
    grid-template-columns: repeat(2, 1fr);
    grid-template-rows: repeat(2, 1fr);
    width: 100vw;
    height: 100vh;
}
canvas {
    width: 100%;
    height: 100%;
    background: #000;
    display: block;
}
</style>
</head>
<body>
<div id="canvas-grid">
<canvas id="canvas_0" width="640" height="480"></canvas>
<canvas id="canvas_1" width="640" height="480"></canvas>
<canvas id="canvas_2" width="640" height="480"></canvas>
<canvas id="canvas_3" width="640" height="480"></canvas>
</div>
<script src="https://assets.degirum.com/degirumjs/0.1.4/degirum-js.min.obf.js"></script>
<script type="module">
// ----- Model setup -----
const dg = new dg_sdk();
const secretToken = localStorage.getItem('secretToken') || prompt('Enter secret token:');
localStorage.setItem('secretToken', secretToken);
const MODEL_NAMES = [
'yolov8n_relu6_coco--640x640_quant_n2x_orca1_1',
'yolov8n_relu6_face--640x640_quant_n2x_orca1_1',
'yolov8n_relu6_hand--640x640_quant_n2x_orca1_1',
'yolov8n_relu6_widerface_kpts--640x640_quant_n2x_orca1_1'
];
const NUM_PIPELINES = MODEL_NAMES.length;
const ZOO_IP = 'https://cs.degirum.com/degirum/public';
const zoo = await dg.connect('cloud', ZOO_IP, secretToken);
// Grab the webcam once and clone the track
const stream = await navigator.mediaDevices.getUserMedia({ video: true });
const mainVideoTrack = stream.getVideoTracks()[0];
async function setupPipeline(index, videoTrack) {
    // Load a separate model instance for each stream
    const model = await zoo.loadModel(MODEL_NAMES[index]);
    // Processor gives us a ReadableStream<VideoFrame>
    const processor = new MediaStreamTrackProcessor({ track: videoTrack });
    const readable = processor.readable;
    // Iterate over batched predictions
    for await (const result of model.predict_batch(readable)) {
        // Draw detections to the right canvas
        await model.displayResultToCanvas(result, `canvas_${index}`);
        // IMPORTANT: Always close frames when supplying a raw stream
        result.imageFrame.close();
    }
}
// Create and launch four independent pipelines
for (let i = 0; i < NUM_PIPELINES; i++) {
    setupPipeline(i, mainVideoTrack.clone());
}
</script>
</body>
</html>
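As noted above, the cloned webcam track is only one possible source. Below is a sketch of feeding one pipeline from a video file instead, assuming the snippet lives in the same module as setupPipeline and that a suitable file URL exists (the file name is a placeholder; HTMLMediaElement.captureStream() support varies by browser, with Firefox exposing mozCaptureStream()):
// Hypothetical alternative source: a looping video file instead of a webcam clone.
const fileVideo = document.createElement('video');
fileVideo.src = 'sample.mp4';   // placeholder URL
fileVideo.muted = true;
fileVideo.loop = true;
await fileVideo.play();
const fileTrack = fileVideo.captureStream().getVideoTracks()[0];
setupPipeline(0, fileTrack);    // reuse the same pipeline function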