Two weeks back, I had to demo my final year project which uses Deep Learning. Being a novice myself, I know how difficult it is to wrap our minds around DL. So, I wanted to build an intuitive demo with real-world applications which would help me to explain my project. (The world has had enough MNIST classifiers!).
I planned on building an object detector but also eschewed from “production-ready” deep-learning frameworks because those are quite convoluted for a beginner to understand. So after a lot of considerations, I picked brain.js. Brain.js is a fork of an older javascript neural network library, brain, which went unmaintained few years back. It has a steep learning curve, easy to use, and runs on the browser.
I was all set to build the detector but had one major problem: I have never collected a real-world image dataset before, let alone do it with javascript. After searching for a while, I came up with this inane idea of completely collecting the dataset in the frontend using the webcam.
Here’s how I did it:
- Get the camera stream using
navigator.getUserMedia
and write to a video tag. - Write the video to a canvas as a 100x100 image so that it can be read back as ImageData object.
- Read the canvas, grayscale the image, and write one of the color channels to an array. [ref]
- Convert the array to a CSV and download it. [ref]
// grayscales ImageData object
function grayscale(image) {
let pixels = image.data;
for (let i = 0; i < pixels.length; i += 4) {
pixels[i] = pixels[i + 1] = pixels[i + 2] =
0.2126 * pixels[i] + 0.7152 * pixels[i + 1] + 0.0722 * pixels[i + 2];
}
return image;
}
// converts a given channel of an ImageData object to array
function channelToArray(image, channelID) {
let pixels = image.data;
let channel = [];
for (let i = 0; i < pixels.length; i += 4) {
channel.push(pixels[i + channelID]);
}
return channel;
}
// converts 2D array to CSV and downloads it
function arrayToCSV(arr) {
let csvContent = 'data:text/csv;charset=utf-8,';
arr.forEach(row => {
csvContent += row.join(',') + '\r\n';
});
const URI = encodeURI(csvContent);
const link = document.createElement('a');
link.setAttribute('href', URI);
link.setAttribute('download', 'dataset.csv');
document.body.appendChild(link);
link.click();
}
window.addEventListener('load', () => {
const display = document.querySelector('#display');
const canvas = document.querySelector('#canvas');
const ctx = canvas.getContext('2d');
const startBtn = document.querySelector('#start');
const stopBtn = document.querySelector('#stop');
const HEIGHT = 100;
const WIDTH = 100;
let dataset = [];
let recordInterval;
// write webcam to #display
navigator.getUserMedia({video: { width: WIDTH, height: HEIGHT }},
stream => display.srcObject = stream,
err => alert('Something went terribly wrong! :('));
startBtn.addEventListener('click', () => {
// starts writing webcam to dataset @ 30fps
recordInterval = setInterval(() => {
ctx.drawImage(display, 0, 0, WIDTH, HEIGHT);
const image = ctx.getImageData(0, 0, WIDTH, HEIGHT);
const grayscaled = grayscale(image);
const channel = channelToArray(grayscaled, 0);
dataset.push(channel);
console.log(channel);
ctx.putImageData(grayscaled, 0, 0);
}, 1000 / 30);
});
stopBtn.addEventListener('click', () => {
// stops writing webcam to datset and downloads dataset as CSV
clearInterval(recordInterval);
arrayToCSV(dataset);
});
});
This way I created separate datasets for the obstacle and its absence. Using this I created, trained, and saved a network.
const brain = require('brain.js');
const fs = require('fs');
function readCSV(path) {
const content = fs.readFileSync(path, 'utf8');
let arr = [];
content.split('\n').forEach(line => {
arr.push(line.split(',').map(n => parseInt(n, 10)));
});
return arr;
}
// read obstacle and no_obstacle dataset seperately and convert to brain.js format
obstacle = readCSV('./dataset/obstacle.csv');
noObstacle = readCSV('./dataset/no_obstacle.csv');
inputData = [];
obstacle.forEach(image => inputData.push({input: image, output: [0]}));
noObstacle.forEach(image => inputData.push({input: image, output: [1]}));
// train the network
console.time('training');
const net = new brain.NeuralNetwork({
activation: 'sigmoid',
hiddenLayers: [1000, 100],
learningRate: 0.6
});
net.train(inputData);
console.timeEnd('training');
// write the model
fs.writeFileSync('./model/model.json', JSON.stringify(net.toJSON()));
Here is where something really unexpected happened. The generated model was around 270MB which is gargantuan considering it was trained to run on the browser. But since this is just a demo, it wasn’t much of a problem when serving from local.
Finally, all I had to do was load the model via AJAX and predict.
// fethes the model using AJAX
function loadModel(net) {
fetch('../model/model.json')
.then(res => res.json())
.then(json => net.fromJSON(json));
return net;
}
// initialize and load the model
function initModel() {
const net = new brain.NeuralNetwork({
activation: 'sigmoid',
hiddenLayers: [1000, 100],
learningRate: 0.6
});
const model = loadModel(net);
return model;
}
// grayscales ImageData object
function grayscale(image) {
let pixels = image.data;
for (let i = 0; i < pixels.length; i += 4) {
pixels[i] = pixels[i + 1] = pixels[i + 2] =
0.2126 * pixels[i] + 0.7152 * pixels[i + 1] + 0.0722 * pixels[i + 2];
}
return image;
}
// converts a given channel of an ImageData object to array
function channelToArray(image, channelID) {
let pixels = image.data;
let channel = [];
for (let i = 0; i < pixels.length; i += 4) {
channel.push(pixels[i + channelID]);
}
return channel;
}
// if obstacle -> 1 else -> 0
function predict(net, image) {
const input = channelToArray(grayscale(image), 0);
return net.run(input) <= 0.5;
}
window.addEventListener('load', () => {
const display = document.querySelector('#display');
const canvas = document.querySelector('#canvas');
const ctx = canvas.getContext('2d');
const model = initModel();
const HEIGHT = 100;
const WIDTH = 100;
// write webcam to #display
navigator.getUserMedia({video: { width: WIDTH, height: HEIGHT }},
stream => display.srcObject = stream,
err => alert('Something went terribly wrong! :('));
// predict and update @ 30fps
setInterval(() => {
ctx.drawImage(display, 0, 0, WIDTH, HEIGHT);
const image = ctx.getImageData(0, 0, WIDTH, HEIGHT);
document.body.style.backgroundColor = (predict(model, image) ? 'red' : 'green'); // if obstacle -> red else -> green
}, 1000 / 30);
});
So the million dollar question: Did it work? Well, it partially did. Definitely, not the best object detector, but was good enough for the demo. Afterall, this is all just a hack!
Complete source here.