//Student Name : Zhensheng Tan
//Student Number: 19214699
/*[AssignmentComment]
This page resolves the doodle issue and also brings in a Convolutional Neural Network (CNN), for comparison with the existing simple Neural Network (NN).
*/
// Port of Character recognition neural network from here:
// https://github.com/CodingTrain/Toy-Neural-Network-JS/tree/master/examples/mnist
// with many modifications
// --- defined by MNIST - do not change these ---------------------------------------
const PIXELS = 28; // side length of each (tiny) data-set image, in pixels
const PIXELSSQUARED = PIXELS * PIXELS; // number of pixels (and network inputs) per image
/*[AssignmentComment]
Fixed values used when pre-processing a doodle. Seen along one side of the square:
PIXELS = PIXELS_PADDING_LEN + PIXELS_CENTER_SQ_LEN + PIXELS_PADDING_LEN -> 28 = 4 + 20 + 4.
The digit is placed in a 20 * 20 box inside the 28 * 28 image (see the documentation for details).
*/
const PIXELS_CENTER_SQ_LEN = 20;
const PIXELS_PADDING_LEN = 4;
// number of training and test exemplars in the data set:
const NOTRAIN = 60000;
const NOTEST = 10000;
//--- can modify all these --------------------------------------------------
// number of nodes in each layer of the simple NN
const noinput = PIXELSSQUARED;
const nohidden = 64;
const nooutput = 10;
const learningrate = 0.1; // default 0.1
// should we train every timestep or not (the "Stop training" button sets this to false)
let do_training = true;
// How many exemplars to train and test per timestep.
// [Assignment Comment]: running the NN and the CNN at the same time was found to be slow on low-spec machines.
const TRAINPERSTEP = 18; // originally 30; reduced in case a low-spec machine is too slow
const TESTPERSTEP = 3; // originally 5; reduced in case a low-spec machine is too slow
// multiply image size by this to magnify it for display
const ZOOMFACTOR = 7;
const ZOOMPIXELS = ZOOMFACTOR * PIXELS;
// Canvas layout: 3 rows, each of
//   large image + 50 gap + small image
// with a 50 gap between rows
const canvaswidth = ( PIXELS + ZOOMPIXELS ) + 50;
const canvasheight = ( ZOOMPIXELS * 3 ) + 100;
const DOODLE_THICK = 18; // thickness of doodle lines (default 18)
const DOODLE_BLUR = 3; // blur factor applied to doodles (default 3)
let mnist;
// all data is loaded into this object:
// mnist.train_images
// mnist.train_labels
// mnist.test_images
// mnist.test_labels
let nn; // the simple neural network (created in setup once its scripts have loaded)
/*
[AssignmentComment]
cnn_model represents the overall CNN
cnn_trainer represents the trainer, which is only responsible for training
*/
let cnn_model;
let cnn_trainer;
// progress counters for the training and test passes
let trainrun = 1;
let train_index = 0;
let testrun = 1;
let test_index = 0;
// running score of the current test run (NN and CNN share total_tests)
let total_tests = 0;
let total_correct = 0;
let cnn_total_correct = 0;
// accumulated time in ms spent training / predicting, per network
let train_time = 0;
let cnn_train_time = 0;
let test_time = 0;
let cnn_test_time = 0;
// images in LHS:
let doodle, demo;
let doodle_exists = false;
let demo_exists = false;
let mousedrag = false; // are we in the middle of a mouse drag drawing?
// the latest inputs are saved to these global vars so they can be inspected:
// type these names in the console
var train_inputs, test_inputs, demo_inputs, doodle_inputs;
// Weight initialiser for the Matrix library.
// Matrix.randomize() is changed to point to this, so it must be defined by the user of Matrix.
// Returns the result of AB.randomFloatAtoB for the range -0.5 to 0.5
// (the Coding Train default is -1 to 1).
function randomWeight()
{
    const lowest = -0.5;
    const highest = 0.5;
    return AB.randomFloatAtoB ( lowest, highest );
}
// CSS trick:
// make the run header bigger so all the message slots below fit
$("#runheaderbox").css ( { "max-height": "95vh" } );
//--- start of AB.msgs structure: ---------------------------------------------------------
// We output a series of AB.msgs to put data at various places in the run header.
// The second argument of AB.msg is the slot number; the static headers are written here,
// the variable slots (2, 4, 6, 8, 9) are written later by other functions in this file.
var thehtml; // scratch variable reused for every message that is built
// 1 Doodle header
thehtml = "<hr> <h3> 1. Doodle </h3> Top row: Doodle (left) and shrunk (right). <br> " +
" Draw your doodle in top LHS. <button onclick='wipeDoodle();' class='normbutton' >Clear doodle</button> <br> ";
AB.msg ( thehtml, 1 );
// 2 Doodle variable data (guess)
// 3 Training header
thehtml = "<hr> <h3> 2. Training </h3> Middle row: Training image magnified (left) and original (right). " +
" <button onclick='do_training = false;' class='normbutton' >Stop training</button> <br> ";
AB.msg ( thehtml, 3 );
// 4 variable training data
// 5 Testing header
thehtml = "<h4> Hidden tests </h4> " ;
AB.msg ( thehtml, 5 );
// 6 variable testing data
// 7 Demo header
thehtml = "<hr> <h3> 3. Demo </h3> Bottom row: Test image magnified (left) and original (right). <br>" +
" The network is <i>not</i> trained on any of these images. <br> " +
" <button onclick='makeDemo();' class='normbutton' >Demo test image</button> <br> ";
AB.msg ( thehtml, 7 );
// 8 Demo variable data (random demo ID)
// 9 Demo variable data (changing guess)
// opening tag used to highlight guesses and scores; callers append the closing </span>
const greenspan = "<span style='font-weight:bold; font-size:x-large; color:darkgreen'> " ;
//--- end of AB.msgs structure: ---------------------------------------------------------
// p5 setup hook.
// Creates the canvas and the doodle drawing surface, shows the loading screen, then loads
// the external scripts:
//   - matrix.js -> nn.js -> mnist.js are loaded one after another; once all three are in,
//     the simple NN is created and loadData() is called.
//   - convnet-min.js is loaded by a separate, independent request; once it is in,
//     the CNN (cnn_model) and its trainer (cnn_trainer) are created.
function setup()
{
    createCanvas ( canvaswidth, canvasheight );

    doodle = createGraphics ( ZOOMPIXELS, ZOOMPIXELS );   // doodle on larger canvas
    doodle.pixelDensity(1);

    // JS load other JS
    // show a loading screen while loading the JS and the data set
    AB.loadingScreen();

    $.getScript ( "/uploads/codingtrain/matrix.js", function()
    {
        $.getScript ( "/uploads/codingtrain/nn.js", function()
        {
            $.getScript ( "/uploads/codingtrain/mnist.js", function()
            {
                console.log ("All JS loaded");
                nn = new NeuralNetwork( noinput, nohidden, nooutput );
                nn.setLearningRate ( learningrate );
                loadData();
            });
        });
    });

    /*[Assignment Comment]
    Credit to the library: https://cs.stanford.edu/people/karpathy/convnetjs/docs.html
    The documentation of the library has no step by step guide to actual integration (so trial and error),
    it requires you to understand all the code in its concrete example:
    https://cs.stanford.edu/people/karpathy/convnetjs/demo/mnist.html.
    More parameter details are in my word report; the values below are the defaults from that example.
    */
    $.getScript ( "/uploads/zhensheng/convnet-min.js", function()
    {
        // Kept local: the original assigned to an undeclared name, creating an implicit global.
        const layerDefs = [
            { type: 'input', out_sx: 28, out_sy: 28, out_depth: 1 },
            { type: 'conv', sx: 5, filters: 8, stride: 1, pad: 2, activation: 'relu' },
            { type: 'pool', sx: 2, stride: 2 },
            { type: 'conv', sx: 5, filters: 16, stride: 1, pad: 2, activation: 'relu' },
            { type: 'pool', sx: 3, stride: 3 },
            { type: 'softmax', num_classes: 10 }
        ];

        cnn_model = new convnetjs.Net();
        cnn_model.makeLayers ( layerDefs );
        cnn_trainer = new convnetjs.SGDTrainer ( cnn_model, { method: 'adadelta', batch_size: 20, l2_decay: 0.001 } );
    });
}
// Load the data set via loadMNIST (per the original note: from a local file on this server).
// When the data arrives it is stored in the global "mnist", logged to the console,
// and the loading screen is removed.
function loadData()
{
    const whenLoaded = (data) =>
    {
        mnist = data;
        console.log ("All data loaded into mnist object:");
        console.log (mnist);
        AB.removeLoading();     // if no loading screen exists, this does nothing
    };

    loadMNIST ( whenLoaded );
}
// Make a P5 image object (PIXELS x PIXELS) from a raw data array of brightness values.
// Each brightness value is written to the red, green and blue channels of one pixel,
// and that pixel's alpha channel is set to 255.
function getImage ( img )
{
    const picture = createImage (PIXELS, PIXELS);     // blank image, populated below
    picture.loadPixels();

    let offset = 0;                                   // position of the current pixel's red channel
    for (let p = 0; p < PIXELSSQUARED; p++, offset += 4)
    {
        const grey = img[p];
        picture.pixels[offset]     = grey;
        picture.pixels[offset + 1] = grey;
        picture.pixels[offset + 2] = grey;
        picture.pixels[offset + 3] = 255;
    }

    picture.updatePixels();
    return picture;
}
/* [Assignment Comment]
Convert a raw image array into a normalised input array:
the first PIXELSSQUARED entries of img, each divided by 255 (so 0..255 becomes 0..1).
*/
function getInputs (img)
{
    return Array.from ( { length: PIXELSSQUARED }, (unused, position) => img[position] / 255 );
}
/* [Assignment Comment]
Wrap a normalised input array in a ConvNetJS Vol object (the data type the CNN works on).
A PIXELS x PIXELS x 1 Vol is created with initial value 0.0, then the first PIXELSSQUARED
values of normalInput are copied into its .w field.
*/
function getCnnInputs (normalInput)
{
    const volume = new convnetjs.Vol ( PIXELS, PIXELS, 1, 0.0 );

    let slot = 0;
    while (slot < PIXELSSQUARED)
    {
        volume.w[slot] = normalInput[slot];
        slot += 1;
    }

    return volume;
}
// Train both networks with a single exemplar, taken from global var "train_index".
// show: when truthy, also draw the exemplar in the middle row (magnified and original size).
// Afterwards the training message (slot 4) is updated and train_index advances,
// wrapping back to 0 (and starting a new trainrun) when it reaches NOTRAIN.
function trainit (show)
{
    const exemplar = mnist.train_images[train_index];
    const digit    = mnist.train_labels[train_index];

    // optional - show visual of the image
    if (show)
    {
        const offset  = ZOOMPIXELS + 50;
        const picture = getImage ( exemplar );                   // get image from data array
        image ( picture, 0,      offset, ZOOMPIXELS, ZOOMPIXELS );   // magnified
        image ( picture, offset, offset, PIXELS,     PIXELS );       // original
    }

    // set up the inputs: normalised array for the NN, Vol object for ConvNetJS
    const inputs    = getInputs ( exemplar );
    const cnnInputs = getCnnInputs ( inputs );

    // set up the outputs: one output location is 1, the rest stay at 0
    const targets = new Array(10).fill(0);
    targets[digit] = 1;

    train_inputs = inputs;        // can inspect in console

    // time the NN training step
    const nnStarted = Date.now();
    nn.train ( inputs, targets );
    train_time += (Date.now() - nnStarted);

    /* [Assignment Comment]
    ConvNetJS is trained with the Vol object and the label directly, unlike the original NN
    */
    const cnnStarted = Date.now();
    cnn_trainer.train ( cnnInputs, digit );
    cnn_train_time += (Date.now() - cnnStarted);

    thehtml = " Trainrun: " + trainrun + " No: " + train_index ;
    AB.msg ( thehtml, 4 );

    train_index++;
    if ( train_index === NOTRAIN )
    {
        train_index = 0;
        console.log( "finished trainrun: " + trainrun );
        trainrun++;
    }
}
// Test both networks with a single exemplar, taken from global var "test_index".
// Updates the running totals and timers, writes the statistics to message slot 6,
// and when test_index reaches NOTEST logs the final score and resets the counters
// for the next testrun.
function testit()
{
    const exemplar = mnist.test_images[test_index];
    const digit    = mnist.test_labels[test_index];

    // set up the inputs: normalised array for the NN, Vol object for ConvNetJS
    const inputs    = getInputs ( exemplar );
    const cnnInputs = getCnnInputs ( inputs );

    test_inputs = inputs;        // can inspect in console

    // NN prediction (timed); the guess is the top output
    const nnStarted  = Date.now();
    const prediction = nn.predict ( inputs );       // array of outputs
    test_time += (Date.now() - nnStarted);
    const guess = findMax ( prediction );

    /* [Assignment Comment]
    ConvNetJS predicts with the Vol object again; the outputs are inside the .w field of the result
    */
    const cnnStarted    = Date.now();
    const cnnPrediction = cnn_model.forward ( cnnInputs );
    cnn_test_time += (Date.now() - cnnStarted);
    const cnnGuess = findMax ( cnnPrediction.w );

    // update the running statistics of both networks
    total_tests++;
    if (guess == label_of(digit)) total_correct++;
    if (cnnGuess == label_of(digit)) cnn_total_correct++;

    const percent    = (total_correct / total_tests) * 100 ;
    const cnnPercent = (cnn_total_correct / total_tests) * 100 ;

    thehtml = " Testrun: " + testrun + " No: " + total_tests + " <br> " +
        " NN Correct: " + total_correct + " " +
        " CNN Correct: " + cnn_total_correct + "<br>" +
        " NN Train Time(ms): " + train_time + " " +
        " CNN Train Time(ms): " + cnn_train_time + "<br>" +
        " NN Predict Time(ms): " + test_time + " " +
        " CNN Predict Time(ms): " + cnn_test_time + "<br>" +
        " NN Score(%): " + greenspan + percent.toFixed(2) + " </span> " +
        " CNN Score(%): " + greenspan + cnnPercent.toFixed(2) + "</span>";
    AB.msg ( thehtml, 6 );

    test_index++;
    if ( test_index === NOTEST )
    {
        console.log( "finished testrun: " + testrun + " score: " + percent.toFixed(2) + " cnn_core:" + cnnPercent.toFixed(2) );
        testrun++;
        test_index = 0;
        total_tests = 0;
        total_correct = 0;
        cnn_total_correct = 0;
    }

    // identity helper: keeps the loose (==) comparison of guess and label readable above
    function label_of (value) { return value; }
}
//--- find no.1 and no.2 output nodes ---------------------------------------
// Returns a two element array [ no1, no2 ]: the indexes of the largest and the
// second largest value in array "a".
// (restriction) assumes array values start at 0 (which is true for output nodes):
// only values greater than 0 can take either place, and both indexes default to 0.
function find12 (a)
{
    let no1 = 0;
    let no2 = 0;
    let no1value = 0;
    let no2value = 0;

    for (let i = 0; i < a.length; i++)
    {
        const value = a[i];

        if (value > no1value)
        {
            // New leader: the previous leader becomes the runner-up.
            // (Without this demotion a later, smaller value could wrongly claim second place.)
            no2 = no1;
            no2value = no1value;
            no1 = i;
            no1value = value;
        }
        else if (value > no2value)
        {
            no2 = i;
            no2value = value;
        }
    }

    return [ no1, no2 ];
}
// Find our guess - the index of the maximum of the output nodes array.
// Kept as a separate function from find12 for speed, because it is done many times.
// Only values greater than 0 are considered; the first of equal maxima wins,
// and 0 is returned when no value qualifies.
function findMax (a)
{
    let bestIndex = 0;
    let bestValue = 0;

    for (let k = 0; k < a.length; k++)
    {
        const candidate = a[k];
        if (!(candidate > bestValue)) continue;     // not a new maximum
        bestValue = candidate;
        bestIndex = k;
    }

    return bestIndex;
}
// --- the draw function -------------------------------------------------------------
// Called every step. Does nothing until the data set and the CNN are ready; then it
//   1. paints the background and the separator lines,
//   2. trains and tests both networks (while do_training is true),
//   3. redraws the demo and doodle images and refreshes their guesses,
//   4. handles doodle drawing with the mouse.
function draw()
{
    // check if libraries and data loaded yet:
    if ( typeof mnist === 'undefined' ) return;

    // The ConvNetJS script is fetched by its own request in setup(), independently of the
    // scripts that lead to the data set, so the CNN may not exist yet even though mnist does.
    // Wait for it too, otherwise training/predicting below would fail on an undefined object.
    if ( typeof cnn_model === 'undefined' || typeof cnn_trainer === 'undefined' ) return;

    background ('#ebc634');
    fill(255, 204, 0);
    rect(ZOOMPIXELS + 2, 0, 2,canvasheight);
    rect(0, ZOOMPIXELS + 2, ZOOMPIXELS + 2,2);
    rect(0, ZOOMPIXELS *2 + 80, ZOOMPIXELS + 2,2);

    if ( do_training )
    {
        // do some training per step
        for (let i = 0; i < TRAINPERSTEP; i++)
        {
            if (i === 0) trainit(true);     // show only one per step - still flashes by
            else trainit(false);
        }

        // do some testing per step
        for (let i = 0; i < TESTPERSTEP; i++)
            testit();
    }

    // keep drawing demo and doodle images
    // and keep guessing - we will update our guess as time goes on
    if ( demo_exists )
    {
        drawDemo();
        guessDemo();
    }

    if ( doodle_exists )
    {
        drawDoodle();
        guessDoodle();
    }

    // detect doodle drawing
    // (restriction) the following assumes doodle starts at 0,0
    if ( mouseIsPressed )       // gets called when we click buttons, as well as if in doodle corner
    {
        const MAX = ZOOMPIXELS + 20;    // can draw up to this pixels in corner
        if ( (mouseX < MAX) && (mouseY < MAX) && (pmouseX < MAX) && (pmouseY < MAX) )
        {
            mousedrag = true;           // start a mouse drag
            doodle_exists = true;
            doodle.stroke('white');
            doodle.strokeWeight( DOODLE_THICK );
            doodle.line(mouseX, mouseY, pmouseX, pmouseY);
        }
    }
    else
    {
        // are we exiting a drawing
        if ( mousedrag )
        {
            mousedrag = false;
            doodle.filter (BLUR, DOODLE_BLUR);      // just blur once, when the stroke ends
        }
    }
}
//--- demo -------------------------------------------------------------
// Pick a random image from the test set (so it has not been used in training),
// store it in the global "demo" for drawing and guessing, and show its index and
// label in message slot 8.
function makeDemo()
{
    demo_exists = true;

    const pick = AB.randomIntAtoB ( 0, NOTEST - 1 );
    demo = mnist.test_images[pick];       // type "demo" in console to see raw data
    const answer = mnist.test_labels[pick];

    thehtml = "Test image no: " + pick + "<br>" +
        "Classification: " + answer + "<br>" ;
    AB.msg ( thehtml, 8 );
}
// Draw the current demo image in the bottom row of the canvas:
// magnified on the left and at original size to its right.
function drawDemo()
{
    const picture = getImage ( demo );
    const rowTop  = canvasheight - ZOOMPIXELS;

    image ( picture, 0,               rowTop, ZOOMPIXELS, ZOOMPIXELS );    // magnified
    image ( picture, ZOOMPIXELS + 50, rowTop, PIXELS,     PIXELS );        // original
}
// Classify the current demo image with both networks and show the two guesses
// in message slot 9.
function guessDemo()
{
    const inputs    = getInputs ( demo );
    const cnnInputs = getCnnInputs ( inputs );

    demo_inputs = inputs;        // can inspect in console

    // NN: the guess is the top output
    const nnOutputs = nn.predict ( inputs );
    const guess     = findMax ( nnOutputs );

    /* [Assignment Comment]
    ConvNetJS predicts with the Vol object again; the outputs are inside the .w field of the result
    */
    const cnnOutputs = cnn_model.forward ( cnnInputs );
    const cnnGuess   = findMax ( cnnOutputs.w );

    thehtml = " NN classify it as: " + greenspan + guess + "</span> <br> CNN classify it as: " + greenspan + cnnGuess + "</span> <br>";
    AB.msg ( thehtml, 9 );
}
//--- doodle -------------------------------------------------------------
// Draw the current doodle in the top row of the canvas:
// at full size on the left and shrunk to PIXELS x PIXELS on the right.
function drawDoodle()
{
    // doodle is createGraphics not createImage, so take a snapshot of it first
    const snapshot = doodle.get();
    const shrunkLeft = ZOOMPIXELS + 50;

    image ( snapshot, 0,          0, ZOOMPIXELS, ZOOMPIXELS );    // original
    image ( snapshot, shrunkLeft, 0, PIXELS,     PIXELS );        // shrunk
    //TODO draw after process image
}
/* [Assignment Comment]
Debug tool for visualising the pixels of a P5 image:
collects the first (red) channel of every pixel and hands the values,
together with the image width, to showInputs for printing.
*/
function printImg(img)
{
    const pixelCount = img.width * img.height;
    const reds = [];

    for (let offset = 0; reds.length < pixelCount; offset += 4)
    {
        reds.push ( img.pixels[offset] );
    }

    showInputs ( reds, img.width );
}
/* [Assignment Comment]
Main procedure for pre-processing the newly written doodle image.

beforeImg: a PIXELS x PIXELS image whose pixels have been loaded; only the first (red)
           channel of each pixel is read.
Returns a new PIXELS x PIXELS image in which the drawn digit has been cropped, scaled so
that its longer side is PIXELS_CENTER_SQ_LEN pixels, shifted according to its centre of
mass and clipped to the centre box, which is surrounded by PIXELS_PADDING_LEN black pixels.
A blank doodle (no pixel brighter than 0) yields an all-black image.
*/
function preProcessDoodleImage(beforeImg)
{
    // step 0: transform into a one dimensional array (28 * 28 long) holding only the bright values
    const processedInput = [];
    for (let i = 0; i < PIXELSSQUARED; i++)
    {
        processedInput[i] = beforeImg.pixels[i * 4];
    }

    /* x is the index from left to right (column), y is the index from top to bottom (row);
       the same convention is used in all the following code
         0 1 2 .. x ------------->
         1
         2
         .
         .
         y
    */

    // step 1: crop the image, to prepare for resizing later.
    // 1.1 / 1.2 find the bounding box of all pixels brighter than 0.
    // The sentinels are +/-Infinity so that "nothing found" is unambiguous
    // (Number.MIN_VALUE is the smallest POSITIVE number, not a large negative one).
    let leftMost = Infinity;
    let rightMost = -Infinity;
    let upMost = Infinity;
    let downMost = -Infinity;
    for (let y = 0; y < PIXELS; y++)
    {
        for (let x = 0; x < PIXELS; x++)
        {
            if (processedInput[y * PIXELS + x] > 0)
            {
                leftMost = Math.min(leftMost, x);
                rightMost = Math.max(rightMost, x);
                upMost = Math.min(upMost, y);
                downMost = Math.max(downMost, y);
            }
        }
    }

    // Blank doodle: there is nothing to crop, and the size computed below would be a huge
    // negative number that must not be passed to createImage. Return an all-black image.
    if (rightMost < leftMost)
    {
        const blankImg = createImage(PIXELS, PIXELS);
        blankImg.loadPixels();
        for (let y = 0; y < PIXELS; y++)
        {
            for (let x = 0; x < PIXELS; x++)
            {
                blankImg.set(x, y, color(0,0,0));
            }
        }
        blankImg.updatePixels();
        return blankImg;
    }

    // 1.3 calculate the actual size of the cropped image
    const actualWidth = rightMost - leftMost + 1;
    const actualHeight = downMost - upMost + 1;

    // 1.4 create a new image holding only the bounding box, so P5 can do the resize algorithm
    const cropImg = createImage(actualWidth, actualHeight);
    cropImg.loadPixels();
    for (let y = 0; y < actualHeight; y++)
    {
        for (let x = 0; x < actualWidth; x++)
        {
            const c = processedInput[(y + upMost) * PIXELS + (x + leftMost)];
            cropImg.set(x, y, color(c,c,c));
        }
    }
    cropImg.updatePixels();
    //printImg(cropImg);

    // step 2: resize the cropped image so that its longer side is 20 pixels,
    //         the other side is scaled proportionally (that is what the 0 argument means)
    if (actualWidth >= actualHeight)
    {
        cropImg.resize(PIXELS_CENTER_SQ_LEN, 0);
    }
    else
    {
        cropImg.resize(0, PIXELS_CENTER_SQ_LEN);
    }
    cropImg.loadPixels();
    //printImg(cropImg);

    // step 3: calculate the centre of mass for the x and the y coordinate
    //         (coordinates are counted from 1 here)
    let greyIntensitySum = 0;
    let xSum = 0;
    let ySum = 0;
    for (let y = 0; y < cropImg.height; y++)
    {
        for (let x = 0; x < cropImg.width; x++)
        {
            const greyIntensity = cropImg.pixels[( y * cropImg.width + x ) * 4];
            greyIntensitySum += greyIntensity;
            xSum += (x + 1) * greyIntensity;
            ySum += (y + 1) * greyIntensity;
        }
    }

    /* 3.1:
       formula for x coordinate mass centre: sum (x * grey_scale) / sum (grey_scale)
       formula for y coordinate mass centre: sum (y * grey_scale) / sum (grey_scale)
    */
    const xMassCenter = greyIntensitySum === 0 ? 0 : Math.round(xSum / greyIntensitySum);
    const yMassCenter = greyIntensitySum === 0 ? 0 : Math.round(ySum / greyIntensitySum);

    // step 4: create a new image, shifting the cropped image according to its mass centre
    // 4.1: calculate where the top left corner of the cropped image goes
    const upMostFinal = PIXELS_PADDING_LEN + (PIXELS_CENTER_SQ_LEN/2 - yMassCenter);
    const leftMostFinal = PIXELS_PADDING_LEN + (PIXELS_CENTER_SQ_LEN/2 - xMassCenter);

    // local image (the original assigned to an undeclared name, creating an implicit global)
    const shiftImg = createImage(PIXELS, PIXELS);
    shiftImg.loadPixels();
    for (let y = 0; y < PIXELS; y++)
    {
        for (let x = 0; x < PIXELS; x++)
        {
            // the coordinates have to be inside the (28 - 4 - 4) * (28 - 4 - 4) centre box ...
            const insideCenterBox =
                y >= PIXELS_PADDING_LEN && x >= PIXELS_PADDING_LEN &&
                y < PIXELS - PIXELS_PADDING_LEN && x < PIXELS - PIXELS_PADDING_LEN;

            // ... and inside the shifted, cropped and resized rectangle.
            // Only the overlap of the two rectangles is copied, everything else is black.
            const insideShiftedCrop =
                x >= leftMostFinal && y >= upMostFinal &&
                x <= (leftMostFinal + cropImg.width - 1) && y <= (upMostFinal + cropImg.height - 1);

            if (insideCenterBox && insideShiftedCrop)
            {
                const c = cropImg.pixels[(cropImg.width * ( y - upMostFinal) + (x - leftMostFinal)) * 4];
                shiftImg.set(x, y, color(c,c,c));
            }
            else
            {
                shiftImg.set(x, y, color(0,0,0));
            }
        }
    }
    shiftImg.updatePixels();
    return shiftImg;
}
// Classify the current doodle with both networks and show, for each network,
// its no.1 and no.2 guess in message slot 2.
function guessDoodle()
{
    // doodle is createGraphics not createImage: take a snapshot and shrink it to data-set size
    const snapshot = doodle.get();
    snapshot.resize ( PIXELS, PIXELS );
    snapshot.loadPixels();

    // set up inputs: pre-process the doodle, then normalise the first (red) channel to 0..1
    const cleaned = preProcessDoodleImage ( snapshot );
    const processedInput = Array.from ( { length: PIXELSSQUARED }, (unused, p) => cleaned.pixels[p * 4] / 255 );

    doodle_inputs = processedInput;     // can inspect in console

    const cnnInputs = getCnnInputs ( processedInput );

    // feed forward to make the NN prediction, get no.1 and no.2 guesses
    const nnOutputs = nn.predict ( processedInput );
    const guess     = find12 ( nnOutputs );

    /* [Assignment Comment]
    ConvNetJS predicts with the Vol object again; the .w field of the result holds the
    outputs for all digits, from which the first and second place are taken
    */
    const cnnOutputs = cnn_model.forward ( cnnInputs );
    const cnnGuess   = find12 ( cnnOutputs.w );

    thehtml = " NN classify it as: " + greenspan + guess[0] + "</span> " +
        " No.2 guess is: " + greenspan + guess[1] + "</span> <br>" +
        " CNN classify it as: " + greenspan + cnnGuess[0] + "</span> " +
        " No.2 guess is: " + greenspan + cnnGuess[1] + "</span>";
    AB.msg ( thehtml, 2 );
}
// Handler of the "Clear doodle" button:
// marks the doodle as absent (so it is no longer drawn or guessed)
// and paints the doodle surface black.
function wipeDoodle()
{
    doodle_exists = false;
    doodle.background ( "black" );
}
/* [Assignment Comment]
Debug tool: log the inputs row by row, corresponding to the square of pixels.
Every value is formatted with two decimals and left-padded with zeros to a length of 6,
e.g. "005.00" or "255.00"; a value that formats to "000.00" is shown as "------".
A new line is started every imgWidth values.
*/
function showInputs ( inputs, imgWidth)
{
    const CELL_LENGTH = 6;
    let text = "";

    for (let k = 0; k < inputs.length; k++)
    {
        if ( k % imgWidth === 0 ) text += "\n";       // new line for each row of pixels

        let cell = inputs[k].toFixed(2);
        const missing = CELL_LENGTH - cell.length;
        if (missing > 0) cell = "0".repeat(missing) + cell;

        text += (cell === "000.00") ? " ------" : " " + cell;
    }

    console.log (text);
}