// Code viewer for World: TF- CNN Character recognit...



// --- defined by MNIST - do not change these ---------------------------------------
// NOTE(review): the loader script is "mnist-az.js", labels run 1..26 and the network has
// 26 outputs, so this is presumably a letters data set in MNIST format - confirm.

const PIXELS        = 28;                       // images in data set are tiny: PIXELS x PIXELS
const PIXELSSQUARED = PIXELS * PIXELS;          // number of values in one flattened image

// number of training and test exemplars in the data set:
const NOTRAIN = 124800;
const NOTEST  = 20800;


//--- can modify all these --------------------------------------------------

// no of input nodes in network (one per pixel); not referenced by the rest of the visible code
const noinput  = PIXELSSQUARED;

// step size handed to the SGD optimizer when the model is compiled in loadData()
const learningRate = 0.1;

// should we train every timestep or not
// (the "Stop training" button sets this to false; draw() then skips training AND hidden testing)
let do_training = true;

// how many to train and test per timestep
const TRAINPERSTEP = 800;       // exemplars per training batch
const TESTPERSTEP  = 1;         // calls to testit() per draw()

// multiply it by this to magnify for display 
const ZOOMFACTOR    = 7;                        
const ZOOMPIXELS    = ZOOMFACTOR * PIXELS;      // side length of a magnified image, in canvas pixels

// 3 rows of
// large image + 50 gap + small image    
// 50 gap between rows 

const canvaswidth = ( PIXELS + ZOOMPIXELS ) + 50;
const canvasheight = ( ZOOMPIXELS * 3 ) + 100;


const DOODLE_THICK = 18;    // thickness of doodle lines 
const DOODLE_BLUR = 3;      // blur factor applied to doodles 

// epochs run by each fit() call, i.e. passes over every training batch
const numEpochs = 3;

// mnist:           the loaded data set (see below)
// networkModel:    the TensorFlow.js model built in loadData()
// normalizeFactor: scalar tensor 255, used to scale pixel values down to 0..1
// trainingImages / trainingLabels / encodedTrainingLabels: tensors of the current training batch
// testImages / testLabels / encodedTestLabels: not assigned anywhere in the visible code
let mnist, trainingImages, trainingLabels, testImages, testLabels, encodedTestLabels, encodedTrainingLabels, networkModel, normalizeFactor;
// trainingInProgress is true while an asynchronous fit() call is still running;
// testingInProgress brackets the hidden tests inside draw(); index is unused in the visible code
let index, trainingInProgress = false, testingInProgress = false;
// all data is loaded into this 
// mnist.train_images
// mnist.train_labels
// mnist.test_images
// mnist.test_labels


// not assigned or read anywhere in the visible code
let nn;

let trainrun = 1;           // number of the current pass over the training set (for display)
let train_index = 0;        // position in the training set, advanced by trainBatch()

let testrun = 1;            // number of the current hidden-test run (for display)
let test_index = 0;         // index of the test exemplar chosen by testit()
let total_tests = 0;        // hidden tests done so far in this run
let total_correct = 0;      // ... and how many of those were classified correctly

// images in LHS:
let doodle, demo;               // doodle: p5 graphics buffer the user draws on; demo: raw pixel array of a test image
let doodle_exists = false;      // true once something has been drawn (reset by wipeDoodle)
let demo_exists = false;        // true once makeDemo() has picked an image

let mousedrag = false;      // are we in the middle of a mouse drag drawing?  


// save inputs to global var to inspect
// type these names in console 
// (none of these are assigned anywhere in the visible code)
var train_inputs, test_inputs, demo_inputs, doodle_inputs;
   
// make run header bigger
AB.headerCSS ( { "max-height": "95vh" } );




//--- start of AB.msgs structure: ---------------------------------------------------------
// We output a series of AB.msgs to put data at various places in the run header.
// The second argument of AB.msg is the slot number. The odd slots are static headers written
// once here; the even slots (and slot 9) are rewritten at run time by the functions named below.
// thehtml is a global scratch variable reused by every function that builds a message.
var thehtml;

  // 1 Doodle header 
  thehtml = "<hr> <h1> 1. Doodle </h1> Top row: Doodle (left) and shrunk (right). <br> " +
        " Draw your doodle in top LHS. <button onclick='wipeDoodle();' class='normbutton' >Clear doodle</button> <br> ";
   AB.msg ( thehtml, 1 );

  // 2 Doodle variable data (guess) - written by guessDoodle()
  
  // 3 Training header
  thehtml = "<hr> <h1> 2. Training </h1> Middle row: Training image magnified (left) and original (right). <br>  " +
        " <button onclick='do_training = false;' class='normbutton' >Stop training</button> <br> ";
  AB.msg ( thehtml, 3 );
     
  // 4 variable training data - written by trainBatch()
  
  // 5 Testing header
  thehtml = "<h3> Hidden tests </h3> " ;
  AB.msg ( thehtml, 5 );
           
  // 6 variable testing data - written by testit()
  
  // 7 Demo header 
  thehtml = "<hr> <h1> 3. Demo </h1> Bottom row: Test image magnified (left) and  original (right). <br>" +
        " The network is <i>not</i> trained on any of these images. <br> " +
        " <button onclick='makeDemo();' class='normbutton' >Demo test image</button> <br> ";
   AB.msg ( thehtml, 7 );
   
  // 8 Demo variable data (random demo ID) - written by makeDemo()
  // 9 Demo variable data (changing guess) - written by guessDemo()
  
// opening tag used to highlight scores and guesses; callers close it with "</span>"
const greenspan = "<span style='font-weight:bold; font-size:x-large; color:darkgreen'> "  ;

//--- end of AB.msgs structure: ---------------------------------------------------------


/**
 * p5 entry point, run once at start-up.
 * Creates the main canvas and the off-screen doodle buffer, puts up the loading screen,
 * then fetches TensorFlow.js followed by the data-set loader script.
 * loadData() is started only once both scripts have arrived.
 */
function setup()
{
  const TFJS_URL   = "https://cdn.jsdelivr.net/npm/@tensorflow/tfjs@2.0.0/dist/tf.min.js";
  const LOADER_URL = "/uploads/aparna/mnist-az.js";

  createCanvas ( canvaswidth, canvasheight );

  // the doodle lives on its own buffer at magnified size
  doodle = createGraphics ( ZOOMPIXELS, ZOOMPIXELS );
  doodle.pixelDensity(1);

  // keep a loading screen up while the JS and the data set are fetched
  AB.loadingScreen();

  // second stage: both libraries are present, so the data can be loaded
  const startLoadingData = function()
  {
    console.log ("All JS loaded");
    loadData();
  };

  // first stage: TensorFlow.js has arrived, now fetch the data-set loader
  const fetchLoaderScript = function()
  {
    $.getScript ( LOADER_URL, startLoadingData );
  };

  $.getScript ( TFJS_URL, fetchLoaderScript );
}

/**
 * Load the data set from a local file (on this server) and build the CNN.
 *
 * Once loadMNIST hands over the data it is stored in the global "mnist", then the
 * global "networkModel" is assembled with TensorFlow.js:
 *   conv2d (8 filters) -> max pooling -> conv2d (16 filters) -> max pooling
 *   -> flatten -> dense softmax layer with 26 units.
 * Only the first layer states the input shape. The pooling layers downsample by taking
 * the maximum of each 2x2 region. Flattening turns the 2D filter outputs into a 1D
 * vector for the final classification layer.
 * The model is compiled with categorical cross-entropy loss and SGD at "learningRate".
 * Finally the scalar tensor 255 used for normalising pixel values is created and the
 * loading screen is removed.
 */
function loadData()
{
  // both convolution layers share everything except the filter count (and input shape)
  const convLayer = (specific) => tf.layers.conv2d ( Object.assign (
      { activation: "relu", kernelInitializer: "varianceScaling", kernelSize: 5, strides: 1 },
      specific ) );

  const poolLayer = () => tf.layers.maxPooling2d ( { poolSize: [2, 2], strides: [2, 2] } );

  const onDataLoaded = (data) =>
  {
    mnist = data;
    console.log ("All data loaded into mnist object:");
    console.log (mnist);

    // layers in network order
    const layers = [
      convLayer ( { filters: 8, inputShape: [PIXELS, PIXELS, 1] } ),
      poolLayer(),
      convLayer ( { filters: 16 } ),
      poolLayer(),
      tf.layers.flatten(),
      tf.layers.dense ( { activation: "softmax", kernelInitializer: "varianceScaling", units: 26 } )
    ];

    networkModel = tf.sequential();
    layers.forEach ( (layer) => networkModel.add (layer) );

    // stochastic gradient descent with the configured learning rate
    const optimizer = tf.train.sgd (learningRate);
    networkModel.compile ( { loss: "categoricalCrossentropy", optimizer: optimizer } );

    // scalar tensor with value 255, for normalising input data
    normalizeFactor = tf.scalar (255);

    console.log ("loading completed");
    AB.removeLoading();     // if no loading screen exists, this does nothing
  };

  loadMNIST ( onDataLoaded );
}

/**
 * Train the network on the next batch of TRAINPERSTEP training exemplars.
 *
 * Reads the batch starting at the global "train_index", converts it to tensors
 * (pixel values divided by 255, labels one-hot encoded over 26 classes) and starts an
 * asynchronous fit() for "numEpochs" epochs. "trainingInProgress" is true from the
 * moment fit() is started until it settles; draw() uses it to avoid overlapping fits.
 *
 * The batch tensors are exposed through the globals trainingImages, trainingLabels and
 * encodedTrainingLabels while training runs, and are disposed once fit() has settled.
 *
 * Afterwards train_index is advanced by one batch, slot 4 of the run header is
 * refreshed, and at the end of the training set the index wraps to 0 and "trainrun"
 * is incremented.
 */
function trainBatch()
{
  const NUM_CLASSES = 26;     // one output per letter

  // nextBatch() advances the index it is given by one batch *before* slicing, so it is
  // handed the position one batch behind the one wanted. Without this the first batch
  // of every run (exemplars 0 .. TRAINPERSTEP-1) would never be trained on.
  const [imageBatch, labelBatch, batchStart] =
      nextBatch ( mnist.train_images, mnist.train_labels, TRAINPERSTEP, train_index - TRAINPERSTEP );

  // tf.tidy frees every intermediate tensor; only the returned ones survive.
  const batch = tf.tidy ( () => {
    const labels = tf.tensor1d ( labelBatch, "int32" );
    return {
      // normalise data by dividing with 255
      images:  tf.reshape ( tf.tensor (imageBatch), [imageBatch.length, PIXELS, PIXELS, 1] ).div (normalizeFactor),
      labels:  labels,
      // one-hot encoding for labels
      encoded: tf.oneHot ( labels, NUM_CLASSES )
    };
  });

  // keep the globals pointing at the current batch (they can be inspected from the console)
  trainingImages        = batch.images;
  trainingLabels        = batch.labels;
  encodedTrainingLabels = batch.encoded;

  const fitOptions = { batchSize: TRAINPERSTEP, epochs: numEpochs, shuffle: true };

  trainingInProgress = true;
  networkModel.fit ( batch.images, batch.encoded, fitOptions )
    .catch ( (err) => {
      // report the failure instead of leaving trainingInProgress stuck at true forever
      console.error ( "trainBatch: training failed", err );
    })
    .then ( () => {
      // the batch tensors are no longer needed once fit() has settled
      tf.dispose (batch);
      trainingInProgress = false;
    });

  // position of the next batch = number of exemplars consumed so far in this run
  train_index = batchStart + TRAINPERSTEP;

  thehtml = " trainrun: " + trainrun + "<br> no: " + train_index ;
  AB.msg ( thehtml, 4 );

  // Upon reaching the end of the data set, restart from the first batch.
  if ( train_index >= NOTRAIN )
  {
    train_index = 0;
    console.log ( "finished trainrun: " + trainrun );
    trainrun++;
  }
}

/**
 * Slice a batch of images and the corresponding labels out of the input data.
 *
 * Note that the index is moved forward by one batch BEFORE slicing: the batch returned
 * covers [index + batchSize, index + 2 * batchSize).
 *
 * Input labels run from 1 to 26; each is shifted down by one to 0..25, so in the
 * network output index 0 corresponds to 'A' and index 25 to 'Z'.
 *
 * @param {Array} images     all images
 * @param {Array} labels     all labels, parallel to images
 * @param {number} batchSize number of exemplars per batch
 * @param {number} index     start position of the previous batch
 * @returns {Array} [ image batch, zero-based label batch, start position of this batch ]
 */
function nextBatch(images, labels, batchSize, index) {
  const start = index + batchSize;
  const stop = start + batchSize;
  const imageBatch = images.slice(start, stop);
  const zeroBasedLabels = labels.slice(start, stop).map((label) => label - 1);
  return [imageBatch, zeroBasedLabels, start];
}

/**
 * Make a P5 image object from a raw data array.
 * Each of the PIXELSSQUARED brightness values becomes one fully opaque grey pixel
 * (red = green = blue = brightness).
 */
function getImage ( img )
{
    const picture = createImage (PIXELS, PIXELS);    // blank image, populated below
    picture.loadPixels();

    for (let p = 0; p < PIXELSSQUARED; p++)
    {
        const grey = img[p];
        const base = p * 4;                          // 4 slots per pixel: R, G, B, alpha

        for (let channel = 0; channel < 3; channel++)
            picture.pixels[base + channel] = grey;

        picture.pixels[base + 3] = 255;              // alpha: fully opaque
    }

    picture.updatePixels();
    return picture;
}


/**
 * Convert a raw image array into a normalised input array:
 * the first PIXELSSQUARED values of img, each divided by 255 (so 0..255 becomes 0..1).
 */
function getInputs ( img )
{
    return Array.from ( { length: PIXELSSQUARED }, (_, p) => img[p] / 255 );
}

/**
 * Test the network with a single, randomly chosen exemplar from the test set.
 *
 * The chosen index is stored in the global "test_index". The image is normalised
 * (divided by 255), classified, and the guess compared with the label. Labels run
 * from 1, network outputs from 0, hence the "+ 1".
 * Running totals and the percentage score are shown in slot 6 of the run header.
 * After NOTEST tests the run is logged, the totals are reset and "testrun" is incremented.
 */
function testit()
{
  test_index = AB.randomIntAtoB ( 0, NOTEST - 1 );
  const img   = mnist.test_images[test_index];
  const label = mnist.test_labels[test_index];

  // tf.tidy frees the tensors created for this prediction; without it every call leaks
  // them. Only the plain array of output values leaves the tidy scope.
  const prediction = tf.tidy ( () => {
    // Normalize data by dividing it with 255
    const testImage = tf.reshape ( tf.tensor ( img, [1, PIXELSSQUARED] ), [1, PIXELS, PIXELS, 1] ).div (normalizeFactor);
    return networkModel.predict (testImage).arraySync()[0];
  });

  // our guess is the output node with the highest value
  const guess = prediction.indexOf ( Math.max (...prediction) );

  total_tests++;
  if ( guess + 1 === label )  total_correct++;

  const percent = ( total_correct / total_tests ) * 100 ;

  thehtml =  " testrun: " + testrun + "<br> no: " + total_tests + " <br> " +
        " correct: " + total_correct + "<br>" +
        "  score: " + greenspan + percent.toFixed(2) + "</span>";
  AB.msg ( thehtml, 6 );

  if ( total_tests == NOTEST )
  {
    console.log ( "finished testrun: " + testrun + " score: " + percent.toFixed(2) );
    testrun++;              // the next NOTEST tests form a new run
    total_tests = 0;
    total_correct = 0;
  }
}




//--- find no.1 (and maybe no.2) output nodes ---------------------------------------
// (restriction) assumes array values start at 0 (which is true for output nodes) 


/**
 * Return [ index of the largest value, index of the second largest value ] of array a.
 * Both slots start out as index 0 with value 0, so only values above 0 can be picked
 * (the output nodes this is used on satisfy that).
 */
function find12 (a)
{
  // each slot is an [ index, value ] pair
  let top    = [ 0, 0 ];
  let second = [ 0, 0 ];

  for (let pos = 0; pos < a.length; pos += 1)
  {
    const candidate = a[pos];

    if (candidate > top[1])
    {
      // new no.1: the old no.1 drops down to no.2
      second = top;
      top = [ pos, candidate ];
      continue;
    }

    if (candidate > second[1])
      second = [ pos, candidate ];      // new no.2
  }

  return [ top[0], second[0] ];
}



// just get the maximum - separate function for speed - done many times 
// find our guess - the max of the output nodes array

/**
 * Return the index of the largest value in array a (the first one on a tie).
 * The running maximum starts at 0, so index 0 is returned when no value exceeds 0.
 */
function findMax (a)        
{
  let winner = 0;

  for (let highest = 0, pos = 0; pos < a.length; pos += 1)
  {
    if ( !(a[pos] > highest) ) continue;    // not a new maximum

    highest = a[pos];
    winner = pos;
  }

  return winner;
}




// --- the draw function -------------------------------------------------------------
// every step:
/**
 * p5 draw loop, run every step.
 *
 * Does nothing until the data set is loaded. Otherwise:
 *  - while do_training is set: starts a new training batch unless the previous one is
 *    still running (trainingInProgress), then runs TESTPERSTEP hidden tests;
 *  - redraws and re-classifies the demo image and the doodle, if they exist;
 *  - turns mouse movement in the top-left corner into doodle strokes, and blurs the
 *    doodle once when a drag ends.
 */
function draw() 
{
    // check if libraries and data loaded yet:
    if ( typeof mnist === 'undefined' ) return;

    background ('black');

    if ( do_training )
    {
        // never start a fit while the last batch's fit is still going on
        const trainerIdle = ( trainingInProgress == false );
        if ( trainerIdle ) trainBatch();

        // do some testing per step
        const testerIdle = ( testingInProgress == false );
        if ( testerIdle )
        {
            testingInProgress = true;
            let remaining = TESTPERSTEP;
            while ( remaining > 0 )
            {
                testit();
                remaining--;
            }
            testingInProgress = false;
        }
    }

    // keep drawing demo and doodle images
    // and keep guessing - the guess is updated as training goes on
    if ( demo_exists )
    {
        drawDemo();
        guessDemo();
    }
    if ( doodle_exists )
    {
        drawDoodle();
        guessDoodle();
    }

    // detect doodle drawing
    // (restriction) the following assumes doodle starts at 0,0

    if ( ! mouseIsPressed )
    {
        // mouse is up: if a drag was under way it has just ended, so blur once
        if ( mousedrag )
        {
            mousedrag = false;
            doodle.filter ( BLUR, DOODLE_BLUR );
        }
        return;
    }

    // mouse is down - this also happens when we click buttons, not only in the doodle corner
    const limit = ZOOMPIXELS + 20;      // can draw up to this many pixels into the corner
    const insideCorner = [ mouseX, mouseY, pmouseX, pmouseY ].every ( (coord) => coord < limit );

    if ( insideCorner )
    {
        mousedrag = true;               // start (or continue) a mouse drag
        doodle_exists = true;
        doodle.stroke ('white');
        doodle.strokeWeight ( DOODLE_THICK );
        doodle.line ( mouseX, mouseY, pmouseX, pmouseY );
    }
}




//--- demo -------------------------------------------------------------
// demo some test image and predict it
// get it from test set so have not used it in training


/**
 * Pick a random image from the test set as the demo image.
 * Stores its raw data in the global "demo", marks the demo as existing, and shows the
 * image number and its true classification (label 1 -> 'a', 2 -> 'b', ...) in slot 8.
 * Type "demo" in the console to see the raw data.
 */
function makeDemo()
{
    demo_exists = true;

    const pick = AB.randomIntAtoB ( 0, NOTEST - 1 );
    demo = mnist.test_images[pick];

    // char code 97 is 'a', and labels start at 1
    const letter = String.fromCharCode ( 96 + mnist.test_labels[pick] );

    thehtml = `Test image no: ${pick}<br>Classification: ${letter}<br>`;
    AB.msg ( thehtml, 8 );
}


/**
 * Draw the demo image in the bottom row of the canvas:
 * magnified on the left and at original size to the right of a 50 pixel gap.
 */
function drawDemo()
{
    const picture = getImage ( demo );
    const rowTop  = canvasheight - ZOOMPIXELS;      // bottom row starts here
    const smallX  = ZOOMPIXELS + 50;                // skip the large image and the gap

    image ( picture, 0,      rowTop, ZOOMPIXELS, ZOOMPIXELS );      // magnified
    image ( picture, smallX, rowTop, PIXELS,     PIXELS     );      // original
}


/**
 * Classify the current demo image and show the guess in slot 9 of the run header.
 * The raw pixel data in the global "demo" is normalised (divided by 255) and fed to the
 * network; output index 0 is shown as 'a', index 1 as 'b', and so on.
 */
function guessDemo()
{
  // This runs every frame, so the tensors made for the prediction must be released:
  // tf.tidy frees them all, and only the plain array of output values is returned.
  const prediction = tf.tidy ( () => {
    const demoImage = tf.reshape ( tf.tensor ( demo, [1, PIXELSSQUARED] ), [1, PIXELS, PIXELS, 1] ).div (normalizeFactor);
    return networkModel.predict (demoImage).arraySync()[0];
  });

  // our guess is the output node with the highest value
  const guess = prediction.indexOf ( Math.max (...prediction) );

  thehtml =   " We classify it as: " + greenspan + String.fromCharCode (97 + guess) + "</span>" ;
  AB.msg ( thehtml, 9 );
}




//--- doodle -------------------------------------------------------------

/**
 * Draw the doodle in the top row of the canvas:
 * at full size on the left and shrunk to PIXELS x PIXELS right of a 50 pixel gap.
 */
function drawDoodle()
{
    // doodle is createGraphics not createImage, so take a snapshot of it first
    const snapshot = doodle.get();
    const smallX   = ZOOMPIXELS + 50;               // skip the large image and the gap

    image ( snapshot, 0,      0, ZOOMPIXELS, ZOOMPIXELS );      // original
    image ( snapshot, smallX, 0, PIXELS,     PIXELS     );      // shrunk
}
      
      
/**
 * Classify the current doodle and show the best two guesses in slot 2 of the run header.
 * A snapshot of the doodle is shrunk to PIXELS x PIXELS, the first channel of every
 * pixel is taken as its brightness, the values are normalised (divided by 255) and fed
 * to the network. Output index 0 is shown as 'a', index 1 as 'b', and so on.
 */
function guessDoodle() 
{
  // doodle is createGraphics not createImage, so take a snapshot and shrink that
  const shrunk = doodle.get();
  shrunk.resize ( PIXELS, PIXELS );
  shrunk.loadPixels();

  // set up inputs: pixels are stored as 4 values each, the first one is used as brightness
  const inputs = [];
  for (let p = 0; p < PIXELSSQUARED; p++)
  {
     inputs[p] = shrunk.pixels[p * 4];
  }

  // This runs every frame, so the tensors made for the prediction must be released:
  // tf.tidy frees them all, and only the plain array of output values is returned.
  const prediction = tf.tidy ( () => {
    const doodleImage = tf.reshape ( tf.tensor ( inputs, [1, PIXELSSQUARED] ), [1, PIXELS, PIXELS, 1] ).div (normalizeFactor);
    return networkModel.predict (doodleImage).arraySync()[0];
  });

  const b = find12 (prediction);       // get no.1 and no.2 guesses

  thehtml =   " We classify it as: " + greenspan + String.fromCharCode (97 + b[0]) + "</span> <br>" +
            " No.2 guess is: " + greenspan + String.fromCharCode (97 + b[1]) + "</span>";
  AB.msg ( thehtml, 2 );
}


/**
 * Clear the doodle (called by the "Clear doodle" button):
 * mark it as not existing, so draw() stops showing and classifying it,
 * and paint the doodle buffer black again.
 */
function wipeDoodle()
{
    doodle_exists = false;
    doodle.background ( 'black' );
}




// --- debugging --------------------------------------------------
// in console
// showInputs(demo_inputs);
// showInputs(doodle_inputs);


/**
 * Debugging aid: log the inputs row by row, corresponding to the square of pixels.
 * Every value is printed with two decimals, and a new line starts every PIXELS values.
 */
function showInputs ( inputs )
{
    const pieces = [];

    for (let k = 0; k < inputs.length; k += 1)
    {
      const rowBreak = ( k % PIXELS === 0 ) ? "\n" : "";     // new line for each row of pixels
      pieces.push ( rowBreak + " " + inputs[k].toFixed(2) );
    }

    console.log ( pieces.join ("") );
}