// Code viewer for World: Speech to text "Hello World"
// OpenAI Whisper API "Hello World" using a Flask reverse proxy
// Cloned from the Chat with GPT model
// By Kevin James Tomescu - 21435066 - tomesck2 + Niamh Gowran - 21389501 - gowrann2

// Initializing variables
// Page-wide state shared by setkey(), the Start/Stop Recording click handlers
// and sendAudioToWhispersAPI().
var apikey = ""; // API key sent to the proxy; stays empty until setkey() stores the #apikey input value
var mediaRecorder; // MediaRecorder for the current recording; assigned by the Start Recording handler
var audioChunks = []; // audio data collected from the recorder's dataavailable events; reset on each start

// Page skeleton: give the body some breathing room, then write out the markup.
// The markup contains the API key form (its button calls setkey()), the
// Start/Stop Recording buttons (stop begins disabled), and the container
// whose inner div with id="apiResponse" shows the result of the API call.
$("body").css("margin", "20px").css("padding", "20px");
document.write(`
  <h1>Interact with OpenAI Whispers API</h1>
  <div id="enterkey">
    <h3>Enter API key</h3>
    Enter API key: 
    <input style='width:25vw;' maxlength='2000' id="apikey" value=''>  
    <button onclick='setkey();' class='ab-normbutton'>Set API key</button>
  </div>
  <div>
    <button id="startRecord">Start Recording</button>
    <button id="stopRecord" disabled>Stop Recording</button>
  </div>
  <div id="whispersResult" style="margin-top: 20px;">
    <h3>Whispers API Response:</h3>
    <div id="apiResponse"></div>
  </div>
`);

// Store the key typed into the #apikey input (surrounding whitespace removed)
// in the shared `apikey` variable, then replace the contents of the #enterkey
// form with a confirmation message.
// credit to https://ancientbrain.com/viewjs.php?world=2850716357 for the starter code for this function
function setkey() {
  const typedKey = $("#apikey").val();
  apikey = typedKey.trim();

  const confirmation = "<b>API key has been set.</b>";
  $("#enterkey").html(confirmation);
}

// Start recording audio from the microphone.
// Asks for microphone access, creates a MediaRecorder on the resulting stream,
// collects its data into the shared `audioChunks` array and enables the stop
// button. If access is refused or the recorder cannot be created, the error is
// shown in #apiResponse and the start button is re-enabled.
// credit to https://developer.mozilla.org/en-US/docs/Web/API/MediaRecorder for mediaRecorder documentation and example
$("#startRecord").click(async () => {
  // Lock the start button BEFORE awaiting: otherwise a second click while the
  // permission prompt is open would open a second stream and recorder.
  $("#startRecord").prop("disabled", true);

  let stream;
  try {
    stream = await navigator.mediaDevices.getUserMedia({ audio: true });

    audioChunks = [];
    mediaRecorder = new MediaRecorder(stream);

    // Add audio stream
    mediaRecorder.ondataavailable = (event) => {
      audioChunks.push(event.data);
    };

    mediaRecorder.start();
  } catch (err) {
    // Release the microphone if it was acquired before the failure.
    if (stream) {
      stream.getTracks().forEach((track) => track.stop());
    }
    $("#startRecord").prop("disabled", false);
    $("#apiResponse").text(
      `Could not start recording: ${err && err.message ? err.message : err}`
    );
    return;
  }

  // Recording is running: only now allow the user to stop it.
  $("#stopRecord").prop("disabled", false);
});

// Stop recording and process audio.
// Stops the active MediaRecorder; once it reports that it has stopped, the
// microphone tracks are released and the collected chunks are bundled into a
// Blob and handed to sendAudioToWhispersAPI().
// again credit to https://developer.mozilla.org/en-US/docs/Web/API/MediaRecorder
$("#stopRecord").click(() => {
  const recorder = mediaRecorder;

  $("#startRecord").prop("disabled", false);
  $("#stopRecord").prop("disabled", true);

  // Nothing to stop (stop() throws on an inactive recorder): just reset the buttons.
  if (!recorder || recorder.state === "inactive") {
    return;
  }

  // Attach the handler before calling stop() so it cannot miss the event.
  recorder.onstop = () => {
    // Release the microphone; otherwise the device stays captured after recording.
    recorder.stream.getTracks().forEach((track) => track.stop());

    // NOTE(review): MediaRecorder output is normally not WAV (see
    // recorder.mimeType); the "audio/wav" label is kept because it ends up in
    // the data URL sent to the proxy — confirm what the proxy expects before
    // changing it.
    const audioBlob = new Blob(audioChunks, { type: "audio/wav" });
    sendAudioToWhispersAPI(audioBlob);
  };

  recorder.stop();
});

// Send audio to Flask proxy server and display the result.
// The blob is read as a data URL (base64 with a "data:<type>;base64," prefix)
// and POSTed as JSON to the proxy together with the target OpenAI endpoint,
// the API key and the model name. The JSON response is pretty-printed into
// #apiResponse; a read failure or a failed request shows a red error message
// there instead.
//   audioBlob - Blob holding the recorded audio
// credit to https://platform.openai.com/docs/guides/speech-to-text for the API documentation
// credit to https://developer.mozilla.org/en-US/docs/Web/API/FileReader for the FileReader documentation
// credit to https://api.jquery.com/jquery.ajax/ for the jQuery AJAX documentation
// credit to https://developer.mozilla.org/en-US/docs/Glossary/Base64 for the base64 documentation
function sendAudioToWhispersAPI(audioBlob) {
  const reader = new FileReader();

  // Show an error in red. The message is inserted with .text() so that text
  // coming from the server or the browser is never parsed as HTML.
  const showError = (message) => {
    $("#apiResponse")
      .html("<font color='red'><b></b></font>")
      .find("b")
      .text(`An error occurred: ${message}`);
  };

  // `load` fires only for a successful read (unlike `loadend`, which also
  // fires after a failure with a null result).
  reader.onload = function () {
    const base64data = reader.result;

    // Prepare the request payload, including the model
    const payload = {
      api_endpoint: "https://api.openai.com/v1/audio/transcriptions", // OpenAI Transcriptions API endpoint
      api_key: apikey, // API key
      model: "whisper-1", // Model name
      data: base64data, // Audio as a base64 data URL, the format this page sends to the proxy
    };

    // send request and handle response
    $.ajax({
      type: "POST",
      url: "https://gowtom-proxy.azurewebsites.net/speech-to-text", // URL of our Flask server
      data: JSON.stringify(payload),
      contentType: "application/json",
      success: function (response) {
        $("#apiResponse").text(JSON.stringify(response, null, 2));
      },
      error: function (jqXHR, textStatus, errorThrown) {
        // errorThrown is empty for network-level failures; fall back to textStatus.
        showError(errorThrown || textStatus);
      },
    });
  };

  // Reading failed: report it instead of posting an empty payload.
  reader.onerror = function () {
    showError(
      reader.error && reader.error.message
        ? reader.error.message
        : "could not read the recorded audio"
    );
  };

  reader.readAsDataURL(audioBlob); // Convert blob to base64
}

// Credits / references
// https://ancientbrain.com/viewjs.php?world=2850716357
// https://developer.mozilla.org/en-US/docs/Web/API/MediaRecorder
// https://platform.openai.com/docs/guides/speech-to-text
// https://developer.mozilla.org/en-US/docs/Web/API/FileReader
// https://api.jquery.com/jquery.ajax/
// https://developer.mozilla.org/en-US/docs/Glossary/Base64
// https://openai.com/research/whisper
// https://github.com/openai/whisper