// OpenAI Whispers API Hello World Using Flask Reverse Proxy
// Cloned from the Chat with GPT model
// By Kevin James Tomescu - 21435066 - tomesck2 + Niamh Gowran - 21389501 - gowrann2
// --- Page state ---
// API key entered by the user; sent with every request to the Flask proxy.
let apikey = "";
// Active MediaRecorder instance; created each time recording starts.
let mediaRecorder;
// Chunks of encoded audio delivered by MediaRecorder's dataavailable events.
let audioChunks = [];
// Build the page: API-key entry, record controls, and a response area.
// The result of the API call is displayed in the div with id="apiResponse".
$("body").css({ margin: "20px", padding: "20px" });
document.write(`
<h1>Interact with OpenAI Whispers API</h1>
<div id="enterkey">
<h3>Enter API key</h3>
Enter API key:
<input style='width:25vw;' maxlength='2000' id="apikey" value=''>
<button onclick='setkey();' class='ab-normbutton'>Set API key</button>
</div>
<div>
<button id="startRecord">Start Recording</button>
<button id="stopRecord" disabled>Stop Recording</button>
</div>
<div id="whispersResult" style="margin-top: 20px;">
<h3>Whispers API Response:</h3>
<div id="apiResponse"></div>
</div>
`);
// Set the API key and display a message
// credit to https://ancientbrain.com/viewjs.php?world=2850716357 for the starter code for this function
// Store the user-supplied API key (whitespace trimmed) and replace the
// entry form with a confirmation message.
// credit to https://ancientbrain.com/viewjs.php?world=2850716357 for the starter code for this function
function setkey() {
  const entered = $("#apikey").val();
  apikey = entered.trim();
  $("#enterkey").html("<b>API key has been set.</b>");
}
// Start recording audio from the microphone
// credit to https://developer.mozilla.org/en-US/docs/Web/API/MediaRecorder for mediaRecorder documentation and example
// Start recording audio from the microphone.
// credit to https://developer.mozilla.org/en-US/docs/Web/API/MediaRecorder for mediaRecorder documentation and example
$("#startRecord").click(async () => {
  let stream;
  try {
    // Ask the browser for microphone access; this rejects if the user
    // denies permission or no audio input device is available.
    stream = await navigator.mediaDevices.getUserMedia({ audio: true });
  } catch (err) {
    // Surface the failure to the user instead of leaving an unhandled
    // promise rejection; buttons stay in their original state for a retry.
    $("#apiResponse").text("Could not access microphone: " + err);
    return;
  }
  mediaRecorder = new MediaRecorder(stream);
  audioChunks = [];
  // Collect each chunk of encoded audio as the recorder produces it.
  mediaRecorder.ondataavailable = (event) => {
    audioChunks.push(event.data);
  };
  mediaRecorder.start();
  // prevent user from clicking start button again while recording
  $("#startRecord").prop("disabled", true);
  $("#stopRecord").prop("disabled", false);
});
// Stop recording and process audio
// again credit to https://developer.mozilla.org/en-US/docs/Web/API/MediaRecorder
// Stop recording and hand the captured audio to the transcription helper.
// again credit to https://developer.mozilla.org/en-US/docs/Web/API/MediaRecorder
$("#stopRecord").click(() => {
  // Register the handler BEFORE calling stop() so the stop event can
  // never be missed, regardless of event timing.
  mediaRecorder.onstop = async () => {
    // NOTE(review): MediaRecorder typically encodes audio/webm or audio/ogg,
    // not WAV; the "audio/wav" label is kept because the Flask proxy
    // currently expects it — confirm against the server before changing.
    const audioBlob = new Blob(audioChunks, { type: "audio/wav" });
    sendAudioToWhispersAPI(audioBlob);
  };
  mediaRecorder.stop();
  $("#startRecord").prop("disabled", false);
  $("#stopRecord").prop("disabled", true);
});
// Send audio to Flask proxy server and display the result
// credit to https://platform.openai.com/docs/guides/speech-to-text for the API documentation
// credit to https://developer.mozilla.org/en-US/docs/Web/API/FileReader for the FileReader documentation
// credit to https://api.jquery.com/jquery.ajax/ for the jQuery AJAX documentation
// credit to https://developer.mozilla.org/en-US/docs/Glossary/Base64 for the base64 documentation
// Send recorded audio to the Flask proxy server and display the result.
// The blob is read as a base64 data: URL and posted as JSON; the proxy
// forwards it to the OpenAI transcriptions endpoint.
// credit to https://platform.openai.com/docs/guides/speech-to-text for the API documentation
// credit to https://developer.mozilla.org/en-US/docs/Web/API/FileReader for the FileReader documentation
// credit to https://api.jquery.com/jquery.ajax/ for the jQuery AJAX documentation
// credit to https://developer.mozilla.org/en-US/docs/Glossary/Base64 for the base64 documentation
function sendAudioToWhispersAPI(audioBlob) {
  const reader = new FileReader();
  reader.onloadend = function () {
    // reader.result is a data: URL whose payload is the audio in base64.
    const base64data = reader.result;
    // Prepare the request payload, including the model
    const payload = {
      api_endpoint: "https://api.openai.com/v1/audio/transcriptions", // OpenAI Transcriptions API endpoint
      api_key: apikey, // API key
      model: "whisper-1", // Model name
      data: base64data, // Audio data in base64 format, as required by the API
    };
    // send request and handle response
    $.ajax({
      type: "POST",
      url: "https://gowtom-proxy.azurewebsites.net/speech-to-text", // URL of our Flask server
      data: JSON.stringify(payload),
      contentType: "application/json",
      success: function (response) {
        $("#apiResponse").text(JSON.stringify(response, null, 2));
      },
      error: function (_, __, error) {
        // Insert the error message as TEXT, not HTML, so any markup in the
        // (untrusted) server error string cannot be injected into the page;
        // the red/bold styling is kept via a fixed wrapper element.
        $("#apiResponse")
          .html("<font color='red'><b></b></font>")
          .find("b")
          .text("An error occurred: " + error);
      },
    });
  };
  reader.readAsDataURL(audioBlob); // Convert blob to base64
}
// Credits / references
// https://ancientbrain.com/viewjs.php?world=2850716357
// https://developer.mozilla.org/en-US/docs/Web/API/MediaRecorder
// https://platform.openai.com/docs/guides/speech-to-text
// https://developer.mozilla.org/en-US/docs/Web/API/FileReader
// https://api.jquery.com/jquery.ajax/
// https://developer.mozilla.org/en-US/docs/Glossary/Base64
// https://openai.com/research/whisper
// https://github.com/openai/whisper