// Cloned by Toma on 12 Nov 2023 from World "Chat with GPT model" by Starter user
// Please leave this clone trail here.
// Image generation from audio: speech is transcribed with OpenAI Whisper, then the text is sent to DALL-E
// adapted from:
// https://platform.openai.com/docs/api-reference/images/create
// API key is filled in later by the user via setkey().
let apiKey = "";

// The default body has margin 0 and padding 0; give the page some
// breathing room around its content.
$("body").css({ margin: "20px", padding: "20px" });
// Write the page UI: heading, API-key entry, recording controls, audio
// playback, and placeholders for the generated image / transcription.
// Fix: user-facing typo "Stop Recoding" -> "Stop Recording".
// NOTE: the id "supriseMeButton" is (mis)spelled this way on purpose —
// sibling code references that exact id; do not "fix" it here alone.
document.write(`
<h1> Generate Images from Audio </h1>
Running World:
<a href='https://ancientbrain.com/world.php?world=7353908265'> AI Image Generation from Speech with Open AI </a>.
<br>
Generate Images from Audio with
<A HREF="https://platform.openai.com/docs/models/overview">DALL-E and Whisper</A>
using the
<A HREF="https://en.wikipedia.org/wiki/OpenAI">OpenAI </A> API.
<br>
<pre>
</pre>
<div id=enterkey>
Enter API key:
<input style='width:25vw;' maxlength='2000' NAME="apikey" id="apikey" VALUE='' >
<button onclick='setkey();' class=ab-normbutton >Set API key</button>
</div>
<pre>
</pre>
Begin by recording an Audio file describing the image you would like to generate (Press the Start Recording button below when you are ready). <br>
When you are done speaking, press the Stop Recording button.<br>
You can playback your recorded Audio by clicking Play.<br>
If satisfied with the recording, Click Use Audio to generate image. <br>
<pre>
</pre>
<div style="display: flex; flex-direction: column; place-items: left; justify-content: center;">
<div>
<button id="startRecording">Start Recording</button>
<button id="stopRecording" disabled>Stop Recording</button>
</div>
<br /><br />
<div style="display: flex; place-items: center">
<audio id="audioPlayback" controls style="margin-right: 20px"></audio>
<button id="useAudioButton" style="margin-right:10px;">Use Audio</button>
<button id="supriseMeButton">Surprise me</button>
</div>
<h2 id="isLoading"></h2>
<img id="myImage" src="" alt="No image" srcset="" style="width: 500px; height: 500px; border: 2px solid black;" />
<p id="transcribedText"></p>
</div>
`);
// Cache DOM references for the elements created by document.write above.
const myImage = document.getElementById("myImage");
const useAudioButton = document.getElementById("useAudioButton");
const startRecordingButton = document.getElementById("startRecording");
const stopRecordingButton = document.getElementById("stopRecording");
// Fix: declare this explicitly — the original code relied on the implicit
// window.<id> global that named elements create, which is fragile and
// breaks under strict mode / module scope.
const supriseMeButton = document.getElementById("supriseMeButton");
const isLoading = document.getElementById("isLoading");
const transcribedText = document.getElementById("transcribedText");

let mediaRecorder;     // active MediaRecorder, assigned in startRecording()
let audioChunks = [];  // recorded audio data; reset at the start of each recording

// Nothing to send until a recording exists.
useAudioButton.disabled = true;

startRecordingButton.addEventListener("click", startRecording);
stopRecordingButton.addEventListener("click", stopRecording);

// Transcribe the recorded audio, then generate an image from the text.
useAudioButton.addEventListener("click", function () {
  isLoading.innerHTML = "Loading........";
  const audioBlob = new Blob(audioChunks, { type: "audio/mpeg" });
  transcribeAudio(audioBlob);
});

// Ask the chat model for a random prompt and generate an image from it.
supriseMeButton.addEventListener("click", function () {
  isLoading.innerHTML = "Loading........";
  generateRandomPromptAndImage();
});
// Read the API key from the input field, store it, and confirm to the user.
// Called from the inline onclick handler of the "Set API key" button.
function setkey()
{
  apiKey = $("#apikey").val().trim();
  $("#enterkey").html ( "<b> API key has been set. </b>" );
}
// Request microphone access and begin capturing audio into audioChunks.
function startRecording() {
  navigator.mediaDevices
    .getUserMedia({ audio: true })
    .then((stream) => {
      mediaRecorder = new MediaRecorder(stream);

      // Accumulate each chunk of encoded audio as it arrives.
      mediaRecorder.ondataavailable = (event) => {
        audioChunks.push(event.data);
      };

      // When recording ends, make the capture playable in the <audio> element.
      mediaRecorder.onstop = () => {
        const blob = new Blob(audioChunks, { type: "audio/mpeg" });
        document.getElementById("audioPlayback").src = URL.createObjectURL(blob);
      };

      audioChunks = []; // drop any previous recording
      mediaRecorder.start();

      // Reflect the "recording in progress" state in the buttons.
      startRecordingButton.disabled = true;
      useAudioButton.disabled = true;
      stopRecordingButton.disabled = false;
    })
    .catch((e) => console.error(e));
}
// End the capture (fires mediaRecorder.onstop) and restore the buttons.
function stopRecording() {
  mediaRecorder.stop();
  stopRecordingButton.disabled = true;
  startRecordingButton.disabled = false;
  useAudioButton.disabled = false;
}
// Send the recorded audio to the OpenAI Whisper transcription endpoint and
// hand the transcribed text to generateImage().
// Fixes: errors were silently swallowed (the catch only cleared the spinner),
// and neither the HTTP status nor the response shape was checked, so a bad
// API key produced a confusing failure downstream.
function transcribeAudio(audioBlob) {
  const formData = new FormData();
  formData.append("file", audioBlob, "recording.mp3");
  formData.append("model", "whisper-1");
  formData.append("language", "en");
  fetch("https://api.openai.com/v1/audio/transcriptions", {
    method: "POST",
    body: formData,
    headers: {
      Authorization: `Bearer ${apiKey}`,
    },
  })
    .then((response) => {
      if (!response.ok) {
        throw new Error(`Transcription request failed: HTTP ${response.status}`);
      }
      return response.json();
    })
    .then((data) => {
      if (typeof data.text !== "string") {
        throw new Error("Transcription response did not contain text");
      }
      generateImage(data.text);
    })
    .catch((error) => {
      console.error("Error calling Whisper API", error);
      isLoading.innerHTML = "";
    });
}
// Generate an image for `text` via the DALL-E images endpoint and display it,
// along with the prompt that was used.
// Fixes: check the HTTP status (a failed request previously surfaced only as
// a TypeError on data.data[0]); repaired the mojibake "DALLĀ·E" in the error
// log message.
function generateImage(text) {
  fetch("https://api.openai.com/v1/images/generations", {
    method: "POST",
    headers: {
      Authorization: `Bearer ${apiKey}`,
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      prompt: text,
    }),
  })
    .then((response) => {
      if (!response.ok) {
        throw new Error(`Image request failed: HTTP ${response.status}`);
      }
      return response.json();
    })
    .then((data) => {
      isLoading.innerHTML = "";
      transcribedText.innerHTML = text; // show the prompt that produced the image
      myImage.src = data.data[0].url;
    })
    .catch((error) => {
      console.error("Error calling DALL-E API", error);
      isLoading.innerHTML = "";
    });
}
// Ask gpt-3.5-turbo for a short random image prompt, then render it with
// generateImage().
// Fixes: check the HTTP status before parsing, and log the full error object
// (logging error.message alone discarded the stack trace).
function generateRandomPromptAndImage() {
  const body = {
    model: "gpt-3.5-turbo",
    messages: [
      {
        role: "user",
        content:
          "Generate a prompt for dalle image generation no more than 20 words",
      },
    ],
  };
  fetch("https://api.openai.com/v1/chat/completions", {
    method: "POST",
    body: JSON.stringify(body),
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${apiKey}`,
    },
  })
    .then((response) => {
      if (!response.ok) {
        throw new Error(`Chat request failed: HTTP ${response.status}`);
      }
      return response.json();
    })
    .then((data) => {
      generateImage(data.choices[0].message.content);
    })
    .catch((error) => {
      console.error("Error:", error);
      isLoading.innerHTML = "";
    });
}
// function errorfn()
// {
// if ( apikey == "" ) $("#them").html ( "<font color=red><b> Enter API key to be able to chat. </b></font>" );
// else $("#them").html ( "<font color=red><b> Unknown error. </b></font>" );
// }