// Code viewer for World: Image Captions (PRACTICAL)
// World by Ethan Doyle.
// Student Number 22497082.

// ** IMPORTANT **
// The API keys are listed in the order that they were submitted.

// World that makes calls to multiple AI Models which give a caption/description of an image uploaded to the world.
// If code is taken from a website, it will be commented beside the code.

// Tutorials / code resources used: https://www.w3schools.com/howto/howto_html_file_upload_button.asp
// https://stackoverflow.com/questions/18694437/how-to-preview-image-before-uploading-in-jquery/62382964#62382964
// https://www.w3schools.com/jquery/jquery_intro.asp
// https://www.w3schools.com/jquery/jquery_ajax_intro.asp
// https://www.w3schools.com/howto/howto_html_file_upload_button.asp
// https://adamfard.com/blog/how-to-use-chatgpt-api
// https://community.openai.com/t/uploading-images-to-the-chatgpt-api/985494
// https://refine.dev/blog/css-rounded-corners/#what-is-css-border-radius
// https://www.w3schools.com/html/html_css.asp
// https://htmlcheatsheet.com/css/
// Everything else was done through trial & error.

// This world uses a couple of AI models.
// The first model it uses is OpenAI's ChatGPT model, GPT-4o-mini.
// The second and third models are from Hugging Face, a community-driven website that contains a plethora of AI models to choose from. It uses BLIP and VIT-GPT2.
// The last model it uses is grok-vision-beta (xAI), which evaluates and compares the captions produced by the three prior models.


// Page-wide spacing, applied as soon as the script runs.
// Learnt how to modify margin and padding universal values from https://ancientbrain.com/world.php?world=2850716357's source code.
$('body').css({ margin: "15px", padding: "15px" });

// Dark theme (white text on black), applied once the DOM is ready. From W3Schools jQuery tutorial.
$(document).ready(() => {
    $('body').css({ "background-color": "black", color: "white" });
});

// The base HTML written into the document. Handles the functions for getting the API keys, file upload and getting captions.
// CSS elements gleaned from https://www.w3schools.com/html/html_css.asp and https://htmlcheatsheet.com/css/
// Writes the page skeleton: the intro text, the API key form (handled by getApi),
// the image picker (handled by handleImageUpload), the "Get Caption" button
// (handled by onSubmit) and the empty containers the handlers fill in
// (#apikeyout, #errordiv, #imageUploaded, #output).
//
// Each <label for="..."> must match the id of its <input> so that clicking the
// label focuses the field, and the file input uses a plain quoted HTML
// onchange attribute rather than JSX-style braces.
document.write (`
    <h1>Image Captions</h1>
    
    This world is centered around uploading an image (PNG or JPEG, please) to the site. <br>
    The AI models GPT-4o-mini, BLIP and VIT-GPT2 will give a brief caption to your uploaded image.<br> 
    The captions will then be evaluated and compared by Grok-Vision-Beta.<br>
    <div>
        <h1>API Keys</h1>
        <p>Before moving on, you must enter three API keys.</p>
        <p> Note that...<br>
            API Key 1 is for ChatGPT.<br>
            API Key 2 is for Hugging Face.<br>
            API Key 3 is for xAI.
        </p>
        <form id="apiForm">
            <label for="apiKey1">API Key 1: </label>
            <input type="text" id="apiKey1" style="width:450px"><br>
            <label for="apiKey2">API Key 2: </label>
            <input type="text" id="apiKey2" style="width:450px"><br>
            <label for="apiKey3">API Key 3: </label>
            <input type="text" id="apiKey3" style="width:450px">
            <button type="button" onclick="getApi()">Submit Keys</button>
        </form>
        <div id="apikeyout"> </div>
    </div>
    <div style="text-align: center; display: flex; flex-direction: column; align-items: center;">
        <h1>Upload an Image</h1>
        <p>Please upload an image for captioning below.</p>
        <input type="file" id="imageUpload" accept="image/*" onchange="handleImageUpload(event)"> <br>
        <button onclick="onSubmit()">Get Caption</button>
        <br>
        <div id="errordiv"> </div>
        <br>
        <img id="imageUploaded" alt="Uploaded Image" style="display:none; max-height:750px; max-width:1000px">
    </div>
    <br>
    <div id="output">
    </div>
`);


// ---- Shared state -------------------------------------------------

// Extension marker of the last uploaded file; compared against the accepted types.
let typechecker = '';

// The user-submitted image.
let userimg = '';

// API keys. They start out empty and are filled in from the form on the page
// (key 1, key 2, key 3, in the order the form lists them).
let key1 = '';
let key2 = '';
let key3 = '';




/**
 * Reads the three API keys from the key form and stores them in the
 * module-level key1 / key2 / key3 variables, then reports in #apikeyout
 * whether all three were provided.
 *
 * The keys are stored even when one of them is empty; onSubmit re-checks
 * them before any request is sent.
 */
function getApi() { // Gets the API keys.
    // Look the inputs up by id explicitly rather than relying on the browser
    // exposing elements with an id as global variables. Pasted keys often carry
    // stray whitespace, which would end up inside the Authorization header.
    const readKey = (selector) => String($(selector).val() || "").trim();

    key1 = readKey("#apiKey1");
    key2 = readKey("#apiKey2");
    key3 = readKey("#apiKey3");

    // The keys are secrets, so they are deliberately NOT written to the console.

    if (key1 === "" || key2 === "" || key3 === "") {
        $("#apikeyout").html("<font color=red><h2>Error: Please enter a value for all keys.</h2></font>");
        return;
    }

    $("#apikeyout").html("<p>API Keys set.</p>");
}




/**
 * Change handler of the file input: validates the chosen file, shows a
 * preview of it and stores its base64 payload in `userimg`.
 *
 * On success `typechecker` is set to the lower-cased last four characters of
 * the file name (".png", ".jpg" or "jpeg"), which is the marker onSubmit
 * checks. On any rejection `typechecker` and `userimg` are reset to "" so a
 * previously uploaded image cannot be submitted while the preview is hidden.
 *
 * @param {Event} event - Change event whose target is the file input.
 */
function handleImageUpload(event) { // Handles image upload from the onchange attribute in <input>.
    const file = event.target.files[0]; // Grabs the file from user input.

    // This must be checked before touching file.name: cancelling the file
    // dialog fires the handler with an empty file list.
    if (!file) {
        userimg = "";
        typechecker = "";
        $("#errordiv").html("<font color=red><h2>Please enter an image! (Accepted types: PNG, JPEG/JPG)</h2></font>");
        $("#output").html("");
        $("#imageUploaded").attr("src", "").hide();
        console.log("Please upload a valid image.");
        return;
    }

    // Compare case-insensitively (cameras commonly produce ".JPG") and require
    // the dot, so a name that merely ends in the letters "jpeg" is not accepted.
    const lowerName = file.name.toLowerCase();
    const isAcceptedType = [".png", ".jpg", ".jpeg"].some((extension) => lowerName.endsWith(extension));

    if (!isAcceptedType) {
        userimg = "";
        typechecker = "";
        $("#errordiv").html("<font color=red><h2>Please enter an image with the correct file extensions. (Accepted types: PNG, JPEG/JPG)</h2></font>"); // Puts a message on the screen telling the user to enter in an image.
        $("#output").html("");
        $("#imageUploaded").attr("src", "").hide();
        console.log("Please upload a .png or .jpeg file.");
        return;
    }

    typechecker = lowerName.slice(-4); // ".png", ".jpg" or "jpeg"

    // Clear the error message on the screen (if there is one) and the AI model responses (if there are any).
    $("#errordiv").html("<p></p>");
    $("#output").html("");

    const reader = new FileReader(); // Reads the file.
    reader.onload = function () {
        $("#imageUploaded").attr("src", reader.result).show(); // Displays the file in the <img> tag.
        // reader.result is a data URL; keep only the base64 part after the comma.
        userimg = reader.result.split(",")[1];
    };
    reader.readAsDataURL(file);
}





/**
 * Click handler of the "Get Caption" button.
 *
 * Checks that an accepted image and all three API keys are present, requests
 * a caption from GPT-4o-mini, BLIP and VIT-GPT2 in parallel, asks Grok Vision
 * to compare the three captions, and renders everything into #output.
 * Any failure is shown to the user in #errordiv instead of leaving the
 * "Processing caption..." placeholder on screen.
 *
 * @returns {Promise<void>} Settles once the page has been updated.
 */
async function onSubmit() { // For button submission.
    // Snapshot the image so an upload during the requests cannot mix two images.
    const image = userimg;

    // Same markers handleImageUpload stores for an accepted file. Kept as a just-in-case check.
    const acceptedTypes = [".png", "jpeg", ".jpg"];
    if (!acceptedTypes.includes(typechecker)) {
        $("#errordiv").html("<font color=red><h2>Please enter an image! (Accepted types: PNG, JPEG/JPG)</h2></font>");
        console.log("Please upload a .png or .jpeg file.");
        return;
    }
    if (!image) { // If there is no image, prompt the user to upload one.
        $("#errordiv").html("<font color=red><h2>Please enter an image! (Accepted types: PNG, JPEG/JPG)</h2></font>");
        console.log("Please upload an image first.");
        return;
    }
    if (key1 === "" || key2 === "" || key3 === "") { // If no keys are entered, do not submit the images.
        $("#errordiv").html("<font color=red><h2>You haven't fully entered in any API keys yet.</h2></font>");
        return;
    }

    // Model output and API error text are untrusted and must not be parsed as HTML.
    const escapeHtml = (text) => String(text)
        .replace(/&/g, "&amp;")
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;")
        .replace(/"/g, "&quot;")
        .replace(/'/g, "&#39;");

    // Turns an API error payload ({error: "..."} or {error: {message: "..."}}) into a message suffix.
    const describeFailure = (data) => {
        const apiError = data && data.error;
        if (typeof apiError === "string") {
            return ": " + apiError;
        }
        if (apiError && typeof apiError.message === "string") {
            return ": " + apiError.message;
        }
        return ".";
    };

    // Extracts the text of a chat-completions style response (GPT-4o-mini, Grok).
    const chatText = (data, label) => {
        const choice = data && Array.isArray(data.choices) ? data.choices[0] : undefined;
        if (!choice || !choice.message || typeof choice.message.content !== "string") {
            throw new Error(label + " did not return a response" + describeFailure(data));
        }
        return choice.message.content;
    };

    // Extracts the caption of a Hugging Face style response (BLIP, VIT-GPT2).
    const generatedText = (data, label) => {
        const first = Array.isArray(data) ? data[0] : undefined;
        if (!first || typeof first.generated_text !== "string") {
            throw new Error(label + " did not return a caption" + describeFailure(data));
        }
        return first.generated_text;
    };

    // Clear any previous error message and replace old responses with a progress note.
    $("#errordiv").html("<p></p>");
    $("#output").html("<h2 style='display:flex; justify-content: center'>Processing caption...</h2>");

    try {
        // The three caption requests are independent of each other, so run them in parallel.
        const [caption1, caption2, caption3] = await Promise.all([
            getCaption1(image),
            getCaption2(image),
            getCaption3(image),
        ]);

        const gptCaption = chatText(caption1, "GPT-4o-mini");
        const blipCaption = generatedText(caption2, "BLIP");
        const vitCaption = generatedText(caption3, "VIT-GPT2");

        const comparison = await getComparison(image, gptCaption, blipCaption, vitCaption);
        const grokComparison = chatText(comparison, "Grok Vision");

        // Code in <style> tags was gleaned from https://stackoverflow.com/questions/50058483/how-to-display-divs-horizontally-in-line-in-html
        // Also gleaned from many CSS tutorials such as https://refine.dev/blog/css-rounded-corners/#what-is-css-border-radius and https://www.w3schools.com/html/html_css.asp
        // white-space: pre-line keeps the line breaks the comparison prompt asks Grok to use.
        $("#output").html(`
            <div class="sideways-container" style="display:flex; justify-content: center; flex-direction:row">
            <div class="item">
                <h2>GPT-4o-mini's Caption:</h2>
                <p>${escapeHtml(gptCaption)}</p>
            </div>
            <br>
            <div class="item">
                <h2>BLIP's Caption:</h2>
                <p>${escapeHtml(blipCaption)}</p>
            </div>
            <br>
            <div class="item">
                <h2>VIT-GPT2's Caption:</h2>
                <p>${escapeHtml(vitCaption)}</p>
            </div>
            </div>
            
            <br>
            
            <div class="sideways-container2" style="display:flex; justify-content: center; flex-direction:row">
            <div style="width:60vw; padding: 20px; color: black; border: 1px solid black; background-color: lightblue; border-radius: 15px; max-width:810px">
                <h2>Grok Vision's Comparison:</h2>
                <p style="white-space: pre-line">${escapeHtml(grokComparison)}</p>
            </div>
            </div>
            
            <style>
            .item {
                width:60vw;
                color: black;
                border: 1px solid black;
                padding: 20px;
                max-width:350px;
                margin-right: 10px;
                background-color: lightblue;
                border-radius: 15px
            }
            </style>`
        );
    } catch (error) {
        const message = (error && error.message) || String(error);
        console.log(message);
        // Remove the progress note and tell the user what went wrong.
        $("#output").html("");
        $("#errordiv").html("<font color=red><h2>Could not caption the image: " + escapeHtml(message) + "</h2></font>");
    }
}





/**
 * Asks OpenAI's gpt-4o-mini for a brief, formal caption of an image.
 * Authenticates with the module-level key1.
 *
 * @param {string} sendingImage - Base64-encoded image data, without the data-URL prefix.
 * @returns {Promise<Object>} The parsed JSON body of the chat completion response.
 * @throws {Error} If the request cannot be sent, or the API answers with a non-2xx status.
 */
async function getCaption1(sendingImage) {
    // Network errors are left to propagate to the caller: swallowing them here
    // would only surface later as an unrelated TypeError on an undefined response.
    const response = await fetch("https://api.openai.com/v1/chat/completions", { // Fetch request API reference at https://platform.openai.com/docs/api-reference/chat
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
            'Authorization': 'Bearer ' + key1
        },
        body: JSON.stringify({ // Convert body to a JSON string.
            model: 'gpt-4o-mini',
            messages: [
                {
                    'role': 'user',
                    'content': [
                        {'type': 'text', 'text': 'Give this image a brief caption. Be formal.'},
                        {
                            'type': 'image_url',
                            'image_url': {
                                'url': 'data:image/*;base64,' + sendingImage // The image is already base64 encoded; this adds the data-URL prefix in front of it.
                            }
                        }
                    ]
                }
            ],
        })
    });

    // An error response is not guaranteed to carry JSON, so a parse failure
    // must not hide the HTTP status.
    let data = null;
    try {
        data = await response.json();
    } catch (parseError) {
        if (response.ok) {
            throw parseError;
        }
    }
    console.log(data);

    if (!response.ok) {
        const apiError = data && data.error;
        const detail = typeof apiError === "string"
            ? apiError
            : (apiError && apiError.message) || "no details returned";
        throw new Error("OpenAI request failed (HTTP " + response.status + "): " + detail);
    }

    return data;
}





/**
 * Asks the Hugging Face hosted BLIP model (Salesforce/blip-image-captioning-large)
 * for a caption of an image. Authenticates with the module-level key2.
 *
 * @param {string} sendingImage - Base64-encoded image data, without the data-URL prefix.
 * @returns {Promise<Object>} The parsed JSON body of the response.
 * @throws {Error} If the request cannot be sent, or the API answers with a non-2xx status.
 */
async function getCaption2(sendingImage) {
    // Network errors are left to propagate to the caller: swallowing them here
    // would only surface later as an unrelated TypeError on an undefined response.
    const response = await fetch("https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-large", { // Fetch request API reference taken from https://huggingface.co/Salesforce/blip-image-captioning-base?inference_api=true
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
            'Authorization': 'Bearer ' + key2
        },
        body: JSON.stringify({
            'inputs': sendingImage
        })
    });

    // An error response is not guaranteed to carry JSON, so a parse failure
    // must not hide the HTTP status.
    let data = null;
    try {
        data = await response.json();
    } catch (parseError) {
        if (response.ok) {
            throw parseError;
        }
    }
    console.log(data);

    if (!response.ok) {
        const apiError = data && data.error;
        const detail = typeof apiError === "string"
            ? apiError
            : (apiError && apiError.message) || "no details returned";
        throw new Error("BLIP request failed (HTTP " + response.status + "): " + detail);
    }

    return data;
}





/**
 * Asks the Hugging Face hosted VIT-GPT2 model (nlpconnect/vit-gpt2-image-captioning)
 * for a caption of an image. Authenticates with the module-level key2.
 *
 * @param {string} sendingImage - Base64-encoded image data, without the data-URL prefix.
 * @returns {Promise<Object>} The parsed JSON body of the response.
 * @throws {Error} If the request cannot be sent, or the API answers with a non-2xx status.
 */
async function getCaption3(sendingImage){
    // Network errors are left to propagate to the caller: swallowing them here
    // would only surface later as an unrelated TypeError on an undefined response.
    const response = await fetch("https://api-inference.huggingface.co/models/nlpconnect/vit-gpt2-image-captioning", { // Fetch request snippet taken from https://huggingface.co/nlpconnect/vit-gpt2-image-captioning.
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
            'Authorization': 'Bearer ' + key2,
        },
        body: JSON.stringify({
            // NOTE(review): the BLIP request in this file sends the same data under
            // 'inputs' - confirm against the Hugging Face docs that 'image' is accepted here.
            'image': sendingImage
        })
    });

    // An error response is not guaranteed to carry JSON, so a parse failure
    // must not hide the HTTP status.
    let data = null;
    try {
        data = await response.json();
    } catch (parseError) {
        if (response.ok) {
            throw parseError;
        }
    }
    console.log(data);

    if (!response.ok) {
        const apiError = data && data.error;
        const detail = typeof apiError === "string"
            ? apiError
            : (apiError && apiError.message) || "no details returned";
        throw new Error("VIT-GPT2 request failed (HTTP " + response.status + "): " + detail);
    }

    return data;
}

/**
 * Asks xAI's grok-vision-beta to rank the three captions against the image.
 * Authenticates with the module-level key3.
 *
 * @param {string} sendingImage - Base64-encoded image data, without the data-URL prefix.
 * @param {string} response1 - Caption to present as ChatGPT's.
 * @param {string} response2 - Caption to present as BLIP's.
 * @param {string} response3 - Caption to present as VIT-GPT's.
 * @returns {Promise<Object>} The parsed JSON body of the chat completion response.
 * @throws {Error} If the request cannot be sent, or the API answers with a non-2xx status.
 */
async function getComparison(sendingImage, response1, response2, response3){
    // Network errors are left to propagate to the caller: swallowing them here
    // would only surface later as an unrelated TypeError on an undefined response.
    const response = await fetch("https://api.x.ai/v1/chat/completions", { // Fetch request snippet taken from https://docs.x.ai/api#authentication
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
            'Authorization': 'Bearer ' + key3,
        },
        body: JSON.stringify({
            messages: [
                {
                    'role': 'user',
                    'content': [
                        {'type': 'text', 'text': 'I have three written captions from three different AI models for the attached image. They are as following: ChatGPT: ' + response1 + ' BLIP: ' + response2 + ' VIT-GPT: ' + response3 + '. On a scale of 1-10, rank each caption and why. Separate each ranking by a new line.'},
                        {
                            'type': 'image_url',
                            'image_url': {
                                'url': 'data:image/*;base64,' + sendingImage // Adds the data-URL prefix in front of the base64 encoded string.
                            }
                        }
                    ]
                }
            ],
            model: "grok-vision-beta"
        })
    });

    // An error response is not guaranteed to carry JSON, so a parse failure
    // must not hide the HTTP status.
    let data = null;
    try {
        data = await response.json();
    } catch (parseError) {
        if (response.ok) {
            throw parseError;
        }
    }
    console.log(data);

    if (!response.ok) {
        const apiError = data && data.error;
        const detail = typeof apiError === "string"
            ? apiError
            : (apiError && apiError.message) || "no details returned";
        throw new Error("Grok Vision request failed (HTTP " + response.status + "): " + detail);
    }

    return data;
}





//---- setup -------------------------------------------------------
// Do NOT make a setup function.
// This is done for you in the API. The API setup just creates a canvas.
// Anything else you want to run at the start should go into the following two functions.


// Optional hook: place here anything that should run at the start,
// BEFORE the canvas is created. Intentionally left empty.
function beforesetup() {
}


// Optional hook: place here anything that should run at the start,
// AFTER the canvas is created. Intentionally left empty.
function aftersetup() {
}


//---- draw -------------------------------------------------------

// Optional hook: P5 instructions to be executed every step can go here,
// or in AB.world.nextStep(). Intentionally left empty.
function draw() {
}