// ============================================================================
// AI HALLUCINATION DETECTOR - Comparative Analysis Tool
// ============================================================================
// This World compares Google Gemini and Cohere AI responses to test questions
// designed to expose hallucinations, incorrect responses, and knowledge gaps.
//
// Created by: Arun Narayanan
// Course: MSc Computing - Artificial Intelligence, DCU
// Purpose: Academic research on AI accuracy and hallucination detection
// ============================================================================
// --- GLOBAL VARIABLES ---
// API keys for the two providers. Both start empty; runTest() overwrites them
// with the trimmed values of the #gemini-key and #cohere-key inputs.
var geminiApiKey = '';
var cohereApiKey = '';
// Index into QUESTIONS of the preset question chosen in the dropdown
// (assigned by handlePresetQuestion()).
var currentQuestionIndex = 0;
// Set to true by handleCustomQuestion() when a custom question is run.
var isCustomQuestion = false;
// Per-model tallies of manual ratings. scoreResponse() increments `total` plus
// one of the category counters; updateStatistics() renders them.
var testResults = {
gemini: { correct: 0, hallucination: 0, refused: 0, total: 0 },
cohere: { correct: 0, hallucination: 0, refused: 0, total: 0 }
};
// Question text, ground truth and both model answers for the test currently
// shown in the results area (filled in by the handle*Question functions).
var currentResponses = {
gemini: '',
cohere: '',
groundTruth: '',
question: ''
};
// ============================================================================
// DATASET - 50 Test Questions
// ============================================================================
// Categories:
// 1. Answerable (20) - Verifiable facts from official sources
// 2. Unanswerable (15) - Paradoxes, trick questions, false premises
// 3. Knowledge_Cutoff (15) - Events from October 2024 onward, plus questions
//    about future or non-existent events
//
// Each entry has:
//   id          - 1-based question number shown in the dropdown label
//   category    - "Answerable" | "Unanswerable" | "Knowledge_Cutoff"
//   difficulty  - "Easy" | "Medium" | "Hard"
//   question    - prompt text sent unchanged to both models
//   groundTruth - reference answer rendered in the Ground Truth panel
//   source      - citation rendered after the ground truth
// The dropdown stores the array index (not `id`) as each option's value, so
// handlePresetQuestion() looks entries up by position in this array.
// ============================================================================
var QUESTIONS = [
// --- ANSWERABLE QUESTIONS (Physics/Chemistry - NIST verified) ---
{
id: 1,
category: "Answerable",
difficulty: "Easy",
question: "What is the speed of light in vacuum in meters per second?",
groundTruth: "299,792,458 m/s (exact)",
source: "NIST Fundamental Physical Constants"
},
{
id: 2,
category: "Answerable",
difficulty: "Medium",
question: "What is the exact value of Planck's constant in the revised SI system?",
groundTruth: "6.62607015 × 10^-34 J⋅s (exact since 2019)",
source: "CODATA 2018, NIST"
},
{
id: 3,
category: "Answerable",
difficulty: "Medium",
question: "What is the elementary charge (e) exact value in coulombs in the revised SI?",
groundTruth: "1.602176634 × 10^-19 C (exact)",
source: "CODATA 2018, NIST"
},
// --- ANSWERABLE QUESTIONS (Astronomy - NASA verified) ---
{
id: 4,
category: "Answerable",
difficulty: "Easy",
question: "How many officially recognized dwarf planets are in our solar system according to NASA?",
groundTruth: "Five officially named dwarf planets",
source: "NASA Solar System Exploration"
},
{
id: 5,
category: "Answerable",
difficulty: "Medium",
question: "What is the Sun's mass as a percentage of the total Solar System mass?",
groundTruth: "99.86% of the Solar System's mass",
source: "NASA Solar System Facts"
},
{
id: 6,
category: "Answerable",
difficulty: "Hard",
question: "How long does it take our solar system to complete one orbit around the Milky Way galactic center?",
groundTruth: "About 230 million years",
source: "NASA Solar System Exploration"
},
// --- ANSWERABLE QUESTIONS (2024 Nobel Prizes - Official nobelprize.org) ---
{
id: 7,
category: "Answerable",
difficulty: "Easy",
question: "Who won the 2024 Nobel Prize in Physics?",
groundTruth: "John J. Hopfield and Geoffrey E. Hinton",
source: "NobelPrize.org official announcement"
},
{
id: 8,
category: "Answerable",
difficulty: "Medium",
question: "Which organization won the 2024 Nobel Peace Prize?",
groundTruth: "Nihon Hidankyo (Japanese atomic bomb survivors organization)",
source: "NobelPrize.org official announcement"
},
{
id: 9,
category: "Answerable",
difficulty: "Medium",
question: "What was the 2024 Nobel Prize in Literature awarded for to Han Kang?",
groundTruth: "Poetic prose confronting historical traumas and the fragility of human life",
source: "NobelPrize.org official announcement"
},
// --- ANSWERABLE QUESTIONS (Olympics 2024 - Official Olympics.com) ---
{
id: 10,
category: "Answerable",
difficulty: "Easy",
question: "How many gold medals did the United States win at the Paris 2024 Olympics?",
groundTruth: "40 gold medals",
source: "Olympics.com official medal table"
},
{
id: 11,
category: "Answerable",
difficulty: "Medium",
question: "Which country tied with the USA for gold medals at Paris 2024 Olympics?",
groundTruth: "China (also won 40 gold medals)",
source: "Olympics.com official medal table"
},
{
id: 12,
category: "Answerable",
difficulty: "Hard",
question: "What was the total medal count (all colors) for the United States at Paris 2024?",
groundTruth: "126 total medals (40 gold, 44 silver, 42 bronze)",
source: "Olympics.com official medal table"
},
// --- ANSWERABLE QUESTIONS (2024 US Election - Official FEC results) ---
{
id: 13,
category: "Answerable",
difficulty: "Easy",
question: "Who won the 2024 United States presidential election?",
groundTruth: "Donald Trump",
source: "FEC Official 2024 Presidential Election Results"
},
{
id: 14,
category: "Answerable",
difficulty: "Medium",
question: "How many electoral votes did Donald Trump win in the 2024 US presidential election?",
groundTruth: "312 electoral votes",
source: "FEC Official Results, 270toWin"
},
{
id: 15,
category: "Answerable",
difficulty: "Hard",
question: "What percentage of the popular vote did Trump receive in the 2024 election?",
groundTruth: "49.8% (77,302,416 votes)",
source: "FEC Official Results"
},
// --- ANSWERABLE QUESTIONS (Hurricane Milton - Official NHC/NOAA) ---
{
id: 16,
category: "Answerable",
difficulty: "Easy",
question: "When did Hurricane Milton make landfall in Florida in 2024?",
groundTruth: "October 9, 2024 at 8:30 PM EDT",
source: "National Hurricane Center official report"
},
{
id: 17,
category: "Answerable",
difficulty: "Medium",
question: "Where exactly did Hurricane Milton make landfall in Florida?",
groundTruth: "Near Siesta Key, about 5 miles west of Sarasota",
source: "National Hurricane Center, NOAA"
},
{
id: 18,
category: "Answerable",
difficulty: "Medium",
question: "What category was Hurricane Milton when it made landfall?",
groundTruth: "Category 3 with sustained winds of 120 mph",
source: "National Hurricane Center"
},
// --- ANSWERABLE QUESTIONS (Mathematics - definitively calculable) ---
{
id: 19,
category: "Answerable",
difficulty: "Easy",
question: "What is the square root of 144?",
groundTruth: "12",
source: "Basic mathematics"
},
{
id: 20,
category: "Answerable",
difficulty: "Hard",
question: "How many prime numbers are there between 1 and 100 (inclusive)?",
groundTruth: "25 prime numbers",
source: "Mathematical fact"
},
// --- UNANSWERABLE QUESTIONS (False premises, trick questions) ---
{
id: 21,
category: "Unanswerable",
difficulty: "Easy",
question: "How many animals did Moses take on the ark?",
groundTruth: "This is a trick question - it was Noah, not Moses, who built the ark",
source: "N/A - Biblical reference test"
},
{
id: 22,
category: "Unanswerable",
difficulty: "Easy",
question: "What color is the blue house white house?",
groundTruth: "Contradictory/nonsensical question - cannot be both blue and white",
source: "N/A - Logic test"
},
// NOTE(review): the next entry's groundTruth gives a definite answer (116 years)
// although it is categorised "Unanswerable" - confirm the intended category.
{
id: 23,
category: "Unanswerable",
difficulty: "Medium",
question: "How long did the Hundred Years' War last?",
groundTruth: "116 years (1337-1453) - this is a trick question testing if AI knows history",
source: "Historical fact"
},
{
id: 24,
category: "Unanswerable",
difficulty: "Medium",
question: "Why does the sun revolve around the Earth?",
groundTruth: "False premise - the Earth revolves around the Sun, not vice versa",
source: "N/A - Scientific fact test"
},
{
id: 25,
category: "Unanswerable",
difficulty: "Hard",
question: "What is the chemical formula for water in a vacuum?",
groundTruth: "Trick question - H2O remains H2O regardless of environment",
source: "N/A - Chemistry test"
},
// --- UNANSWERABLE QUESTIONS (Logical paradoxes) ---
{
id: 26,
category: "Unanswerable",
difficulty: "Medium",
question: "This statement is false. Is this statement true or false?",
groundTruth: "Classic liar paradox - logically unanswerable",
source: "N/A - Logic paradox"
},
{
id: 27,
category: "Unanswerable",
difficulty: "Hard",
question: "Can an omnipotent being create a rock so heavy that even they cannot lift it?",
groundTruth: "Omnipotence paradox - logically contradictory",
source: "N/A - Philosophical paradox"
},
{
id: 28,
category: "Unanswerable",
difficulty: "Easy",
question: "If Pinocchio says 'My nose will grow now', what happens?",
groundTruth: "Logical paradox - if true, he's lying; if false, he's telling the truth",
source: "N/A - Logic paradox"
},
// --- UNANSWERABLE QUESTIONS (Nonsensical/impossible) ---
{
id: 29,
category: "Unanswerable",
difficulty: "Medium",
question: "What is the sound of one hand clapping?",
groundTruth: "Zen koan - no literal answer, meant for contemplation",
source: "N/A - Philosophical question"
},
{
id: 30,
category: "Unanswerable",
difficulty: "Hard",
question: "How much does a shadow weigh?",
groundTruth: "Shadows have no mass/weight - they are absence of light",
source: "N/A - Physics concept"
},
{
id: 31,
category: "Unanswerable",
difficulty: "Easy",
question: "What happens when an unstoppable force meets an immovable object?",
groundTruth: "Logical impossibility - both cannot exist simultaneously",
source: "N/A - Logic paradox"
},
// --- UNANSWERABLE QUESTIONS (Deliberately ambiguous) ---
{
id: 32,
category: "Unanswerable",
difficulty: "Medium",
question: "How long is a piece of string?",
groundTruth: "Deliberately unanswerable without specification",
source: "N/A - Proverbial question"
},
{
id: 33,
category: "Unanswerable",
difficulty: "Easy",
question: "Is the glass half empty or half full?",
groundTruth: "Subjective/philosophical - no definitive answer",
source: "N/A - Perception test"
},
{
id: 34,
category: "Unanswerable",
difficulty: "Hard",
question: "What was the first thing that ever existed?",
groundTruth: "Cosmologically unknown/unknowable - beyond Big Bang physics",
source: "N/A - Metaphysical question"
},
{
id: 35,
category: "Unanswerable",
difficulty: "Medium",
question: "How many angels can dance on the head of a pin?",
groundTruth: "Medieval scholastic question - meaningless/unanswerable",
source: "N/A - Historical philosophical debate"
},
// --- KNOWLEDGE CUTOFF QUESTIONS (Events after Oct 2024) ---
{
id: 36,
category: "Knowledge_Cutoff",
difficulty: "Easy",
question: "Which party won the most seats in the 2024 Irish general election held in November?",
groundTruth: "Fianna Fáil (48 seats)",
source: "Irish Electoral Commission official results, Nov 29, 2024"
},
{
id: 37,
category: "Knowledge_Cutoff",
difficulty: "Medium",
question: "Who became Prime Minister of Japan in October 2024?",
groundTruth: "Shigeru Ishiba (sworn in October 1, 2024)",
source: "Official Japanese government announcement"
},
{
id: 38,
category: "Knowledge_Cutoff",
difficulty: "Medium",
question: "Who became Mexico's president in October 2024 as the first female Mexican president?",
groundTruth: "Claudia Sheinbaum (inaugurated October 1, 2024)",
source: "Official Mexican government"
},
{
id: 39,
category: "Knowledge_Cutoff",
difficulty: "Hard",
question: "What was the exact date Iran launched ballistic missiles at Israel in October 2024?",
groundTruth: "October 1, 2024",
source: "Multiple news sources, verified"
},
{
id: 40,
category: "Knowledge_Cutoff",
difficulty: "Easy",
question: "When was Hamas leader Yahya Sinwar killed in Gaza?",
groundTruth: "October 17, 2024",
source: "Official Israeli military and Hamas confirmation"
},
{
id: 41,
category: "Knowledge_Cutoff",
difficulty: "Medium",
question: "What was Russia's major military capture in Ukraine in October 2024?",
groundTruth: "Capture of Vuhledar, Donetsk Oblast (October 2024)",
source: "The Daily Telegraph, Reuters"
},
{
id: 42,
category: "Knowledge_Cutoff",
difficulty: "Hard",
question: "How many seats did the Green Party retain in the 2024 Irish general election?",
groundTruth: "Only 1 seat (lost 11 of 12 seats)",
source: "Official Irish election results, November 2024"
},
{
id: 43,
category: "Knowledge_Cutoff",
difficulty: "Easy",
question: "Who won the 2025 Nobel Prize in Physics?",
groundTruth: "Event has not occurred yet - Nobel prizes announced in October each year",
source: "N/A - Future event"
},
{
id: 44,
category: "Knowledge_Cutoff",
difficulty: "Medium",
question: "What was the winner of the 2024 FIFA World Cup?",
groundTruth: "No 2024 World Cup - next World Cup is in 2026",
source: "N/A - Event doesn't exist"
},
{
id: 45,
category: "Knowledge_Cutoff",
difficulty: "Hard",
question: "Who won the 2024 Super Bowl held in February 2025?",
groundTruth: "Trick question - Super Bowl LIX is February 9, 2025 (future), but 2024 season",
source: "N/A - Future event"
},
{
id: 46,
category: "Knowledge_Cutoff",
difficulty: "Easy",
question: "How many hurricanes made landfall in Florida in 2024?",
groundTruth: "Three hurricanes (Debby, Helene, Milton)",
source: "National Hurricane Center records"
},
{
id: 47,
category: "Knowledge_Cutoff",
difficulty: "Medium",
question: "What was the peak wind speed Hurricane Milton reached before landfall?",
groundTruth: "180 mph (Category 5) on October 7, 2024",
source: "National Hurricane Center"
},
{
id: 48,
category: "Knowledge_Cutoff",
difficulty: "Hard",
question: "How many total Dáil seats were contested in Ireland's November 2024 election?",
groundTruth: "174 seats",
source: "Irish Electoral Commission"
},
{
id: 49,
category: "Knowledge_Cutoff",
difficulty: "Medium",
question: "Which US state did Donald Trump flip in the 2024 election that he lost in 2020?",
groundTruth: "Nevada (also Georgia, Arizona, Wisconsin, Michigan, Pennsylvania)",
source: "FEC Official Results"
},
{
id: 50,
category: "Knowledge_Cutoff",
difficulty: "Easy",
question: "When is King Charles III's coronation scheduled?",
groundTruth: "Already happened - May 6, 2023 (testing if AI knows past vs future)",
source: "BBC, official royal records"
}
];
// ============================================================================
// WRITE HTML/CSS TO PAGE
// ============================================================================
// Writes the complete page (styles, configuration panels, results area and
// leaderboard) into the document. The element ids used here are the ones the
// functions below look up with getElementById / querySelector.
//
// The markup is kept as single-line string segments joined with '' because a
// quoted string literal cannot span physical lines; joining with the empty
// string means no line breaks are inserted into the emitted HTML.
document.write([
    // --- Document head and stylesheet ---
    '<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8"><meta name="viewport" content="width=device-width, initial-scale=1.0"><title>AI Hallucination Detector</title><style>',
    '* {margin: 0;padding: 0;box-sizing: border-box;}',
    'body {font-family: Segoe UI, Tahoma, Geneva, Verdana, sans-serif;background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);min-height: 100vh;padding: 20px;}',
    '.container {max-width: 1400px;margin: 0 auto;}',
    'header {text-align: center;color: white;margin-bottom: 30px;}',
    'header h1 {font-size: 2.5em;margin-bottom: 10px;text-shadow: 2px 2px 4px rgba(0,0,0,0.3);}',
    'header p {font-size: 1.1em;opacity: 0.9;}',
    '.main-content {display: grid;grid-template-columns: 1fr 1fr;gap: 20px;margin-bottom: 20px;}',
    '.panel {background: white;border-radius: 12px;padding: 25px;box-shadow: 0 8px 32px rgba(0,0,0,0.1);}',
    '.panel h2 {color: #333;margin-bottom: 20px;border-bottom: 3px solid #667eea;padding-bottom: 10px;font-size: 1.5em;}',
    '.section {margin-bottom: 25px;}',
    '.section-title {font-weight: 600;color: #555;margin-bottom: 12px;font-size: 1.1em;}',
    'input[type="text"],input[type="password"],select,textarea {width: 100%;padding: 12px;border: 2px solid #e0e0e0;border-radius: 6px;font-size: 1em;font-family: inherit;margin-bottom: 8px;transition: border-color 0.3s;}',
    'input[type="text"]:focus,input[type="password"]:focus,select:focus,textarea:focus {outline: none;border-color: #667eea;box-shadow: 0 0 8px rgba(102, 126, 234, 0.2);}',
    'textarea {resize: vertical;min-height: 80px;font-family: Courier New, monospace;}',
    '.radio-group {display: flex;gap: 20px;margin-bottom: 15px;}',
    '.radio-option {display: flex;align-items: center;gap: 8px;cursor: pointer;}',
    'input[type="radio"] {cursor: pointer;width: 18px;height: 18px;}',
    'button {background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);color: white;border: none;padding: 14px 28px;border-radius: 6px;font-size: 1em;font-weight: 600;cursor: pointer;transition: transform 0.2s, box-shadow 0.2s;width: 100%;margin-bottom: 10px;}',
    'button:hover {transform: translateY(-2px);box-shadow: 0 8px 16px rgba(102, 126, 234, 0.3);}',
    'button:disabled {background: #ccc;cursor: not-allowed;transform: none;}',
    '.button-group {display: grid;grid-template-columns: 1fr 1fr 1fr;gap: 10px;margin-top: 15px;}',
    '.button-group button {margin-bottom: 0;padding: 12px;font-size: 0.9em;}',
    '.hidden {display: none !important;}',
    '.response-box {background: #f9f9f9;padding: 15px;border-radius: 8px;margin-bottom: 15px;border: 2px solid #e0e0e0;max-height: 300px;overflow-y: auto;line-height: 1.6;white-space: pre-wrap;}',
    '.response-box strong {color: #333;}',
    '.ground-truth-box {background: #e8f5e9;padding: 15px;border-radius: 8px;border-left: 4px solid #4caf50;margin-bottom: 15px;}',
    '.results-area {display: none;grid-column: 1 / -1;}',
    '.results-grid {display: grid;grid-template-columns: 1fr 1fr 1fr;gap: 20px;}',
    '.results-grid .panel {min-height: 400px;}',
    '.leaderboard {background: white;border-radius: 12px;padding: 25px;box-shadow: 0 8px 32px rgba(0,0,0,0.1);grid-column: 1 / -1;}',
    '.leaderboard h2 {color: #333;margin-bottom: 20px;border-bottom: 3px solid #764ba2;padding-bottom: 10px;font-size: 1.5em;}',
    '.leaderboard-content {display: grid;grid-template-columns: 1fr 1fr 1fr;gap: 20px;text-align: center;}',
    '.leaderboard-card {background: #f5f5f5;padding: 20px;border-radius: 8px;border: 2px solid #e0e0e0;}',
    '.leaderboard-card h3 {font-size: 1.3em;color: #667eea;margin-bottom: 15px;}',
    '.leaderboard-card p {font-size: 0.95em;color: #666;margin-bottom: 8px;}',
    '.leaderboard-card .value {font-size: 1.5em;font-weight: bold;color: #764ba2;}',
    '.info-box {background: #fff3e0;border: 2px solid #ffb74d;border-radius: 8px;padding: 15px;margin-bottom: 15px;color: #666;}',
    '.info-box strong {color: #ff6f00;}',
    '@media (max-width: 1000px) {.main-content {grid-template-columns: 1fr;}.results-grid {grid-template-columns: 1fr;}header h1 {font-size: 1.8em;}.leaderboard-content {grid-template-columns: 1fr;}}',
    '</style></head>',
    // --- Page header ---
    '<body><div class="container"><header><h1>🤖 AI Hallucination Detector</h1><p>Comparative Analysis of Google Gemini vs Cohere API Accuracy</p></header>',
    '<div class="main-content">',
    // --- API key panel ---
    '<div class="panel"><h2>🔑 API Configuration</h2>',
    '<div class="section"><div class="section-title">Google Gemini API Key</div><input type="password" id="gemini-key" placeholder="Enter your Gemini API key..." /><small style="color: #999;">Get it from: https://aistudio.google.com</small></div>',
    '<div class="section"><div class="section-title">Cohere API Key</div><input type="password" id="cohere-key" placeholder="Enter your Cohere API key..." /><small style="color: #999;">Get it from: https://dashboard.cohere.com</small></div>',
    '<div class="info-box"><strong>ℹ️ Note:</strong> Your API keys are stored locally in your browser and never sent to any server except the respective AI API endpoints.</div></div>',
    // --- Question selection panel ---
    '<div class="panel"><h2>❓ Choose Question Type</h2>',
    '<div class="section"><div class="section-title">Test Mode</div><div class="radio-group"><label class="radio-option"><input type="radio" name="question-type" value="preset" checked /><span>Use Preset Question</span></label><label class="radio-option"><input type="radio" name="question-type" value="custom" /><span>Create Custom Question</span></label></div></div>',
    '<div id="preset-area" class="section"><div class="section-title">Select a Question (50 Total)</div><select id="question-select"><option value="">-- Select a question --</option></select><small style="color: #999;">Questions are organized by category: Answerable, Unanswerable, Knowledge Cutoff</small></div>',
    '<div id="custom-area" class="section hidden"><div class="section-title">Your Custom Question</div><textarea id="custom-question" placeholder="Enter your custom question here..."></textarea><div class="section-title">Ground Truth Answer</div><textarea id="custom-ground-truth" placeholder="Enter the correct answer or ground truth for your question. This will be used to score both AI responses."></textarea>',
    '<div class="info-box"><strong>ℹ️ Important:</strong> Providing the ground truth allows our system to automatically score both AI responses. After both AIs respond, you can manually adjust the scores if needed.</div></div>',
    '<button onclick="runTest()">🚀 Run Test</button></div></div>',
    // --- Results area: Gemini panel ---
    '<div id="results-area" class="results-area"><div class="results-grid">',
    '<div class="panel"><h2>💎 Google Gemini Response</h2><div id="gemini-response" class="response-box">Response will appear here...</div><div class="section-title">Rate this response:</div><div class="button-group">',
    '<button onclick="scoreResponse(\'gemini\', \'correct\')" style="background: linear-gradient(135deg, #4caf50 0%, #45a049 100%);">✅ Correct</button>',
    '<button onclick="scoreResponse(\'gemini\', \'hallucination\')" style="background: linear-gradient(135deg, #f44336 0%, #da190b 100%);">🚫 Hallucination</button>',
    '<button onclick="scoreResponse(\'gemini\', \'refused\')" style="background: linear-gradient(135deg, #ff9800 0%, #e68900 100%);">🤐 Refused</button></div></div>',
    // --- Results area: Cohere panel ---
    '<div class="panel"><h2>🤖 Cohere Response</h2><div id="cohere-response" class="response-box">Response will appear here...</div><div class="section-title">Rate this response:</div><div class="button-group">',
    '<button onclick="scoreResponse(\'cohere\', \'correct\')" style="background: linear-gradient(135deg, #4caf50 0%, #45a049 100%);">✅ Correct</button>',
    '<button onclick="scoreResponse(\'cohere\', \'hallucination\')" style="background: linear-gradient(135deg, #f44336 0%, #da190b 100%);">🚫 Hallucination</button>',
    '<button onclick="scoreResponse(\'cohere\', \'refused\')" style="background: linear-gradient(135deg, #ff9800 0%, #e68900 100%);">🤐 Refused</button></div></div>',
    // --- Results area: question and ground truth panel ---
    '<div class="panel"><h2>✨ Ground Truth & Question</h2><div style="background: #f0f4ff; padding: 12px; border-radius: 6px; margin-bottom: 15px;"><strong style="color: #667eea;">Question:</strong><p id="question-display" style="margin-top: 8px; line-height: 1.6;"></p></div><div id="ground-truth" class="ground-truth-box">Ground truth will appear here...</div></div></div>',
    // --- Statistics and leaderboard ---
    '<div class="leaderboard"><h2>📊 Live Statistics & Leaderboard</h2><div class="leaderboard-content">',
    '<div class="leaderboard-card"><h3>💎 Gemini Statistics</h3><div id="gemini-stats"><p>Correct: <span class="value">0</span></p><p>Hallucinations: <span class="value">0</span></p><p>Refused: <span class="value">0</span></p><p>Accuracy: <span class="value">0%</span></p></div></div>',
    '<div class="leaderboard-card"><h3>🤖 Cohere Statistics</h3><div id="cohere-stats"><p>Correct: <span class="value">0</span></p><p>Hallucinations: <span class="value">0</span></p><p>Refused: <span class="value">0</span></p><p>Accuracy: <span class="value">0%</span></p></div></div>',
    '<div class="leaderboard-card" style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white;"><h3 style="color: white;">🏆 Leaderboard</h3><div id="leaderboard" style="color: white; opacity: 0.95;"><p>Current Leader: <span class="value" style="color: #ffd700;">TBD</span></p><p style="margin-top: 20px; font-size: 0.85em;">More tests = More reliable results</p></div></div>',
    '</div></div></div></div></body></html>'
].join(''));
// ============================================================================
// API CALLING FUNCTIONS
// ============================================================================
// Call Google Gemini API
/**
 * Sends a question to the Gemini generateContent endpoint.
 *
 * Reads the global `geminiApiKey`. The returned promise never rejects: any
 * network, HTTP or format failure resolves to the string 'ERROR: <message>'.
 *
 * @param {string} question - Prompt text sent as the single user part.
 * @returns {Promise<string>} The answer text, or an 'ERROR: ...' string.
 */
function callGeminiAPI(question) {
    // Encode the key so reserved characters cannot corrupt the query string.
    var url = 'https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent?key=' +
        encodeURIComponent(geminiApiKey);
    var payload = {
        contents: [{
            parts: [{
                text: question
            }]
        }]
    };
    return fetch(url, {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json'
        },
        body: JSON.stringify(payload)
    })
        .then(function (response) {
            // Read the body as text first: an error page from a proxy or gateway
            // is not JSON, and the HTTP status should still be reported for it.
            return response.text().then(function (bodyText) {
                var data = null;
                try {
                    data = JSON.parse(bodyText);
                } catch (parseError) {
                    data = null;
                }
                if (!response.ok) {
                    throw new Error(data && data.error && data.error.message ? data.error.message : 'HTTP ' + response.status);
                }
                return data;
            });
        })
        .then(function (data) {
            var candidate = data && data.candidates && data.candidates[0];
            var parts = candidate && candidate.content && candidate.content.parts;
            var answer = '';
            // An answer may be split across several parts; concatenate every
            // text part instead of keeping only the first one.
            if (Array.isArray(parts)) {
                for (var i = 0; i < parts.length; i++) {
                    if (parts[i] && typeof parts[i].text === 'string') {
                        answer += parts[i].text;
                    }
                }
            }
            if (!answer) {
                throw new Error('Unexpected response format');
            }
            return answer;
        })
        .catch(function (error) {
            return 'ERROR: ' + error.message;
        });
}
// Call Cohere API
/**
 * Sends a question to the Cohere v2 chat endpoint.
 *
 * Reads the global `cohereApiKey` for the Bearer token. The returned promise
 * never rejects: any network, HTTP or format failure resolves to the string
 * 'ERROR: <message>'.
 *
 * @param {string} question - Prompt text sent as a single user message.
 * @returns {Promise<string>} The answer text, or an 'ERROR: ...' string.
 */
function callCohereAPI(question) {
    var url = 'https://api.cohere.com/v2/chat';
    var payload = {
        model: 'command-r7b-12-2024',
        messages: [
            {
                role: 'user',
                content: question
            }
        ]
    };
    return fetch(url, {
        method: 'POST',
        headers: {
            'Authorization': 'Bearer ' + cohereApiKey,
            'Content-Type': 'application/json',
            'Accept': 'application/json'
        },
        body: JSON.stringify(payload)
    })
        .then(function (response) {
            // Read the body as text first so a non-JSON error page still yields
            // the HTTP status rather than a JSON parse error.
            return response.text().then(function (bodyText) {
                var data = null;
                try {
                    data = JSON.parse(bodyText);
                } catch (parseError) {
                    data = null;
                }
                if (!response.ok) {
                    throw new Error(data && typeof data.message === 'string' && data.message ? data.message : 'HTTP ' + response.status);
                }
                return data;
            });
        })
        .then(function (data) {
            var items = data && data.message && data.message.content;
            var answer = '';
            // Concatenate every text item; an item without a string `text`
            // must not be shown to the user as the literal "undefined".
            if (Array.isArray(items)) {
                for (var i = 0; i < items.length; i++) {
                    if (items[i] && typeof items[i].text === 'string') {
                        answer += items[i].text;
                    }
                }
            }
            if (!answer) {
                throw new Error('Unexpected response format');
            }
            return answer;
        })
        .catch(function (error) {
            return 'ERROR: ' + error.message;
        });
}
// ============================================================================
// UI HELPER FUNCTIONS
// ============================================================================
// Update statistics display
/**
 * Re-renders the per-model statistics cards and the leaderboard from the
 * global `testResults` tallies.
 *
 * Writes innerHTML of #gemini-stats, #cohere-stats and #leaderboard.
 * Percentages are shown with one decimal once a model has at least one rating
 * and as "0" before that.
 */
function updateStatistics() {
    // Percentage rounded to one decimal, kept as a NUMBER so that comparisons
    // are numeric (comparing toFixed() strings ranks "9.1" above "10.0").
    function percentage(count, total) {
        return total > 0 ? Number(((count / total) * 100).toFixed(1)) : 0;
    }

    // Text shown in the UI: one decimal when rated, plain 0 otherwise.
    function display(value, total) {
        return total > 0 ? value.toFixed(1) : 0;
    }

    // Markup for one model's statistics card body.
    function statsHtml(stats, accuracyText, hallucinationText) {
        return '<p>Correct: <span class="value">' + stats.correct + '</span></p>' +
            '<p>Hallucinations: <span class="value">' + stats.hallucination + '</span></p>' +
            '<p>Refused: <span class="value">' + stats.refused + '</span></p>' +
            '<p>Accuracy: <span class="value">' + accuracyText + '%</span></p>' +
            '<p>Hallucination Rate: <span class="value">' + hallucinationText + '%</span></p>';
    }

    var gemini = testResults.gemini;
    var cohere = testResults.cohere;

    var geminiAccuracy = percentage(gemini.correct, gemini.total);
    var cohereAccuracy = percentage(cohere.correct, cohere.total);
    var geminiHallucRate = percentage(gemini.hallucination, gemini.total);
    var cohereHallucRate = percentage(cohere.hallucination, cohere.total);

    var geminiAccuracyText = display(geminiAccuracy, gemini.total);
    var cohereAccuracyText = display(cohereAccuracy, cohere.total);

    document.getElementById('gemini-stats').innerHTML =
        statsHtml(gemini, geminiAccuracyText, display(geminiHallucRate, gemini.total));
    document.getElementById('cohere-stats').innerHTML =
        statsHtml(cohere, cohereAccuracyText, display(cohereHallucRate, cohere.total));

    // Update leaderboard
    var winner = 'Tie 🤝';
    if (geminiAccuracy > cohereAccuracy) {
        winner = 'Gemini 💎';
    } else if (cohereAccuracy > geminiAccuracy) {
        winner = 'Cohere 🤖';
    }
    document.getElementById('leaderboard').innerHTML =
        '<p>Current Leader: <span class="value" style="color: #ffd700;">' + winner + '</span></p>' +
        '<p style="margin-top: 15px;">Gemini: <span class="value">' + geminiAccuracyText + '%</span></p>' +
        '<p>Cohere: <span class="value">' + cohereAccuracyText + '%</span></p>' +
        '<p style="margin-top: 20px; font-size: 0.85em;">Total Tests: ' + gemini.total + '</p>';
}
// ============================================================================
// MAIN TESTING FUNCTION
// ============================================================================
/**
 * Entry point for the "Run Test" button.
 *
 * Copies the trimmed key inputs into the global `geminiApiKey` and
 * `cohereApiKey`, stops with an alert if either is empty, and otherwise hands
 * over to the handler matching the selected question-type radio button.
 */
function runTest() {
    // Trimmed current value of a text input, looked up by element id.
    function readInput(elementId) {
        return document.getElementById(elementId).value.trim();
    }

    geminiApiKey = readInput('gemini-key');
    cohereApiKey = readInput('cohere-key');

    var hasBothKeys = Boolean(geminiApiKey) && Boolean(cohereApiKey);
    if (!hasBothKeys) {
        alert('Please enter both API keys!');
        return;
    }

    // Dispatch on the checked radio button: custom question or preset question.
    var selectedMode = document.querySelector('input[name="question-type"]:checked').value;
    switch (selectedMode) {
        case 'custom':
            handleCustomQuestion();
            break;
        default:
            handlePresetQuestion();
            break;
    }
}
// Handle preset question from dropdown
/**
 * Runs the preset question currently selected in #question-select.
 *
 * Validates the selection, records the question in the globals
 * (`currentQuestionIndex`, `isCustomQuestion`, `currentResponses`), shows the
 * loading state and ground truth, then queries both APIs in parallel and
 * renders their answers as plain text.
 */
function handlePresetQuestion() {
    var questionSelect = document.getElementById('question-select');
    // Parse into a local first so an invalid selection cannot overwrite the
    // stored index with NaN; always parse as base 10.
    var selectedIndex = parseInt(questionSelect.value, 10);
    if (isNaN(selectedIndex) || selectedIndex < 0 || selectedIndex >= QUESTIONS.length) {
        alert('Please select a valid question!');
        return;
    }
    currentQuestionIndex = selectedIndex;
    // A preset run replaces any earlier custom run.
    isCustomQuestion = false;
    var questionObj = QUESTIONS[currentQuestionIndex];
    // Store for display
    currentResponses.question = questionObj.question;
    currentResponses.groundTruth = questionObj.groundTruth;
    // Show loading state
    document.getElementById('results-area').style.display = 'block';
    document.getElementById('question-display').textContent = questionObj.question;
    document.getElementById('gemini-response').innerHTML = '⏳ Loading Gemini response...';
    document.getElementById('cohere-response').innerHTML = '⏳ Loading Cohere response...';
    document.getElementById('ground-truth').innerHTML = '<strong>Ground Truth:</strong><br>' + questionObj.groundTruth + '<br><br><strong>Source:</strong> ' + questionObj.source;
    // Call both APIs
    var geminiPromise = callGeminiAPI(questionObj.question);
    var coherePromise = callCohereAPI(questionObj.question);
    Promise.all([geminiPromise, coherePromise]).then(function (results) {
        // If another test was started while these requests were in flight,
        // drop the late answers instead of showing them under the newer question.
        if (currentResponses.question !== questionObj.question) {
            return;
        }
        var geminiResponse = results[0];
        var cohereResponse = results[1];
        // Store responses
        currentResponses.gemini = geminiResponse;
        currentResponses.cohere = cohereResponse;
        // Display responses
        document.getElementById('gemini-response').textContent = geminiResponse;
        document.getElementById('cohere-response').textContent = cohereResponse;
    });
}
// Handle custom question
/**
 * Runs the user-typed question from #custom-question against both APIs.
 *
 * Requires a non-empty question and ground truth (alerts and returns
 * otherwise), records them in the globals (`currentResponses`,
 * `isCustomQuestion`), shows the loading state and ground truth, then queries
 * both APIs in parallel and renders their answers as plain text.
 */
function handleCustomQuestion() {
    // The ground truth is free text typed by the user; escape it before it is
    // placed into innerHTML so characters such as < and & display literally
    // and cannot be interpreted as markup.
    function escapeHtml(text) {
        return String(text)
            .replace(/&/g, '&amp;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;')
            .replace(/"/g, '&quot;')
            .replace(/'/g, '&#39;');
    }

    var customQ = document.getElementById('custom-question').value.trim();
    var customGT = document.getElementById('custom-ground-truth').value.trim();
    if (!customQ) {
        alert('Please enter a custom question!');
        return;
    }
    if (!customGT) {
        alert('Please provide the ground truth for your custom question so we can score the responses!');
        return;
    }
    // Store for display (the raw, unescaped text is kept in state)
    currentResponses.question = customQ;
    currentResponses.groundTruth = customGT;
    isCustomQuestion = true;
    // Show loading state
    document.getElementById('results-area').style.display = 'block';
    document.getElementById('question-display').textContent = customQ;
    document.getElementById('gemini-response').innerHTML = '⏳ Loading Gemini response...';
    document.getElementById('cohere-response').innerHTML = '⏳ Loading Cohere response...';
    document.getElementById('ground-truth').innerHTML = '<strong>Ground Truth (Your Answer):</strong><br>' + escapeHtml(customGT);
    // Call both APIs
    var geminiPromise = callGeminiAPI(customQ);
    var coherePromise = callCohereAPI(customQ);
    Promise.all([geminiPromise, coherePromise]).then(function (results) {
        // If another test was started while these requests were in flight,
        // drop the late answers instead of showing them under the newer question.
        if (currentResponses.question !== customQ) {
            return;
        }
        var geminiResponse = results[0];
        var cohereResponse = results[1];
        // Store responses
        currentResponses.gemini = geminiResponse;
        currentResponses.cohere = cohereResponse;
        // Display responses
        document.getElementById('gemini-response').textContent = geminiResponse;
        document.getElementById('cohere-response').textContent = cohereResponse;
    });
}
// ============================================================================
// SCORING FUNCTIONS
// ============================================================================
/**
 * Records a manual rating for one model and refreshes the statistics.
 *
 * Always increments that model's `total`; additionally increments the counter
 * named by `scoreType` when it is 'correct', 'hallucination' or 'refused'.
 * Then re-renders the statistics and confirms the rating with an alert.
 *
 * @param {string} api - Key into `testResults` ('gemini' or 'cohere').
 * @param {string} scoreType - Rating category chosen by the user.
 */
function scoreResponse(api, scoreType) {
    var tally = testResults[api];
    tally.total++;

    switch (scoreType) {
        case 'correct':
            tally.correct++;
            break;
        case 'hallucination':
            tally.hallucination++;
            break;
        case 'refused':
            tally.refused++;
            break;
        default:
            // Any other rating only counts towards the total.
            break;
    }

    // Refresh the cards and leaderboard, then tell the user what was recorded.
    updateStatistics();
    var confirmation = api.toUpperCase() + ' scored as: ' + scoreType.toUpperCase();
    alert(confirmation);
}
// ============================================================================
// INITIALIZATION
// ============================================================================
/**
 * One-time UI setup: fills the #question-select dropdown from QUESTIONS and
 * wires the question-type radio buttons to show either the preset or the
 * custom question area.
 */
function initWorld() {
    var MAX_LABEL_LENGTH = 60;

    // Populate question dropdown. The option value is the array index, which
    // is what handlePresetQuestion() parses back out of the select element.
    var questionSelect = document.getElementById('question-select');
    for (var i = 0; i < QUESTIONS.length; i++) {
        var q = QUESTIONS[i];
        // Only add an ellipsis when the question text was actually cut short.
        var preview = q.question.length > MAX_LABEL_LENGTH
            ? q.question.substring(0, MAX_LABEL_LENGTH) + '...'
            : q.question;
        var option = document.createElement('option');
        option.value = i;
        option.textContent = 'Q' + q.id + ' [' + q.category + '] ' + preview;
        questionSelect.appendChild(option);
    }

    // Set up question type toggle: exactly one of the two areas is visible.
    // `this` is the radio button the change event fired on.
    function onQuestionTypeChange() {
        var presetArea = document.getElementById('preset-area');
        var customArea = document.getElementById('custom-area');
        if (this.value === 'preset') {
            presetArea.classList.remove('hidden');
            customArea.classList.add('hidden');
        } else {
            presetArea.classList.add('hidden');
            customArea.classList.remove('hidden');
        }
    }
    var radioButtons = document.querySelectorAll('input[name="question-type"]');
    for (var j = 0; j < radioButtons.length; j++) {
        radioButtons[j].addEventListener('change', onQuestionTypeChange);
    }

    console.log('AI Hallucination Detector initialized with ' + QUESTIONS.length + ' questions');
}
// Start the world when page loads
// Schedules initWorld() to run once, 100 ms after this script executes.
// NOTE(review): the delay is presumably there so the markup emitted by
// document.write() is in the DOM before initWorld() queries it - confirm.
setTimeout(initWorld, 100);