
chat-pdf
Chatbot with PDF RAG
Prompt
buatkan laman web chat-bot dengan fitur upload dokumen PDF sebagai RAG untuk chat respon.
A system prompt was added to support web rendering.
```html
<!DOCTYPE html>
<html lang="id">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Chatbot PDF RAG Demo</title>
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/css/bootstrap.min.css" rel="stylesheet">
<style>
/* Page background behind the chat card */
body { background: #f6f8fa; }
/* Centered white card that holds the upload field, message list and input form */
.chat-container {
max-width: 600px; margin: 40px auto; background: #fff;
border-radius: 12px; box-shadow: 0 2px 16px rgba(0,0,0,0.06); padding:24px;
}
/* Scrollable message history; capped height so the input form stays in view */
.messages {
max-height: 300px; overflow-y: auto; margin-bottom: 16px;
border: 1px solid #ececec; border-radius: 6px; padding: 12px; background: #f8fafd;
}
/* Row alignment: user messages on the right, bot messages on the left */
.msg-user { text-align: right; margin-bottom: 10px; }
.msg-bot { text-align: left; margin-bottom: 10px; }
/* Shared bubble shape for both senders */
.msg-bubble {
display: inline-block;
padding: 8px 14px;
border-radius: 16px;
max-width: 75%;
word-break: break-word;
font-size: 1rem;
}
/* User bubble colors; the squared-off corner points toward the sender's side */
.msg-user .msg-bubble {
background: #2847d2; color: #fff ;
border-bottom-right-radius: 2px;
}
/* Bot bubble colors; the squared-off corner points toward the sender's side */
.msg-bot .msg-bubble {
background: #e5e7ef; color: #2c334d;
border-bottom-left-radius: 2px;
}
/* Status badge shown under the file input while/after a PDF is processed */
.pdf-preview {
margin-bottom:10px;font-size:0.97em;color:#555;
background: #edeeff; padding:7px 13px; border-radius:6px; display: inline-block;
}
/* One dot of the three-dot "bot is working" indicator */
.loading-dot { display: inline-block; width: 8px; height: 8px; border-radius: 50%; background: #2847d2; margin: 0 1px; animation: bounce 1s infinite;}
/* Pulse animation used by .loading-dot: briefly scales the dot up, then back */
@keyframes bounce {
0%,80%,100% {transform:scale(1);}
40% {transform:scale(1.5);}
}
</style>
<!-- pdf.js: pinned to the 3.x line, which ships a classic (non-module) build that
     defines window.pdfjsLib. The 4.x releases are published as ES modules (.mjs),
     so a plain <script src=".../pdf.min.js"> for 4.x does not provide pdfjsLib. -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.min.js"></script>
<script>
// Point pdf.js at the worker build of the same version so parsing runs in a
// worker and the library does not have to guess the worker location.
if (window.pdfjsLib) {
  window.pdfjsLib.GlobalWorkerOptions.workerSrc =
    "https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.worker.min.js";
}
</script>
</head>
<body>
<!-- Chat card: PDF upload field, message history, question form and a usage note -->
<div class="chat-container">
<h4 class="mb-4">Chatbot RAG PDF</h4>
<!-- PDF upload; #pdf-info receives status and error messages from the upload handler -->
<div>
<label for="pdf-upload" class="form-label">Unggah dokumen PDF:</label>
<input class="form-control mb-2" type="file" accept="application/pdf" id="pdf-upload" />
<div id="pdf-info"></div>
</div>
<hr class="mb-3 mt-3"/>
<!-- Message history; bubbles are appended here by the chat script -->
<div class="messages" id="messages"></div>
<!-- Question form; its submit event is handled by the chat script -->
<form id="chat-form" autocomplete="off" class="d-flex mt-2">
<input id="msg-input" type="text" class="form-control me-2" placeholder="Tulis pertanyaan..."/>
<button class="btn btn-primary" type="submit">Kirim</button>
</form>
<!-- Usage note shown to the user -->
<div class="form-text mt-2">
* Semua proses berjalan di browser.<br>
Untuk demo, jawaban chatbot hanya berdasarkan dokumen yang diunggah.
</div>
</div>
<script>
/**
 * Split extracted document text into retrieval chunks of roughly `maxLen` characters.
 *
 * Non-blank lines are packed greedily into a buffer; when adding the next piece
 * would exceed `maxLen`, the buffer is emitted as a chunk. Lines that are longer
 * than `maxLen` on their own (e.g. a whole PDF page joined into one line) are
 * first broken up at word boundaries, so no chunk grows to page size. A chunk may
 * exceed `maxLen` by one character because the joining space is not counted.
 *
 * @param {string} text - Text to split; lines are separated by '\n'.
 * @param {number} [maxLen=300] - Approximate maximum chunk length in characters.
 * @returns {string[]} Non-empty chunks in document order.
 */
function splitText(text, maxLen = 300) {
  // Fall back to the default for unusable limits; a limit below 1 could never make progress.
  const limit = Number.isFinite(maxLen) && maxLen >= 1 ? Math.floor(maxLen) : 300;
  const chunks = [];
  let buffer = '';

  // Break one over-long line into pieces of at most `limit` characters,
  // preferring word boundaries and hard-slicing only words that cannot fit.
  const splitLongLine = (line) => {
    const pieces = [];
    let current = '';
    for (const word of line.split(/\s+/)) {
      if (word.length === 0) continue;
      let rest = word;
      while (rest.length > limit) {
        if (current.length > 0) {
          pieces.push(current);
          current = '';
        }
        pieces.push(rest.slice(0, limit));
        rest = rest.slice(limit);
      }
      if (rest.length === 0) continue;
      if (current.length === 0) {
        current = rest;
      } else if (current.length + 1 + rest.length > limit) {
        pieces.push(current);
        current = rest;
      } else {
        current += ' ' + rest;
      }
    }
    if (current.length > 0) pieces.push(current);
    return pieces;
  };

  for (const line of String(text).split('\n')) {
    if (line.trim().length === 0) continue;
    const pieces = line.length > limit ? splitLongLine(line) : [line];
    for (const piece of pieces) {
      if (buffer.length + piece.length > limit) {
        // Never emit the still-empty initial buffer as a chunk.
        if (buffer.length > 0) chunks.push(buffer);
        buffer = piece;
      } else {
        buffer += (buffer.length > 0 ? ' ' : '') + piece;
      }
    }
  }
  if (buffer.trim().length > 0) chunks.push(buffer);
  return chunks;
}
// Global state shared by the PDF upload handler and the chat logic
// Text chunks of the current document; filled from splitText() after a PDF is parsed
let pdfChunks = [];
// Name of the uploaded file, used in the status message
let pdfFileName = "";
// True once a PDF has been parsed and pdfChunks has been filled; reset on every new upload
let pdfLoaded = false;
// PDF upload and parsing
const pdfUpload = document.getElementById('pdf-upload');
const pdfInfo = document.getElementById('pdf-info');
// Incremented on every file selection; lets a slow, older parse detect that it
// has been superseded so it does not overwrite the state of a newer upload.
let pdfLoadSeq = 0;

/**
 * Show a status badge in #pdf-info of the form "<before><b>fileName</b><after>".
 * The file name is inserted as text, never as HTML, because it is user-controlled.
 */
function showPdfStatus(before, fileName, after) {
  const badge = document.createElement('span');
  badge.className = 'pdf-preview';
  const bold = document.createElement('b');
  bold.textContent = fileName;
  badge.append(before, bold, after);
  pdfInfo.textContent = '';
  pdfInfo.appendChild(badge);
}

/** Show an error message in #pdf-info. */
function showPdfError(message) {
  const box = document.createElement('div');
  box.className = 'text-danger';
  box.textContent = message;
  pdfInfo.textContent = '';
  pdfInfo.appendChild(box);
}

/**
 * Handle selection of a file: reset the document state, extract the text of
 * every page with pdf.js, split it into chunks and publish the result through
 * the global pdfChunks / pdfFileName / pdfLoaded variables.
 */
pdfUpload.addEventListener('change', async function() {
  const seq = ++pdfLoadSeq;
  pdfInfo.textContent = '';
  pdfChunks = [];
  pdfLoaded = false;
  const file = this.files && this.files[0];
  if (!file) return;
  if (file.type !== 'application/pdf') {
    showPdfError('File bukan PDF.');
    return;
  }
  pdfFileName = file.name;
  showPdfStatus('Memproses ', file.name, '...');
  try {
    const pdfjs = window['pdfjsLib'];
    if (!pdfjs) throw new Error('pdf.js (pdfjsLib) is not loaded');
    const data = new Uint8Array(await file.arrayBuffer());
    const pdf = await pdfjs.getDocument({ data }).promise;
    let allText = '';
    for (let pageNum = 1; pageNum <= pdf.numPages; pageNum++) {
      const page = await pdf.getPage(pageNum);
      const textContent = await page.getTextContent();
      // Concatenate all text items of the page with spaces; one line per page
      const pageText = textContent.items.map((item) => item.str).join(' ');
      allText += pageText + '\n';
    }
    // A newer upload started while this one was being parsed: drop this result.
    if (seq !== pdfLoadSeq) return;
    // Split and store segments
    pdfChunks = splitText(allText);
    pdfLoaded = true;
    showPdfStatus('Dokumen ', pdfFileName, ` dimuat (${pdfChunks.length} potongan dokumen).`);
  } catch (error) {
    if (seq !== pdfLoadSeq) return;
    // Covers both a failure to open the document and a failure on any page.
    console.error(error);
    showPdfError('Gagal membuka PDF. Pastikan PDF tidak rusak.');
  }
});
// Chat logic: DOM references used by the message rendering and form handlers
// Container that message rows are appended to
const messagesDiv = document.getElementById('messages');
// Form whose submit event sends a question
const chatForm = document.getElementById('chat-form');
// Text input holding the question being typed
const msgInput = document.getElementById('msg-input');
/**
 * Append a message row to the message list and scroll it into view.
 *
 * @param {string|{res: string, _ref: boolean}} text - Plain message text, or for
 *   bot messages an answer object whose `res` holds the answer and whose truthy
 *   `_ref` requests the "[Referensi dokumen]" marker below it.
 * @param {string} [who='user'] - 'user' for the user's side; anything else is rendered as bot.
 * @param {boolean} [loading=false] - When true, render the three-dot loading indicator instead of text.
 * @returns {HTMLDivElement} The created row element (contains one `.msg-bubble` span).
 */
function addMsg(text, who = 'user', loading = false) {
  const msgDiv = document.createElement('div');
  msgDiv.className = who === 'user' ? 'msg-user' : 'msg-bot';
  const bubble = document.createElement('span');
  bubble.className = 'msg-bubble';
  if (loading) {
    for (let i = 0; i < 3; i++) {
      const dot = document.createElement('span');
      dot.className = 'loading-dot';
      bubble.appendChild(dot);
    }
  } else if (who === 'bot' && text && text._ref) {
    // The answer quotes text extracted from the uploaded PDF, so it is inserted
    // as text nodes (never parsed as HTML); newlines become <br> elements.
    String(text.res).split('\n').forEach((line, idx) => {
      if (idx > 0) bubble.appendChild(document.createElement('br'));
      bubble.appendChild(document.createTextNode(line));
    });
    bubble.appendChild(document.createElement('br'));
    const ref = document.createElement('span');
    ref.style.color = '#2847d2';
    ref.style.fontSize = '90%';
    ref.textContent = '[Referensi dokumen]';
    bubble.appendChild(ref);
  } else {
    bubble.textContent = text;
  }
  msgDiv.appendChild(bubble);
  messagesDiv.appendChild(msgDiv);
  // Keep the newest message visible
  messagesDiv.scrollTop = messagesDiv.scrollHeight;
  return msgDiv;
}
/**
 * Score how well a document chunk matches a question, for the simple "RAG"
 * retrieval step: cosine-style similarity over the sets of distinct words.
 *
 * Both inputs are lower-cased and split on anything that is not a letter, mark,
 * digit or underscore. Empty tokens (produced by leading/trailing punctuation)
 * are discarded so that e.g. "xyz?" and "abc." do not appear to share a word.
 *
 * @param {string} q - The user's question.
 * @param {string} chunk - One document chunk.
 * @returns {number} shared / (sqrt(|q words|) * sqrt(|chunk words|) + 1e-5); 0 when nothing is shared.
 */
function simpleSimilarity(q, chunk) {
  const toWordSet = (s) =>
    new Set(
      String(s)
        .toLowerCase()
        .split(/[^\p{L}\p{M}\p{N}_]+/u)
        .filter((w) => w.length > 0)
    );
  const qSet = toWordSet(q);
  const cSet = toWordSet(chunk);
  // Count overlap
  let shared = 0;
  for (const word of qSet) {
    if (cSet.has(word)) shared++;
  }
  // The small epsilon avoids division by zero when either set is empty.
  return shared / (Math.sqrt(qSet.size) * Math.sqrt(cSet.size) + 1e-5);
}
/**
 * Build the chatbot answer for a question from the uploaded document.
 *
 * Scores every chunk in the global `pdfChunks` with `simpleSimilarity`, keeps up
 * to two best chunks whose score is above 0.07, and quotes the first 260
 * characters of each.
 *
 * @param {string} userMsg - The user's question.
 * @returns {Promise<string|{res: string, _ref: boolean}>} A plain string for the
 *   "no document" / "nothing found" cases, otherwise an object whose `res` holds
 *   the newline-separated answer and whose `_ref` is true.
 */
async function chatBotAnswer(userMsg) {
  if (!pdfLoaded || pdfChunks.length === 0) {
    return "Silakan unggah dokumen PDF terlebih dahulu.";
  }
  const MIN_SIMILARITY = 0.07;
  const MAX_CONTEXTS = 2;
  const MAX_QUOTE_LENGTH = 260;

  // Pick the 1-2 closest chunks, best first (sort is stable, so ties keep document order)
  const ctxs = pdfChunks
    .map((chunk) => ({ chunk, score: simpleSimilarity(userMsg, chunk) }))
    .filter((entry) => entry.score > MIN_SIMILARITY)
    .sort((a, b) => b.score - a.score)
    .slice(0, MAX_CONTEXTS)
    .map((entry) => entry.chunk);

  if (ctxs.length === 0) {
    return "Maaf, saya tidak menemukan jawaban dalam dokumen.";
  }

  let baseResp = "Berdasarkan dokumen, berikut adalah informasi terkait:\n";
  for (const ctx of ctxs) {
    const flat = ctx.replace(/\n/g, ' ');
    // Truncated quotes get an ellipsis inside a single closing quote mark.
    const quote = flat.length > MAX_QUOTE_LENGTH ? flat.slice(0, MAX_QUOTE_LENGTH) + '...' : flat;
    baseResp += `- "${quote}"\n`;
  }
  return { res: baseResp, _ref: true };
}
/**
 * Handle a submitted question: show the user's message, show a loading bubble
 * for the bot, then replace the loading indicator with the answer.
 */
chatForm.addEventListener('submit', function(e) {
  e.preventDefault();
  const userMsg = msgInput.value.trim();
  if (!userMsg) return;
  addMsg(userMsg, 'user');
  msgInput.value = '';
  // Show loading
  const botMsgDiv = addMsg('', 'bot', true);
  const bubble = botMsgDiv.querySelector('.msg-bubble');
  // Short artificial delay so the loading indicator is visible ("processing")
  setTimeout(async () => {
    let botAnswer;
    try {
      botAnswer = await chatBotAnswer(userMsg);
    } catch (err) {
      // Never leave the loading dots spinning if answering fails.
      console.error(err);
      botAnswer = 'Maaf, terjadi kesalahan saat memproses pertanyaan.';
    }
    bubble.textContent = '';
    if (typeof botAnswer === 'string') {
      bubble.textContent = botAnswer;
    } else {
      // The answer quotes text from the uploaded PDF: insert it as text nodes
      // (never as HTML), with a <br> after every line.
      for (const line of String(botAnswer.res).split('\n')) {
        bubble.appendChild(document.createTextNode(line));
        bubble.appendChild(document.createElement('br'));
      }
      const ref = document.createElement('span');
      ref.style.color = '#2847d2';
      ref.style.fontSize = '90%';
      ref.textContent = '[Referensi dokumen]';
      bubble.appendChild(ref);
    }
    messagesDiv.scrollTop = messagesDiv.scrollHeight;
  }, 800);
});
// Enter = submit (Shift+Enter is left alone)
msgInput.addEventListener('keydown', function(e) {
  // Enter pressed while an IME composition is active confirms the composition;
  // it must not send the message.
  if (e.key !== 'Enter' || e.shiftKey || e.isComposing) return;
  e.preventDefault();
  if (typeof chatForm.requestSubmit === 'function') {
    // Fires a real, cancelable submit event handled by the form's submit listener.
    chatForm.requestSubmit();
  } else {
    chatForm.dispatchEvent(new Event('submit', { cancelable: true }));
  }
});
</script>
</body>
</html>
```
Drag to resize