Chatbot with PDF RAG
Prompt
buatkan laman web chat-bot dengan fitur upload dokumen PDF sebagai RAG untuk chat respon.
A system prompt was added to support web rendering
```html
<!DOCTYPE html>
<html lang="id">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>Chatbot PDF RAG Demo</title>
  <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/css/bootstrap.min.css" rel="stylesheet">
  <style>
    body { background: #f6f8fa; }
    .chat-container {
      max-width: 600px;
      margin: 40px auto;
      background: #fff;
      border-radius: 12px;
      box-shadow: 0 2px 16px rgba(0,0,0,0.06);
      padding: 24px;
    }
    .messages {
      max-height: 300px;
      overflow-y: auto;
      margin-bottom: 16px;
      border: 1px solid #ececec;
      border-radius: 6px;
      padding: 12px;
      background: #f8fafd;
    }
    .msg-user { text-align: right; margin-bottom: 10px; }
    .msg-bot { text-align: left; margin-bottom: 10px; }
    .msg-bubble {
      display: inline-block;
      padding: 8px 14px;
      border-radius: 16px;
      max-width: 75%;
      word-break: break-word;
      /* Answers are inserted as plain text; pre-wrap keeps their "\n" line breaks visible. */
      white-space: pre-wrap;
      font-size: 1rem;
    }
    .msg-user .msg-bubble { background: #2847d2; color: #fff; border-bottom-right-radius: 2px; }
    .msg-bot .msg-bubble { background: #e5e7ef; color: #2c334d; border-bottom-left-radius: 2px; }
    .msg-ref { color: #2847d2; font-size: 90%; }
    .pdf-preview {
      margin-bottom: 10px;
      font-size: 0.97em;
      color: #555;
      background: #edeeff;
      padding: 7px 13px;
      border-radius: 6px;
      display: inline-block;
    }
    .loading-dot {
      display: inline-block;
      width: 8px;
      height: 8px;
      border-radius: 50%;
      background: #2847d2;
      margin: 0 1px;
      animation: bounce 1s infinite;
    }
    @keyframes bounce {
      0%, 80%, 100% { transform: scale(1); }
      40% { transform: scale(1.5); }
    }
  </style>
</head>
<body>
  <div class="chat-container">
    <h4 class="mb-4">Chatbot RAG PDF</h4>
    <div>
      <label for="pdf-upload" class="form-label">Unggah dokumen PDF:</label>
      <input class="form-control mb-2" type="file" accept="application/pdf" id="pdf-upload" />
      <div id="pdf-info"></div>
    </div>
    <hr class="mb-3 mt-3"/>
    <div class="messages" id="messages" aria-live="polite"></div>
    <form id="chat-form" autocomplete="off" class="d-flex mt-2">
      <input id="msg-input" type="text" class="form-control me-2" placeholder="Tulis pertanyaan..." aria-label="Tulis pertanyaan"/>
      <button class="btn btn-primary" type="submit">Kirim</button>
    </form>
    <div class="form-text mt-2">
      * Semua proses berjalan di browser.<br>
      Untuk demo, jawaban chatbot hanya berdasarkan dokumen yang diunggah.
    </div>
  </div>

  <script>
    'use strict';

    // ---------------------------------------------------------------------
    // Configuration
    // ---------------------------------------------------------------------

    // pdf.js 4.x is distributed as ES modules (*.mjs), so it is loaded with a
    // dynamic import() instead of a classic <script src> tag.
    const PDFJS_URL = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/4.2.67/pdf.min.mjs';
    const PDFJS_WORKER_URL = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/4.2.67/pdf.worker.min.mjs';

    const CHUNK_MAX_CHARS = 300;       // target upper bound for one document chunk
    const EXCERPT_MAX_CHARS = 260;     // how much of a chunk is quoted in an answer
    const SIMILARITY_THRESHOLD = 0.07; // minimum score for a chunk to be quoted
    const MAX_CONTEXTS = 2;            // number of best chunks quoted per answer
    const FAKE_THINKING_MS = 800;      // cosmetic delay so the loading dots are visible

    // ---------------------------------------------------------------------
    // Text helpers
    // ---------------------------------------------------------------------

    /**
     * Split text into chunks of at most roughly `maxLen` characters, breaking
     * only at whitespace. A single word longer than `maxLen` becomes its own
     * (oversized) chunk. Never returns empty chunks.
     *
     * @param {string} text - Raw document text.
     * @param {number} [maxLen=CHUNK_MAX_CHARS] - Soft maximum chunk length.
     * @returns {string[]} Non-empty chunks in document order.
     */
    function splitText(text, maxLen = CHUNK_MAX_CHARS) {
      const chunks = [];
      let buffer = '';
      for (const word of text.split(/\s+/)) {
        if (word.length === 0) continue;
        // +1 accounts for the joining space.
        if (buffer.length > 0 && buffer.length + 1 + word.length > maxLen) {
          chunks.push(buffer);
          buffer = word;
        } else {
          buffer = buffer.length > 0 ? `${buffer} ${word}` : word;
        }
      }
      if (buffer.length > 0) chunks.push(buffer);
      return chunks;
    }

    /**
     * Lower-case `text` and return its set of word tokens. Splitting is
     * Unicode-aware, and the empty strings that split() yields next to
     * leading/trailing punctuation are discarded so they can never count as a
     * shared "word".
     *
     * @param {string} text
     * @returns {Set<string>}
     */
    function tokenize(text) {
      return new Set(text.toLowerCase().split(/[^\p{L}\p{N}_]+/u).filter(Boolean));
    }

    /**
     * Simple "RAG" retrieval score: number of shared distinct words divided by
     * the geometric mean of both vocabulary sizes (cosine similarity of binary
     * bag-of-words vectors).
     *
     * @param {string} q - User question.
     * @param {string} chunk - Document chunk.
     * @returns {number} Score in [0, 1); 0 when either side has no words.
     */
    function simpleSimilarity(q, chunk) {
      const qSet = tokenize(q);
      const cSet = tokenize(chunk);
      if (qSet.size === 0 || cSet.size === 0) return 0;
      let overlap = 0;
      for (const token of qSet) {
        if (cSet.has(token)) overlap += 1;
      }
      return overlap / (Math.sqrt(qSet.size) * Math.sqrt(cSet.size) + 1e-5);
    }

    // ---------------------------------------------------------------------
    // Global state
    // ---------------------------------------------------------------------

    let pdfChunks = [];
    let pdfFileName = '';
    let pdfLoaded = false;
    // Incremented on every file selection so that a slow, superseded upload
    // cannot overwrite the state of a newer one.
    let uploadToken = 0;
    let pdfjsLibPromise = null;

    // ---------------------------------------------------------------------
    // PDF upload and parsing
    // ---------------------------------------------------------------------

    const pdfUpload = document.getElementById('pdf-upload');
    const pdfInfo = document.getElementById('pdf-info');

    /**
     * Load pdf.js once (lazily) and configure its worker.
     * A failed load is not cached, so the next upload retries.
     *
     * @returns {Promise<object>} The pdf.js module namespace.
     */
    function loadPdfJs() {
      if (pdfjsLibPromise === null) {
        pdfjsLibPromise = import(PDFJS_URL)
          .then((lib) => {
            lib.GlobalWorkerOptions.workerSrc = PDFJS_WORKER_URL;
            return lib;
          })
          .catch((error) => {
            pdfjsLibPromise = null;
            throw error;
          });
      }
      return pdfjsLibPromise;
    }

    /** Create a <b> element whose content is plain text (never parsed as HTML). */
    function bold(text) {
      const el = document.createElement('b');
      el.textContent = text;
      return el;
    }

    /**
     * Replace the upload status area.
     *
     * @param {(string|Node)[]} parts - Strings are inserted as text, not HTML.
     * @param {boolean} [isError=false] - Render with the error style.
     */
    function showPdfStatus(parts, isError = false) {
      const el = document.createElement(isError ? 'div' : 'span');
      el.className = isError ? 'text-danger' : 'pdf-preview';
      el.append(...parts);
      pdfInfo.replaceChildren(el);
    }

    /**
     * Accept files reported as PDF; some platforms leave `type` empty, in
     * which case fall back to the file extension.
     */
    function isPdfFile(file) {
      return file.type === 'application/pdf' || (file.type === '' && /\.pdf$/i.test(file.name));
    }

    /**
     * Extract the text of every page of `file`, one line per page.
     *
     * @param {object} pdfjsLib - Loaded pdf.js module.
     * @param {File} file - The uploaded PDF.
     * @returns {Promise<string>} Page texts joined with "\n".
     */
    async function readPdfText(pdfjsLib, file) {
      const data = new Uint8Array(await file.arrayBuffer());
      const loadingTask = pdfjsLib.getDocument({ data });
      try {
        const pdf = await loadingTask.promise;
        const pages = [];
        for (let pageNum = 1; pageNum <= pdf.numPages; pageNum++) {
          const page = await pdf.getPage(pageNum);
          const textContent = await page.getTextContent();
          // Concatenate all text items of the page with spaces.
          pages.push(textContent.items.map((item) => item.str ?? '').join(' '));
        }
        return pages.join('\n');
      } finally {
        // Best-effort release of the document and its worker; a cleanup
        // failure must not mask the extraction result or error.
        loadingTask.destroy().catch((error) => console.warn('pdf.js cleanup failed', error));
      }
    }

    pdfUpload.addEventListener('change', async () => {
      const token = ++uploadToken;
      pdfInfo.replaceChildren();
      pdfChunks = [];
      pdfLoaded = false;

      const file = pdfUpload.files?.[0];
      if (!file) return;
      if (!isPdfFile(file)) {
        showPdfStatus(['File bukan PDF.'], true);
        return;
      }

      pdfFileName = file.name;
      showPdfStatus(['Memproses ', bold(file.name), '...']);

      let pdfjsLib;
      try {
        pdfjsLib = await loadPdfJs();
      } catch (error) {
        console.error(error);
        if (token === uploadToken) {
          showPdfStatus(['Gagal memuat pustaka pdf.js. Periksa koneksi internet Anda.'], true);
        }
        return;
      }

      try {
        const allText = await readPdfText(pdfjsLib, file);
        if (token !== uploadToken) return; // a newer upload took over

        const chunks = splitText(allText);
        if (chunks.length === 0) {
          showPdfStatus(['Tidak ada teks yang dapat dibaca dari PDF ini.'], true);
          return;
        }
        pdfChunks = chunks;
        pdfLoaded = true;
        showPdfStatus(['Dokumen ', bold(file.name), ` dimuat (${chunks.length} potongan dokumen).`]);
      } catch (error) {
        console.error(error);
        if (token === uploadToken) {
          showPdfStatus(['Gagal membuka PDF. Pastikan PDF tidak rusak.'], true);
        }
      }
    });

    // ---------------------------------------------------------------------
    // Chat logic
    // ---------------------------------------------------------------------

    const messagesDiv = document.getElementById('messages');
    const chatForm = document.getElementById('chat-form');
    const msgInput = document.getElementById('msg-input');

    /**
     * Render a message into a bubble without ever parsing it as HTML, because
     * answers quote text taken from an untrusted PDF.
     *
     * @param {HTMLElement} bubble - Target `.msg-bubble` element.
     * @param {string|{res: string, _ref: boolean}} content - Plain text, or a
     *   document-backed answer that gets a "[Referensi dokumen]" footer.
     */
    function renderMessage(bubble, content) {
      const isReferenced = content !== null && typeof content === 'object' && content._ref;
      if (!isReferenced) {
        bubble.textContent = content;
        return;
      }
      const ref = document.createElement('span');
      ref.className = 'msg-ref';
      ref.textContent = '[Referensi dokumen]';
      bubble.replaceChildren(
        document.createTextNode(content.res.trimEnd()),
        document.createElement('br'),
        ref,
      );
    }

    /**
     * Append a message to the conversation and scroll it into view.
     *
     * @param {string|{res: string, _ref: boolean}} text - Message content.
     * @param {string} [who='user'] - 'user' or 'bot' (anything else is treated as bot).
     * @param {boolean} [loading=false] - Show animated dots instead of `text`.
     * @returns {HTMLDivElement} The created message row.
     */
    function addMsg(text, who = 'user', loading = false) {
      const msgDiv = document.createElement('div');
      msgDiv.className = `msg-${who === 'user' ? 'user' : 'bot'}`;

      const bubble = document.createElement('span');
      bubble.className = 'msg-bubble';
      if (loading) {
        for (let i = 0; i < 3; i++) {
          const dot = document.createElement('span');
          dot.className = 'loading-dot';
          bubble.appendChild(dot);
        }
      } else if (who === 'bot') {
        // Bot messages may carry a document reference footer.
        renderMessage(bubble, text);
      } else {
        bubble.textContent = text;
      }

      msgDiv.appendChild(bubble);
      messagesDiv.appendChild(msgDiv);
      messagesDiv.scrollTop = messagesDiv.scrollHeight;
      return msgDiv;
    }

    /**
     * Produce the chatbot answer for a question by quoting the best-matching
     * document chunks.
     *
     * @param {string} userMsg - The user's question.
     * @returns {Promise<string|{res: string, _ref: true}>} A plain notice when
     *   no document is loaded or nothing matches; otherwise the quoted
     *   excerpts flagged with `_ref`.
     */
    async function chatBotAnswer(userMsg) {
      if (!pdfLoaded || pdfChunks.length === 0) {
        return 'Silakan unggah dokumen PDF terlebih dahulu.';
      }

      // Keep the top chunks whose similarity is high enough.
      const contexts = pdfChunks
        .map((chunk, index) => ({ index, score: simpleSimilarity(userMsg, chunk) }))
        .sort((a, b) => b.score - a.score)
        .slice(0, MAX_CONTEXTS)
        .filter(({ score }) => score > SIMILARITY_THRESHOLD)
        .map(({ index }) => pdfChunks[index]);

      if (contexts.length === 0) {
        return 'Maaf, saya tidak menemukan jawaban dalam dokumen.';
      }

      let res = 'Berdasarkan dokumen, berikut adalah informasi terkait:\n';
      for (const ctx of contexts) {
        const excerpt = ctx.slice(0, EXCERPT_MAX_CHARS).replace(/\n/g, ' ');
        const ellipsis = ctx.length > EXCERPT_MAX_CHARS ? '...' : '';
        res += `- "${excerpt}${ellipsis}"\n`;
      }
      return { res, _ref: true };
    }

    // Pressing Enter in the text field triggers the form's native submit, so
    // no separate keydown handler is needed.
    chatForm.addEventListener('submit', (e) => {
      e.preventDefault();
      const userMsg = msgInput.value.trim();
      if (!userMsg) return;

      addMsg(userMsg, 'user');
      msgInput.value = '';

      // Show loading dots, then swap them for the answer.
      const bubble = addMsg('', 'bot', true).querySelector('.msg-bubble');
      setTimeout(async () => {
        try {
          renderMessage(bubble, await chatBotAnswer(userMsg));
        } catch (error) {
          // Never leave the loading dots spinning forever.
          console.error(error);
          bubble.textContent = 'Maaf, terjadi kesalahan saat memproses pertanyaan.';
        }
        messagesDiv.scrollTop = messagesDiv.scrollHeight;
      }, FAKE_THINKING_MS);
    });
  </script>
</body>
</html>
```