Data Extraction from Documents Tool
Paste Document Text Here
Extracted Data
Results will appear here after extraction.
Please paste some document text to extract.
'; downloadPdfBtn.classList.add('hidden'); return; } // Simulated Extraction Logic let extracted = { keyValuePairs: [], tables: [] }; // Key-Value Pair Extraction (using regex) const patterns = { "Invoice Number": /Invoice Number:\s*(\S+)/i, "Date": /Date:\s*(\S+)/i, "Due Date": /Due Date:\s*(\S+)/i, "Project": /Project:\s*(.+)/i, "Subtotal": /Subtotal:\s*(\S+)/i, "Tax": /Tax.*:\s*(\S+)/i, "Total": /^TOTAL:\s*(\S+)/im, "Client": /Bill To:\s*([^\n]+)/i }; for (const key in patterns) { const match = text.match(patterns[key]); if (match && match[1]) { extracted.keyValuePairs.push({ key, value: match[1].trim() }); } } // Table Extraction (simple markdown-like table) const tableRegex = /Description\s*\|.*?\n-+\s*\|.*?\n((?:.*\|.*\n)+)/i; const tableMatch = text.match(tableRegex); if (tableMatch && tableMatch[1]) { const headers = ["Description", "Quantity", "Unit Price", "Total"]; const rows = tableMatch[1].trim().split('\n').map(row => { return row.split('|').map(cell => cell.trim()); }); extracted.tables.push({ title: "Line Items", headers, rows }); } displayResults(extracted); }; /** * Renders the extracted data in the results panel. * @param {object} data - The extracted data object. */ const displayResults = (data) => { let html = ''; if (data.keyValuePairs.length > 0) { html += 'Key Information
';
data.keyValuePairs.forEach(pair => {
html += `
';
}
if (data.tables.length > 0) {
data.tables.forEach(table => {
html += `
${pair.key}:
${pair.value}
`;
});
html += '${table.title}
`; html += '| ${header} | `; }); html += '
|---|
| ${cell} | `; }); html += '
Could not extract structured data. Please check the text format.
'; downloadPdfBtn.classList.add('hidden'); } else { downloadPdfBtn.classList.remove('hidden'); } resultsOutput.innerHTML = html; }; /** * Generates and downloads a PDF of the extracted data. */ const generatePdf = () => { const { jsPDF } = window.jspdf; const doc = new jsPDF(); let yPos = 20; doc.setFontSize(18); doc.text('Extracted Document Data', 14, yPos); yPos += 15; // Key-Value Pairs const kvpSection = resultsOutput.querySelector('h3'); if (kvpSection && kvpSection.textContent === 'Key Information') { doc.setFontSize(14); doc.setFont(undefined, 'bold'); doc.text('Key Information', 14, yPos); yPos += 8; const pairs = Array.from(kvpSection.nextElementSibling.children).map(div => { return [div.querySelector('strong').textContent, div.querySelector('span').textContent]; }); doc.autoTable({ startY: yPos, body: pairs, theme: 'plain', styles: { fontSize: 10, cellPadding: 1.5 }, columnStyles: { 0: { fontStyle: 'bold' } } }); yPos = doc.lastAutoTable.finalY + 10; } // Tables const tableEl = resultsOutput.querySelector('table'); if (tableEl) { const tableTitle = resultsOutput.querySelector('h3:last-of-type').textContent; doc.setFontSize(14); doc.setFont(undefined, 'bold'); doc.text(tableTitle, 14, yPos); yPos += 8; doc.autoTable({ html: tableEl, startY: yPos, theme: 'grid', headStyles: { fillColor: [243, 244, 246], textColor: [55, 65, 81], fontStyle: 'bold' }, styles: { cellPadding: 2.5, fontSize: 9 }, }); } doc.save('Extracted_Data.pdf'); }; // --- Event Listeners --- extractBtn.addEventListener('click', extractData); downloadPdfBtn.addEventListener('click', generatePdf); });