import { PDFDocumentProxy, PDFPageProxy, getDocument } from 'npm:pdfjs-dist';

/**
 * Extracts the plain text of every page in a PDF document.
 *
 * Pages are read in order (1..numPages). Within a page, the text items are
 * joined with a single space; pages are then joined with a newline.
 *
 * @param pdfPath - Source handed straight to pdf.js `getDocument`
 *   (typically a path or URL to the PDF).
 * @returns The text of all pages, one page per line-separated chunk.
 * @throws Propagates any rejection from pdf.js while loading the document,
 *   fetching a page, or reading its text content.
 */
export async function parsePDF(pdfPath: string): Promise<string> {
  const loadingTask = getDocument(pdfPath);

  try {
    const pdf: PDFDocumentProxy = await loadingTask.promise;
    const pageTexts: string[] = [];

    // pdf.js page numbers are 1-based.
    for (let pageNumber = 1; pageNumber <= pdf.numPages; pageNumber++) {
      const page: PDFPageProxy = await pdf.getPage(pageNumber);
      const pageContent = await page.getTextContent();

      // `items` mixes text items with marked-content markers; only the
      // former carry a `str` property, so skip everything else instead of
      // joining `undefined` entries into the output.
      const pageText = pageContent.items
        .flatMap((item) => ('str' in item ? [item.str] : []))
        .join(' ');

      pageTexts.push(pageText);
    }

    return pageTexts.join('\n');
  } finally {
    // Release the document, its worker and any pending requests whether
    // extraction succeeded or failed.
    await loadingTask.destroy();
  }
}