import * as pdfjs from 'pdfjs-dist';
import { RenderParameters, TextItem } from 'pdfjs-dist/types/src/display/api';

import { scanTextFromImage } from './scanner';

// Tell PDF.js where to load its worker script from. The URL is resolved
// relative to this module via `import.meta.url`; this assignment runs once as
// a side effect of importing this module, before any document is opened below.
pdfjs.GlobalWorkerOptions.workerSrc = new URL(
  'pdfjs-dist/legacy/build/pdf.worker.mjs',
  import.meta.url,
).toString();

/**
 * Renders each page of a PDF onto a canvas and encodes it as a JPEG `File`.
 *
 * Rendering is best-effort per page: a page that fails to render or encode is
 * skipped (with a console warning) so the remaining pages are still processed.
 * The returned array may therefore contain fewer entries than the PDF has
 * pages, and may be empty.
 *
 * @param pdfData Raw PDF bytes. NOTE: PDF.js may take ownership of (transfer)
 *   this typed array, so callers that still need the bytes afterwards should
 *   pass a copy.
 * @returns One JPEG file per successfully rendered page, in page order. Every
 *   file is named `temporaryFileName`.
 * @throws If the document cannot be opened or a page cannot be loaded.
 */
export async function convertPdfToImages(pdfData: Uint8Array): Promise<File[]> {
  const scale = 1.5;
  const loadingTask = pdfjs.getDocument({ data: pdfData });
  const images: File[] = [];

  try {
    const pdf = await loadingTask.promise;

    for (let i = 1; i <= pdf.numPages; i++) {
      const page = await pdf.getPage(i);
      const viewport = page.getViewport({ scale });
      const canvas = document.createElement('canvas');
      canvas.height = viewport.height;
      canvas.width = viewport.width;

      try {
        // getContext() returns null when a 2D context cannot be created;
        // fail this page explicitly instead of handing null to PDF.js.
        const context = canvas.getContext('2d');
        if (!context) {
          throw new Error('2D canvas context is unavailable');
        }
        const renderContext = {
          canvasContext: context,
          viewport,
        };
        await page.render(renderContext as RenderParameters).promise;
        images.push(await canvasToFile(canvas, 'temporaryFileName', 'image/jpeg'));
      } catch (error: unknown) {
        // Keep processing the other pages so we always return an array, but
        // leave a trace so skipped pages are not completely invisible.
        console.warn(`convertPdfToImages: skipping page ${i}`, error);
      } finally {
        // Shrink the canvas so its backing store can be reclaimed promptly.
        canvas.width = 0;
        canvas.height = 0;
      }
    }
  } finally {
    // Release the document and its worker resources, on success or failure.
    await loadingTask.destroy();
  }

  return images;
}

/**
 * Promise wrapper around the callback-based `canvas.toBlob`.
 *
 * @param canvas Canvas whose current contents are encoded.
 * @param filename Name given to the resulting `File`.
 * @param mimeType Encoding requested from `toBlob`, also used as the file type.
 * @returns A promise for the encoded `File`; it rejects with an `Error` when
 *   `toBlob` hands back `null`.
 */
function canvasToFile(
  canvas: HTMLCanvasElement,
  filename: string,
  mimeType: string,
): Promise<File> {
  return new Promise<File>((fulfil, fail) => {
    const onEncoded = (encoded: Blob | null): void => {
      if (encoded) {
        fulfil(new File([encoded], filename, { type: mimeType }));
      } else {
        fail(new Error('Failed to convert canvas to Blob'));
      }
    };

    canvas.toBlob(onEncoded, mimeType);
  });
}

/**
 * Extracts the text of a PDF file, falling back to OCR for scanned documents.
 *
 * Each page's text items are concatenated, separated by single spaces. When a
 * page yields no text items, the first page of the PDF is rendered to an image
 * and passed to `scanTextFromImage`; that OCR text then replaces whatever text
 * has been accumulated so far. The OCR pass runs at most once per call and its
 * result is reused for any further text-less pages.
 *
 * @param file The PDF file to read.
 * @returns The extracted (or OCR'd) text; an empty string if the file reader
 *   produced no data.
 * @throws Rejects with the underlying error if reading, parsing, rendering or
 *   OCR fails. A `FileReader` failure rejects with the reader's error event.
 */
export async function extractTextFromPDF(file: File): Promise<string> {
  const reader = new FileReader();

  return new Promise((resolve, reject) => {
    reader.onload = async () => {
      try {
        if (!reader.result) {
          // Settle the outer promise; a bare `return` here would leave the
          // caller waiting forever.
          resolve('');
          return;
        }
        const pdfData = new Uint8Array(reader.result as ArrayBuffer);
        // PDF.js may take ownership of the array passed to getDocument(), so
        // keep a separate copy for the OCR fallback's own getDocument() call.
        const pdfDataCopy = new Uint8Array(pdfData);

        const loadingTask = pdfjs.getDocument({ data: pdfData });
        try {
          const pdf = await loadingTask.promise;
          const maxPages = pdf.numPages;
          let textContent = '';

          // The OCR fallback is expensive and consumes `pdfDataCopy`, so it is
          // attempted only once and its result cached.
          let ocrAttempted = false;
          let ocrText: string | undefined;

          for (let pageNum = 1; pageNum <= maxPages; pageNum++) {
            const page = await pdf.getPage(pageNum);
            const textContentItems = await page.getTextContent();
            // Cast as in the original: with default options the items are
            // expected to be plain text items — TODO confirm if options change.
            textContentItems.items.forEach((item) => {
              textContent += (item as TextItem).str + ' ';
            });

            if (textContentItems.items.length === 0) {
              if (!ocrAttempted) {
                ocrAttempted = true;
                const imageFiles = await convertPdfToImages(pdfDataCopy);
                if (imageFiles[0]) {
                  // first page is enough for most receipts
                  ocrText = await scanTextFromImage(imageFiles[0]);
                }
              }
              if (ocrText !== undefined) {
                textContent = ocrText;
              }
            }
          }

          resolve(textContent);
        } finally {
          // Release the document and its worker resources.
          await loadingTask.destroy();
        }
      } catch (error) {
        reject(error);
      }
    };

    reader.onerror = (error) => {
      reject(error);
    };

    reader.readAsArrayBuffer(file);
  });
}
