import { Question } from './type'; // Adjust this import to your file structure
import sanitizeHtml from 'sanitize-html';
import DOMPurify from 'dompurify';
import { MathJax } from 'better-react-mathjax';
import { Box } from '@mui/material';

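/**
 * Extract exam questions from HTML content via `extractQuestions`.
 * Three kinds of question are recognised: multiple choice (mcq),
 * true/false statements (mctf) and short answer (sa).
 * @param html - The HTML string content extracted from DOCX.
 * @returns An array of Question objects.
 */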

//Example of text from DOCX file
/*
ĐỀ SỐ 1
PHẦN I. Câu trắc nghiệm nhiều phương án lựa chọn. Thí sinh trả lời từ câu 1 đến câu 18. Mỗi câu hỏi thí sinh chỉ chọn một phương án.
Câu 1: Nội dung câu 1.
A. Câu a.	B. Câu b.	C. Câu c.	D. Câu d.
Hướng dẫn
Chọn A
PHẦN II. Câu trắc nghiệm đúng sai. Thí sinh trả lời từ câu 1 đến câu 4. Trong mỗi ý a), b), c), d) ở mỗi câu, thí sinh chọn đúng hoặc sai.
Câu 1: Nội dung câu 1.
a) Câu a. 
b) Câu b. 
c) Câu c.
d) Câu d.
Hướng dẫn
Giải thích câu a ⇒ a) Sai
Giải thích câu b ⇒ b) Đúng
Giải thích câu c ⇒ c) Đúng
Giải thích câu d ⇒ d) Sai
PHẦN III. Câu trắc nghiệm trả lời ngắn. Thí sinh trả lời từ câu 1 đến câu 6
Câu 1: Nội dung câu 1.
Hướng dẫn
Giải thích câu 1
Trả lời ngắn: 0,01
*/

//Example of html from DOCX file
/*
<p><span style="font-weight: bold; ">ĐỀ SỐ 1</span></p>
<p><span style="font-weight: bold; ">PHẦN I. Câu trắc nghiệm nhiều phương án lựa chọn.</span><span style=""> Thí sinh trả lời từ câu 1 đến câu 18. Mỗi câu hỏi thí sinh chỉ chọn một phương án.</span></p>
<p><span style="font-weight: bold; ">Câu 1:</span><span style=""> Nội dung câu 1.</span></p>
<p><span style="font-weight: bold; ">A.</span><span style=""> Câu a.</span><span style="font-weight: bold; ">B.</span><span style="font-style: italic; "> </span><span style="">Câu b.</span><span style="font-weight: bold; ">C.</span><span style=""> Câu c.</span><span style="font-weight: bold; ">D.</span><span style=""> Câu d.</span></p>
<p><span style="font-weight: bold; ">Hướng dẫn</span></p>
<p><span style="font-weight: bold; ">Chọn A</span></p>
<p><span style="font-weight: bold; ">PHẦN II. Câu trắc nghiệm đúng sai. </span><span style="">Thí sinh trả lời từ câu 1 đến câu 4. Trong mỗi ý </span><span style="font-weight: bold; ">a), b), c), d)</span><span style=""> ở mỗi câu, thí sinh chọn đúng hoặc sai.</span></p>
<p><span style="font-weight: bold; ">Câu 1: </span><span style="">Nội dung câu 1.</span></p>
<p><span style="font-weight: bold; ">a)</span><span style=""> Câu a. </span></p>
<p><span style="font-weight: bold; ">b)</span><span style=""> Câu b. </span></p>
<p><span style="font-weight: bold; ">c)</span><span style=""> Câu c.</span></p>
<p><span style="font-weight: bold; ">d)</span><span style=""> Câu d.</span></p>
<p><span style="font-weight: bold; ">Hướng dẫn</span></p>
<p><span style="">Giải thích câu a ⇒</span><span style="font-weight: bold; "> a) Sai</span></p>
<p><span style="">Giải thích câu b ⇒</span><span style="font-weight: bold; "> b) Đúng</span></p>
<p><span style="">Giải thích câu c ⇒</span><span style="font-weight: bold; "> c) Đúng</span></p>
<p><span style="">Giải thích câu d ⇒</span><span style="font-weight: bold; "> d) Sai</span></p>
<p><span style="font-weight: bold; ">PHẦN III. Câu trắc nghiệm trả lời ngắn. </span><span style="">Thí sinh trả lời từ câu 1 đến câu 6</span></p>
<p><span style="font-weight: bold; ">Câu 1:</span><span style=""> Nội dung câu 1.</span></p>
<p><span style="font-weight: bold; ">Hướng dẫn</span></p>
<p><span style="">Giải thích câu 1</span></p>
<p><span style="font-weight: bold; ">Trả lời ngắn:</span><span style=""> 0,01</span></p>
*/

// Image URL found at the very end of a section preamble or of an explanation.
// It is carried over and used as the image of the next parsed question.
let pendingImageUrl = '';
// Image URL found at the very end of a question's own text (after its
// explanation has been cut off). It is used as that question's explanation image.
let pendingExplainImageUrl = '';

/**
 * Parses exam HTML (converted from DOCX) into a flat list of questions.
 *
 * The document is cut at every "PHẦN <number>" / "PHẦN <roman numeral>"
 * heading and each slice is handed to `extractQuestionsFromContent`. When no
 * heading exists, the whole document is parsed as one unnamed section.
 *
 * @param html - The HTML string content extracted from DOCX.
 * @returns The questions in document order.
 * @throws Error propagated from `extractQuestionsFromContent` when a question
 *   has no "Hướng dẫn" block or no recognisable answer.
 */
export const extractQuestions = (html: string): Question[] => {
  const questions: Question[] = [];

  // The pending-image slots are module-level state. Clear them so that an
  // image left over from a previous document (or from a run that threw
  // half-way) is never attached to the first question of this one.
  pendingImageUrl = '';
  pendingExplainImageUrl = '';

  // Regex patterns to match sections, questions, options, and answers
  const sectionNumberPattern = /PHẦN\s(\d+)/g; // PHẦN 1, PHẦN 2, PHẦN 3, ...
  const sectionIPattern = /PHẦN\s([IVXLCDM]+)/g; // PHẦN I, PHẦN II, PHẦN III, ...
  const questionPattern = /Câu\s(\d+):\s*([\s\S]*?)(?=(?:Câu\s\d+:|$))/g; // Câu 1: ... Câu 2: ...
  const optionMCQPattern = /([ABCD])\.\s*([\s\S]*?)(?=(?:[ABCD]\.|$))/g; // A. ... B. ... C. ... D. ...
  const optionMCTFPattern = /([abcd])\)\s*([\s\S]*?)(?=(?:[abcd]\)|$))/g; // a) ... b) ... c) ... d) ...
  const answerMCQPattern = /Chọn\s*([ABCD])/; // Chọn A, Chọn B, Chọn C, Chọn D
  const answerMCTFPattern = /([abcd])\)\s*(Đúng|Sai)/; // a) Đúng, b) Sai, c) Đúng, d) Sai
  const answerSAPattern = /Trả lời ngắn:\s*([\s\S]*)/; // Trả lời ngắn: ...
  const explainPattern = /Hướng dẫn\s*([\s\S]*?)(?=(?:Câu\s\d+:|$))/g; // Hướng dẫn ... (Answer content)

  // Collect headings written with digits and with Roman numerals, then put
  // them back into document order.
  const sectionMatches = [
    ...html.matchAll(sectionNumberPattern),
    ...html.matchAll(sectionIPattern),
  ].sort((a, b) => (a.index ?? 0) - (b.index ?? 0));

  // No headings at all: treat the entire HTML as one large, unnamed section.
  if (sectionMatches.length === 0) {
    extractQuestionsFromContent(
      html,
      questions,
      '',
      questionPattern,
      optionMCQPattern,
      optionMCTFPattern,
      answerMCQPattern,
      answerMCTFPattern,
      answerSAPattern,
      explainPattern
    );
    return questions;
  }

  sectionMatches.forEach((sectionMatch, sectionIndex) => {
    const currentSection = sectionMatch[0]; // e.g., "PHẦN I" or "PHẦN 1"

    // A section runs from its heading up to the next heading (or the end).
    const sectionStart = sectionMatch.index ?? 0;
    const sectionEnd = sectionMatches[sectionIndex + 1]?.index ?? html.length;

    // Re-attach the opening tags that precede the heading text so the slice
    // starts with balanced markup. The heading's own match index is used:
    // searching for the heading text again would land on a later heading,
    // because "PHẦN I" is a prefix of "PHẦN II" and "PHẦN III".
    const leadingTags = findRollBackTag(html, sectionStart);
    const sectionContentAll = leadingTags + html.slice(sectionStart, sectionEnd);

    extractQuestionsFromContent(
      sectionContentAll,
      questions,
      currentSection,
      questionPattern,
      optionMCQPattern,
      optionMCTFPattern,
      answerMCQPattern,
      answerMCTFPattern,
      answerSAPattern,
      explainPattern
    );
  });

  return questions;
};

/**
 * Returns the markup that sits between the nearest closing tag at or before
 * `rollbackPos` and `rollbackPos` itself.
 *
 * In practice this recovers the opening tags (e.g. `<p><span ...>`) that
 * immediately precede a piece of text, so that text can be sliced out
 * together with the tags that wrap it.
 *
 * @param questionText - The full text to search in.
 * @param rollbackPos - Position to search backwards from.
 * @returns The text after the closing tag's `>` up to `rollbackPos`, or an
 *   empty string when there is no closing tag before the position, the tag is
 *   never terminated by `>`, or the position is negative.
 */
const findRollBackTag = (questionText: string, rollbackPos: number): string => {
  // A negative position would be read by slice() as "from the end".
  if (rollbackPos < 0) {
    return '';
  }

  // Nearest "</" starting at or before the position. Unlike a manual
  // `while (pos > 0)` scan, this also considers a closing tag at index 0.
  const closingTagStart = questionText.lastIndexOf('</', rollbackPos);
  if (closingTagStart === -1) {
    return '';
  }

  const closingTagEnd = questionText.indexOf('>', closingTagStart);
  if (closingTagEnd === -1) {
    return '';
  }

  // When the closing tag starts exactly at rollbackPos its '>' lies beyond
  // it, and slice() yields '' because start > end.
  return questionText.slice(closingTagEnd + 1, rollbackPos);
};

/** Converts a base64 `data:image...` URL into a File; returns null for any other URL. */
const dataUrlToImageFile = (url: string, fileName: string): File | null => {
  if (!url.startsWith('data:image')) {
    return null;
  }
  const mimeType = url.match(/data:([^;]+)/)?.[1] || 'image/jpeg'; // Extract MIME type
  return base64ToFile(url, fileName, mimeType);
};

/**
 * Parses every "Câu N:" question found in `content` and appends the resulting
 * Question objects to `questions`.
 *
 * For each question the text is split into question body, options and the
 * "Hướng dẫn" (explanation) block. The question type is inferred from the
 * answer found in the explanation, tested in this order:
 *   - "Chọn X"            -> 'mcq',  correctAnswer is the letter
 *   - "a) Đúng" / "a) Sai" -> 'mctf', correctAnswer is a string of '1'/'0' in
 *                             the order the verdicts appear
 *   - "Trả lời ngắn: ..."  -> 'sa',   correctAnswer is the plain-text value
 *
 * Images are resolved with the help of the module-level `pendingImageUrl` /
 * `pendingExplainImageUrl` slots, which this function both reads and writes.
 *
 * @param content - HTML of one section (or of the whole document).
 * @param questions - Output array; parsed questions are pushed onto it.
 * @param currentSection - Section heading text, or '' when there is none.
 * @param questionPattern - Global regex matching "Câu N: ..." up to the next question.
 * @param optionMCQPattern - Global regex matching "A. ..." style options.
 * @param optionMCTFPattern - Global regex matching "a) ..." style statements.
 * @param answerMCQPattern - Regex matching "Chọn X".
 * @param answerMCTFPattern - Regex matching "a) Đúng|Sai".
 * @param answerSAPattern - Regex matching "Trả lời ngắn: ...".
 * @param explainPattern - Global regex matching the "Hướng dẫn ..." block.
 * @throws Error when a question has no explanation block, or when no answer
 *   can be recognised inside the explanation.
 */
const extractQuestionsFromContent = (
  content: string,
  questions: Question[],
  currentSection: string,
  questionPattern: RegExp,
  optionMCQPattern: RegExp,
  optionMCTFPattern: RegExp,
  answerMCQPattern: RegExp,
  answerMCTFPattern: RegExp,
  answerSAPattern: RegExp,
  explainPattern: RegExp
) => {
  const imagePattern = /<img[^>]+src="([^">]+)"/;

  // Everything before the first question is the section preamble; the section
  // title itself is removed from it.
  const sectionContentMatch = content.split(questionPattern)[0];
  const sectionTrim = sectionContentMatch.replace(currentSection, '').trim();

  // An image that closes the preamble is queued for the first question.
  checkImageAtTheEnd(sectionTrim);

  const sectionContent = cleanHTMLTags(sectionTrim);

  let match: RegExpExecArray | null;
  while ((match = questionPattern.exec(content)) !== null) {
    const index = parseInt(match[1], 10); // Question number from "Câu N:"
    let questionText = match[2].trim(); // Raw HTML of the question, options and explanation

    const options: { [key: string]: string } = { A: '', B: '', C: '', D: '' };

    // Locate the explanation first and cut it out of the question text.
    const explainMatch = questionText.match(explainPattern);
    if (!explainMatch) {
      throw new Error(
        `Hướng dẫn không được tìm thấy cho câu hỏi ${index} trong phần ${currentSection}`
      );
    }

    // Prepend the opening tags that precede "Hướng dẫn" so the explanation
    // carries the markup that wraps its first words.
    const rollbackText = findRollBackTag(
      questionText,
      questionText.lastIndexOf(explainMatch[0])
    );
    const explainWithImage = rollbackText + explainMatch[0].trim(); // Raw HTML, images included
    questionText = questionText.replace(explainWithImage, '').trim();

    // Sanitized explanation: <img> and other non-whitelisted tags are removed.
    const explain = cleanHTMLTags(explainWithImage, true);

    // The answer format found in the explanation decides the question type.
    const correctAnswerMatchMCQ = explain.match(answerMCQPattern);
    const correctAnswerMatchMCTF = explain.match(answerMCTFPattern);
    const correctAnswerMatchSA = explain.match(answerSAPattern);

    let correctAnswer = '';
    let questionType: 'mcq' | 'mctf' | 'sa' = 'mcq';

    if (correctAnswerMatchMCQ) {
      correctAnswer = correctAnswerMatchMCQ[1].trim();
      questionType = 'mcq';
    } else if (correctAnswerMatchMCTF) {
      // Consume the verdicts one by one, appending '1' for Đúng and '0' for Sai.
      let copyExplain = explain;
      while (true) {
        const nextMatch = copyExplain.match(answerMCTFPattern);
        if (!nextMatch) {
          break;
        }
        correctAnswer += nextMatch[2] === 'Đúng' ? '1' : '0';
        copyExplain = copyExplain.replace(nextMatch[0], '').trim();
      }

      questionType = 'mctf';
    } else if (correctAnswerMatchSA) {
      // Keep only the text of the answer. The captured group usually starts
      // with closing/opening tags followed by a space, so trim the result.
      correctAnswer = stripHTMLTags(correctAnswerMatchSA[1]).trim();
      questionType = 'sa';
    } else {
      throw new Error(
        `Câu trả lời không được tìm thấy cho câu hỏi ${index} trong phần ${currentSection}`
      );
    }

    // Collect the options for the detected type; short-answer questions have
    // none, so their options stay empty.
    let optionMatch: RegExpExecArray | null;

    if (questionType === 'mcq') {
      while ((optionMatch = optionMCQPattern.exec(questionText)) !== null) {
        const optionLabel = optionMatch[1]; // e.g., "A", "B", "C", "D"
        const optionText = optionMatch[2].trim();

        options[optionLabel] = cleanHTMLTags(optionText);
      }
    } else if (questionType === 'mctf') {
      while ((optionMatch = optionMCTFPattern.exec(questionText)) !== null) {
        const optionLabel = optionMatch[1]; // e.g., "a", "b", "c", "d"
        const optionText = optionMatch[2].trim();

        options[optionLabel.toUpperCase()] = cleanHTMLTags(optionText);
      }
    }

    // The question body is whatever precedes the first option marker.
    let extractQuestionText = '';

    if (questionType === 'mcq') {
      extractQuestionText = cleanHTMLTags(
        questionText.split(/[ ]?[A-D]\./)[0].trim()
      );
    } else if (questionType === 'mctf') {
      extractQuestionText = cleanHTMLTags(
        questionText.split(/[ ]?[abcd]\)/)[0].trim()
      );
    } else {
      extractQuestionText = cleanHTMLTags(questionText);
    }

    // Question image: a queued image wins, otherwise the first <img> inside
    // the question text.
    let questionImageUrl = '';
    if (pendingImageUrl) {
      questionImageUrl = pendingImageUrl;
      pendingImageUrl = '';
    } else {
      const questionImageMatch = questionText.match(imagePattern);
      questionImageUrl = questionImageMatch ? questionImageMatch[1] : '';
    }
    const questionImage = dataUrlToImageFile(
      questionImageUrl,
      'questionImage.jpeg'
    );

    // An image that closes the question text is queued as the explanation image.
    checkImageAtTheEnd(questionText, true);

    // Explanation image: a queued image wins, otherwise the first <img> inside
    // the explanation. The raw explanation must be searched here, because the
    // sanitized `explain` no longer contains any <img> tag.
    let explainImageUrl = '';
    if (pendingExplainImageUrl) {
      explainImageUrl = pendingExplainImageUrl;
      pendingExplainImageUrl = '';
    } else {
      const explainImageMatch = explainWithImage.match(imagePattern);
      explainImageUrl = explainImageMatch ? explainImageMatch[1] : '';
    }
    const explainImage = dataUrlToImageFile(
      explainImageUrl,
      'explainImage.jpeg'
    );

    // An image that closes the explanation is queued for the next question.
    checkImageAtTheEnd(explainWithImage);

    const question: Question = {
      questionId: `q_${index}_${currentSection}`, // Built from question number and section
      section: currentSection || '',
      sectionContent: sectionContent,
      index,
      type: questionType,
      correctAnswer,
      question: extractQuestionText, // The part before the options
      questionImage: questionImage,
      questionImageUrl: questionImageUrl || '',
      explain: explain,
      explainImage: explainImage,
      // NOTE(review): kept empty as before even though explainImageUrl may have
      // been resolved above; confirm whether consumers expect the URL here.
      explainImageUrl: '',
      questionA: options.A,
      questionB: options.B,
      questionC: options.C,
      questionD: options.D,
    };

    questions.push(question);
  }
};

/**
 * Builds a File from a base64 data URL.
 *
 * @param base64String - Data URL of the form `data:<mime>;base64,<payload>`;
 *   only the part after the first comma is decoded.
 * @param fileName - Name given to the resulting File.
 * @param mimeType - MIME type given to the resulting File.
 * @returns A File holding the decoded bytes.
 */
function base64ToFile(
  base64String: string,
  fileName: string,
  mimeType: string
): File {
  // Drop the "data:...;base64" header and decode the payload into a binary string.
  const [, payload] = base64String.split(',');
  const decoded = atob(payload);

  // Each character of the binary string is one byte.
  const bytes = new Uint8Array(decoded.length);
  decoded.split('').forEach((ch, position) => {
    bytes[position] = ch.charCodeAt(0);
  });

  return new File([bytes], fileName, { type: mimeType });
}

/**
 * Sanitizes an HTML fragment down to MathML and inline-formatting markup.
 *
 * Only the whitelisted tags below survive; with `KEEP_CONTENT` the text inside
 * any removed tag is kept.
 *
 * @param content - HTML fragment to sanitize.
 * @param withptag - When true, `<p>` is whitelisted as well.
 * @returns The sanitized HTML string.
 */
const cleanHTMLTags = (content: string, withptag: boolean = false): string => {
  // Math-related tags plus <span>, <sup> and <sub>.
  const mathAndInlineTags = [
    'math',
    'mn',
    'mo',
    'mi',
    'mrow',
    'msup',
    'msub',
    'sup',
    'sub',
    'mfrac',
    'msqrt',
    'mtext',
    'munder',
    'mover',
    'mspace',
    'span',
  ];
  const allowedTags = withptag ? [...mathAndInlineTags, 'p'] : mathAndInlineTags;

  // Attributes needed for math rendering and styling.
  // NOTE(review): 'data-*' is probably matched as a literal attribute name, not
  // as a wildcard; DOMPurify seems to govern data attributes via
  // ALLOW_DATA_ATTR. Confirm.
  const allowedAttributes = [
    'style',
    'display',
    'xmlns',
    'class',
    'mathvariant',
    'data-*',
  ];

  return DOMPurify.sanitize(content, {
    ALLOWED_TAGS: allowedTags,
    ALLOWED_ATTR: allowedAttributes,
    KEEP_CONTENT: true,
  });
};

/**
 * Strips every HTML tag (and attribute) from `content`, keeping only its text.
 */
const stripHTMLTags = (content: string): string => {
  const textOnly = { allowedTags: [], allowedAttributes: {} };
  return sanitizeHtml(content, textOnly);
};

/**
 * Strips every HTML tag from `content` except `<img>`, which keeps only its
 * `src` and `alt` attributes.
 */
const stripHTMLTagsAndImg = (content: string): string => {
  const textAndImages = {
    allowedTags: ['img'],
    allowedAttributes: {
      img: ['src', 'alt'],
    },
  };
  return sanitizeHtml(content, textAndImages);
};

/**
 * Breaks long `<span class="math">` blocks into several math spans so they can
 * wrap, cutting in front of every occurrence of `splitTag`.
 *
 * A math span is left untouched when its text (tags and `&nbsp;` removed) is at
 * most 24 characters long, or when it does not contain `splitTag`.
 *
 * @param htmlContent - HTML that may contain `<span class="math">...</span>` blocks.
 * @param splitTag - Markup to break at, e.g. `<mo>=</mo>`; it is kept and
 *   becomes the start of the following span.
 * @returns The HTML with long math spans split.
 */
function processMathContent(htmlContent: string, splitTag: string): string {
  // Threshold above which math content is considered "too long".
  const maxLength = 24;
  const mathSpanPattern = /<span class="math">([\s\S]*?)<\/span>/g;

  // Closes the current math span with a small gap, then reopens a new one that
  // starts with the split tag.
  const breakMarkup = `<mspace width="5px"/></math></span><span class="math"><math>${splitTag}`;

  // A replacer function is used on purpose: its return value is inserted
  // verbatim, so "$&", "$$" etc. inside the math are not treated as
  // replacement patterns, and each span is rewritten exactly where it matched.
  return htmlContent.replace(mathSpanPattern, (mathSpan: string) => {
    const plainTextLength = mathSpan
      .replace(/<[^>]+>/g, '')
      .replace(/&nbsp;/g, '').length;

    if (plainTextLength <= maxLength) {
      return mathSpan;
    }

    return mathSpan.split(splitTag).join(breakMarkup);
  });
}

/**
 * Renders an HTML fragment inline inside a MathJax container.
 *
 * Long math spans are first split at `<mo>=</mo>` by `processMathContent`.
 * The markup is injected with `dangerouslySetInnerHTML` and is NOT sanitized
 * here, so callers must pass HTML that has already been sanitized.
 *
 * Unlike `renderHTMLContent2`, the output is not wrapped in a scrollable Box.
 */
export const renderHTMLContent = (htmlContent: string) => {
  const preparedHtml = processMathContent(htmlContent, '<mo>=</mo>');
  const innerHtml = { __html: preparedHtml };

  return (
    <MathJax inline={true}>
      <span className="mathjax" dangerouslySetInnerHTML={innerHtml} />
    </MathJax>
  );
};

/**
 * Renders an HTML fragment inline inside a MathJax container, wrapped in a Box
 * with `overflow: auto` and an 8px-high WebKit scrollbar.
 *
 * Long math spans are first split at `<mo>=</mo>` by `processMathContent`.
 * The markup is injected with `dangerouslySetInnerHTML` and is NOT sanitized
 * here, so callers must pass HTML that has already been sanitized.
 */
export const renderHTMLContent2 = (htmlContent: string) => {
  const preparedHtml = processMathContent(htmlContent, '<mo>=</mo>');
  const innerHtml = { __html: preparedHtml };

  const scrollableSx = {
    overflow: 'auto',
    '&::-webkit-scrollbar': {
      height: '8px', // Height of the horizontal scrollbar
    },
  };

  return (
    <Box sx={scrollableSx}>
      <MathJax inline={true}>
        <span className="mathjax" dangerouslySetInnerHTML={innerHtml} />
      </MathJax>
    </Box>
  );
};

/**
 * Detects an image that closes `sectionTrim` (i.e. no text follows it) and
 * queues its URL in one of the module-level pending slots.
 *
 * When several images trail the text with nothing but markup or whitespace
 * between them, the first image of that trailing run is the one queued.
 * Nothing is changed when the text does not end with an image.
 *
 * @param sectionTrim - HTML fragment to inspect.
 * @param isForExpain - When true the URL goes to `pendingExplainImageUrl`,
 *   otherwise to `pendingImageUrl`.
 */
const checkImageAtTheEnd = (
  sectionTrim: string,
  isForExpain: boolean = false
): void => {
  const images = [...sectionTrim.matchAll(/<img[^>]+src="([^">]+)"/g)];

  // Walk backwards from the last image. Looking only at the first image of
  // the fragment would miss a trailing image whenever an earlier image is
  // followed by text.
  let trailingImageUrl: string | undefined;
  for (let i = images.length - 1; i >= 0; i--) {
    const image = images[i];
    const tail = sectionTrim.slice(image.index ?? 0);

    // Whitespace between tags (e.g. a newline between paragraphs) does not
    // count as content following the image.
    if (stripHTMLTags(tail).trim().length !== 0) {
      break;
    }
    trailingImageUrl = image[1];
  }

  if (trailingImageUrl === undefined) {
    return;
  }

  if (isForExpain) {
    pendingExplainImageUrl = trailingImageUrl;
  } else {
    pendingImageUrl = trailingImageUrl;
  }
};
