import {
  type ListVariant,
  lists,
  listsByVariant,
} from '@blissbook/lib/document'
import some from 'lodash/some'

// Class names that mark a paragraph as list-like: a generic "Bullet" class
// plus the MsoListParagraph* family. isListNode checks each one as an exact
// class token via classList.contains.
const classNames = [
  'Bullet',
  'MsoListParagraph',
  'MsoListParagraphCxSpFirst',
  'MsoListParagraphCxSpMiddle',
  'MsoListParagraphCxSpLast',
]

// Patterns searched for in a node's innerHTML by isListNode; a match on any
// one of them is enough for the node to count as a list item.
// NOTE(review): these look like MS Word export markers (`mso-list` styles and
// the `[if !supportLists]` conditional comment) — confirm against real input.
const htmlRegExps = [/mso-list/, /\[if !supportLists\]/]

// Decide whether a paragraph looks like a list item. Any ONE of these is
// sufficient:
//  - it carries one of the known list class names,
//  - one of its class names contains "list" (case-insensitive),
//  - its inner HTML matches one of the list markup patterns.
function isListNode(node: HTMLElement) {
  const { classList } = node

  // Exact match against the known class names
  if (classNames.some((known) => classList.contains(known))) return true

  // Any class token containing "list", whatever its case
  if (some(classList, (token) => token.match(/list/i))) return true

  // Otherwise fall back to scanning the node's markup
  return htmlRegExps.some((pattern) => node.innerHTML.match(pattern))
}

// Characters recognised as a leading bullet glyph by getVariant.
const bullets = '··•●'

// Per-variant patterns tested against the complete text of a paragraph's
// first non-blank text node (the candidate list marker):
//  - bulleted: exactly one non-whitespace character
//  - numbered: one or more non-whitespace characters ending in "." or ")"
// getVariant only consults these after isListNode has accepted the paragraph.
const listIdByVariant: Record<ListVariant, RegExp> = {
  bulleted: /^\S$/,
  numbered: /^\S+[.)]$/,
}

// Collect every text node below `node`, in document order, appending them to
// `textNodes` (a fresh array by default) and returning that same array.
// `node` itself is never collected, only its descendants.
function getTextNodes(node: Node, textNodes: Text[] = []) {
  // Explicit stack instead of recursion. Children are pushed in reverse so
  // that popping visits them first-to-last (depth-first, document order).
  const pending: Node[] = Array.from(node.childNodes).reverse()

  let current = pending.pop()
  while (current) {
    // nodeType 3 is a text node
    if (current.nodeType === 3) {
      textNodes.push(current as Text)
    } else {
      for (const child of Array.from(current.childNodes).reverse()) {
        pending.push(child)
      }
    }
    current = pending.pop()
  }

  return textNodes
}

// Return the first text node below `node` (in the order getTextNodes yields
// them) whose text is not purely whitespace, or undefined when there is none.
function getFirstTextNode(node: HTMLElement) {
  for (const candidate of getTextNodes(node)) {
    const isBlank = candidate.textContent.trim().length === 0
    if (!isBlank) return candidate
  }
  return undefined
}

// Classify a candidate list-marker text node.
//
// Tests the node's complete text against each pattern in listIdByVariant, in
// the object's key order, and returns the first variant whose pattern matches
// (undefined when none does).
//
// The return type is pinned to ListVariant: iterating with Object.entries
// widens the keys to `string`, which would make the result unusable as a
// type-safe key for variant-indexed lookups.
function getVariantFromTextNode(textNode: Text): ListVariant | undefined {
  // textContent is typed as nullable; treat a missing value as "no marker"
  const text = textNode.textContent ?? ''
  // Safe cast: listIdByVariant is declared as an exact Record<ListVariant, _>
  const variants = Object.keys(listIdByVariant) as ListVariant[]
  return variants.find((variant) => listIdByVariant[variant].test(text))
}

// Get the list variant from this node
//
// Inspects the first non-blank text node below `node`:
//  1. If its first non-whitespace character is one of `bullets`, that glyph
//     (and any whitespace before it) is stripped from the text node and the
//     paragraph is reported as 'bulleted'.
//  2. Otherwise, and only when isListNode accepts the paragraph, the whole
//     text node is treated as the list marker: if it matches a variant pattern
//     it is removed from the DOM and that variant is returned.
// Returns undefined when the paragraph is not recognised as a list item.
// NOTE: a successful match mutates the DOM below `node`.
function getVariant(node: HTMLElement) {
  const textNode = getFirstTextNode(node)
  if (!textNode) return

  // Check for bullet text. Leading whitespace (e.g. a source line break or
  // indentation in front of the glyph) is skipped, so "\n• Item" is detected
  // just like "• Item".
  const text = (textNode.textContent ?? '').replace(/^\s+/, '')
  if (text.length > 0 && bullets.includes(text.charAt(0))) {
    textNode.textContent = text.slice(1)
    return 'bulleted'
  }

  // If MS Word list, use more forgiving regex
  if (!isListNode(node)) return

  // Determine the variant. The matched text node is the marker itself, so it
  // is dropped from the paragraph.
  const variant = getVariantFromTextNode(textNode)
  if (variant) {
    textNode.remove()
    return variant
  }
}

// Get the MS Word list indent
//
// Reads the first `level<N>` token from the node's inline `style` attribute
// and converts it to a zero-based indent (level1 -> 0, level2 -> 1, ...).
// Returns undefined when the node has no (or an empty) style attribute, and 0
// when the style carries no level token. The result is never negative.
// NOTE(review): the token presumably comes from an `mso-list: ... levelN ...`
// declaration — confirm against real input.
function getListIndent(node: HTMLElement) {
  const style = node.getAttribute('style')
  if (!style) return

  const levelMatch = style.replace(/\n+/g, '').match(/level(\d+)/)
  if (!levelMatch) return 0

  // Levels are 1-based. Clamp so a malformed "level0" cannot produce -1,
  // which is truthy and would surface as a bogus `bb-indent--1` class.
  return Math.max(0, Number.parseInt(levelMatch[1], 10) - 1)
}

// Convert paragraphs to lists
//
// Every <p> that getVariant recognises as a list item is replaced, in place,
// by a new single-item list whose tag comes from listsByVariant. The
// paragraph's own attributes are not carried over; only its child nodes are.
// Neighbouring single-item lists are left for mergeLists to combine.
function convertParagraphs(document: Document) {
  const nodes = document.querySelectorAll('p')
  for (const node of nodes) {
    // Determine the list variant (this also strips the marker text)
    const variant = getVariant(node)
    if (!variant) continue

    // Convert to a list item by MOVING the paragraph's children across.
    // Copying through innerHTML would serialise and re-parse the content,
    // which is slower and is not guaranteed to reproduce the same tree.
    const listItemNode = document.createElement('li')
    while (node.firstChild) {
      listItemNode.appendChild(node.firstChild)
    }

    // Get the list indent (0 and undefined both mean "no indent class")
    const indent = getListIndent(node)
    if (indent) listItemNode.classList.add(`bb-indent-${indent}`)

    // Wrap in list
    const { tagName } = listsByVariant[variant]
    const listNode = document.createElement(tagName)
    listNode.appendChild(listItemNode)

    // Replace <p> with list
    node.replaceWith(listNode)
  }
}

// True when a non-blank text node sits between two sibling elements.
function hasTextBetween(first: Element, second: Element) {
  for (
    let sibling = first.nextSibling;
    sibling && sibling !== second;
    sibling = sibling.nextSibling
  ) {
    // nodeType 3 is a text node; comments and blank text do not count
    if (sibling.nodeType === 3 && (sibling.textContent ?? '').trim().length > 0) {
      return true
    }
  }
  return false
}

// Merge adjacent lists
//
// For each list kind in `lists`, every list element that directly follows
// another list element with the same tag has its children moved to the end of
// that previous list and is then removed.
//
// The `+` combinator only looks at element siblings, so two lists separated by
// loose text would also match. Those pairs are left alone: merging them would
// move the second list's items in front of that text.
function mergeLists(document: Document) {
  for (const { tagName } of lists) {
    const selector = `${tagName} + ${tagName}`

    // Repeat until a whole pass merges nothing: moving children can create new
    // adjacent pairs that were not in the snapshot of the current pass.
    let mergedAny = true
    while (mergedAny) {
      mergedAny = false

      // querySelectorAll returns a static snapshot, so removing nodes while
      // iterating is safe; previousElementSibling is read live.
      for (const node of document.querySelectorAll<HTMLElement>(selector)) {
        const prevNode = node.previousElementSibling
        if (!prevNode || hasTextBetween(prevNode, node)) continue

        // Move node elements into previous node
        while (node.firstChild) {
          prevNode.appendChild(node.firstChild)
        }
        node.remove()
        mergedAny = true
      }
    }
  }
}

/**
 * Normalize list-like markup in `document`, mutating it in place.
 *
 * Runs two passes in order: paragraphs recognised as list items are rewritten
 * into single-item lists, then adjacent lists with the same tag are merged.
 */
export function normalizeLists(document: Document) {
  const passes = [convertParagraphs, mergeLists]
  for (const applyPass of passes) applyPass(document)
}
