/**
 * The following is more or less copied from OpenSlides
 * @see https://github.com/OpenSlides/openslides-client/blob/1c3ad1e91976dbd00e7476f907b0e9d8ca8fe283/client/src/app/ui/modules/editor/components/editor/extensions/office.ts
 */

/**
 * Entry point: sanitises HTML that was pasted from MS Word.
 *
 * Markup that is not recognised as Word output is returned untouched. Word
 * markup is run through the cleaning steps in a fixed order: whitespace
 * collapsing, list conversion, bookmark removal.
 *
 * @param html - The raw HTML string (e.g. clipboard content).
 * @returns The cleaned HTML, or the input itself when it is not Word markup.
 */
export default function handleMsWordHtmlCleaning(html: string): string {
  if (isMsWordHtml(html)) {
    // Order matters: each step consumes the output of the previous one.
    const steps = [cleanWhiteSpacing, transformLists, transformRemoveBookmarks];
    return steps.reduce((current, step) => step(current), html);
  }

  return html;
}

// Matches the conditional-comment block in which Word places the visible list
// marker ("1.", "a.", a bullet glyph, ...). The quantifier is lazy so the match
// stops at the FIRST `<!--[endif]-->`; a greedy match would run to the last one
// and swallow real item content located between two conditional comments.
const listTypeRegex = /<!--\[if !supportLists]-->([\s\S]*?)<!--\[endif]-->/;
// Patterns recognising the counter style of an ordered-list marker.
// The roman patterns start at a word boundary so that they cannot match the
// tail of a longer letter counter (e.g. the "d." in "ad.").
const listOrderRegex = {
  number: /[0-9]+\./,
  romanLower: /\b(?=[mdclxvi])m*(c[md]|d?c*)(x[cl]|l?x*)(i[xv]|v?i*)\./,
  romanUpper: /\b(?=[MDCLXVI])M*(C[MD]|D?C*)(X[CL]|L?X*)(I[XV]|V?I*)\./,
  letterLower: /[a-z]+\./,
  letterUpper: /[A-Z]+\./,
};
/**
 * Extracts the visible list marker text (e.g. "1.", "a.", a bullet glyph) that
 * Word stores inside the `<!--[if !supportLists]-->` block of a list paragraph.
 *
 * @param el - The Word list paragraph.
 * @returns The text content of the first `<span>` inside the marker block, or
 *   an empty string when the paragraph has no marker block or the block
 *   contains no `<span>`.
 */
function getListPrefix(el: HTMLElement) {
  const matches = el.innerHTML.match(listTypeRegex);
  if (!matches) {
    return ``;
  }

  // Parse the fragment in an inert document to read its text safely.
  const doc = new DOMParser().parseFromString(matches[0], `text/html`);
  // The marker block is not guaranteed to contain a <span>; fall back to "no prefix".
  return doc.body.querySelector(`span`)?.textContent ?? ``;
}

/**
 * Converts Word's flat list paragraphs (`<p style="mso-list:...">`) into nested
 * `<ol>` / `<ul>` elements with `<li>` children.
 *
 * Word encodes each list item as a paragraph whose `mso-list` style carries a
 * list id token (`l0`) and a nesting level token (`level2`). Items are grouped
 * by id, and a stack of currently open lists tracks the nesting depth.
 *
 * @param html - HTML string to transform.
 * @returns The input unchanged when it contains no `mso-list:` marker;
 *   otherwise the serialised `<html>` element of the parsed and transformed
 *   document.
 */
function transformLists(html: string): string {
  if (!html.includes(`mso-list:`)) {
    return html;
  }

  const parser = new DOMParser();
  const doc = parser.parseFromString(html, `text/html`);

  // Open lists, outermost first; its length equals the current nesting depth.
  let listStack: HTMLElement[] = [];
  let currentListId: string | undefined;
  const listElements = doc.querySelectorAll<HTMLElement>(`p[style*="mso-list:"]`);
  listElements.forEach((el) => {
    const previous = el.previousElementSibling;
    const hasNonListItemSibling =
      !previous || !(previous.nodeName === `OL` || previous.nodeName === `UL`);

    // Parse `mso-list` style attribute, e.g. "l0 level1 lfo1".
    // The value may be missing when the style could not be parsed.
    const msoListValue = parseStyleAttribute(el)[`mso-list`] ?? ``;
    const msoListInfos = msoListValue.split(` `);
    // Anchored so that tokens such as "level1" are never mistaken for the id.
    const msoListId = msoListInfos.find((e) => /^l[0-9]+$/.test(e));
    const rawLevel = Number(msoListInfos.find((e) => e.startsWith(`level`))?.substring(5));
    // Missing, non-numeric, non-finite or sub-1 levels fall back to 1 so the
    // stack below always ends up with at least one open list.
    const msoListLevel = Number.isFinite(rawLevel) && rawLevel >= 1 ? Math.floor(rawLevel) : 1;

    // Check for start of a new list
    if (currentListId !== msoListId && (hasNonListItemSibling || msoListLevel === 1)) {
      currentListId = msoListId;
      listStack = [];
    }

    // Open lists until the stack depth matches the item's level.
    while (msoListLevel > listStack.length) {
      const newList = createListElement(el);

      if (listStack.length > 0) {
        listStack[listStack.length - 1].appendChild(newList);
      } else {
        el.before(newList);
      }
      listStack.push(newList);
    }

    // Close lists that are deeper than the item's level.
    while (msoListLevel < listStack.length) {
      listStack.pop();
    }

    // Remove list item numbers and create li.
    // The <li> is created in the parsed (inert) document rather than the live
    // `document`: assigning untrusted markup to `innerHTML` of a live-document
    // element would start image loads and could fire inline event handlers.
    const li = doc.createElement(`li`);
    li.innerHTML = el.innerHTML.replace(listTypeRegex, ``);
    listStack[listStack.length - 1].appendChild(li);
    el.remove();
  });

  return doc.documentElement.outerHTML;
}

/**
 * Parses an element's inline `style` attribute into a property -> value map.
 *
 * Each `;`-separated declaration is split at its FIRST colon only, so values
 * that themselves contain a colon (e.g. `url(http://...)`) are kept intact.
 * Declarations without a colon or with an empty property name are skipped.
 * Note: values containing a literal `;` (e.g. data URIs) are still split.
 *
 * @param el - The element to read; a nullish element or one without a `style`
 *   attribute yields an empty map.
 * @returns Map of trimmed property names to trimmed values.
 */
function parseStyleAttribute(el: Element): { [prop: string]: string } {
  const styleRaw = el?.getAttribute(`style`) ?? ``;
  const entries: [string, string][] = [];

  for (const declaration of styleRaw.split(`;`)) {
    const separator = declaration.indexOf(`:`);
    if (separator === -1) {
      continue;
    }

    const prop = declaration.slice(0, separator).trim();
    if (prop) {
      entries.push([prop, declaration.slice(separator + 1).trim()]);
    }
  }

  return Object.fromEntries(entries);
}

/**
 * Builds the empty list container (`<ol>` or `<ul>`) matching the marker of a
 * Word list paragraph.
 *
 * @param el - The Word list paragraph whose marker decides the list kind.
 * @returns A new list element; it carries a `type` attribute when a counter
 *   style was detected and a `start` attribute when numbering begins above 1.
 */
function createListElement(el: HTMLElement) {
  const { type, countType, start } = getListInfo(getListPrefix(el));
  const listEl = document.createElement(type);

  if (countType) {
    listEl.setAttribute(`type`, countType);
  }

  // `start` defaults to 1 in HTML, so it is only written when it differs.
  if (start > 1) {
    listEl.setAttribute(`start`, String(start));
  }

  return listEl;
}

/**
 * Parses a roman numeral into a number (case-insensitive).
 * Example: iv -> 4, xiv -> 14, mcmxciv -> 1994
 *
 * The numeral is read right to left: a symbol smaller than the symbol to its
 * right is subtracted (the "I" in "IV"), every other symbol is added.
 *
 * @param _roman - The roman numeral, e.g. "xiv" or "XIV".
 * @returns The numeric value; 0 for an empty string and NaN when the input
 *   contains a character that is not a roman symbol.
 */
function parseRomanNumber(_roman: string): number {
  const symbolValues: Record<string, number> = {
    I: 1,
    V: 5,
    X: 10,
    L: 50,
    C: 100,
    D: 500,
    M: 1000,
  };
  const roman = _roman.toUpperCase();
  let value = 0;
  let lastVal = 0;

  for (let i = roman.length - 1; i >= 0; i -= 1) {
    const current = symbolValues[roman.charAt(i)];
    if (current === undefined) {
      // Not a roman symbol: the whole numeral is invalid.
      return Number.NaN;
    }

    value += current >= lastVal ? current : -current;
    lastVal = current;
  }

  return value;
}

/**
 * Parses a list item index made of letters (bijective base 26,
 * case-insensitive).
 * Example: a -> 1, z -> 26, aa -> 27, ab -> 28, ...
 *
 * @param str - The letter index, e.g. "ab".
 * @returns The 1-based position the letters stand for; 0 for an empty string.
 */
function parseLetterNumber(str: string) {
  const total = str.length;
  let result = 0;

  // Walk from the last letter (weight 26^0) towards the first.
  for (let exponent = 0; exponent < total; exponent += 1) {
    const letter = str.charAt(total - 1 - exponent);
    const ordinal = letter.toLowerCase().charCodeAt(0) - 97 + 1;
    result += ordinal * 26 ** exponent;
  }

  return result;
}

/**
 * Derives the list kind from a Word list marker.
 *
 * The marker is checked against the counter patterns in a fixed priority
 * order (decimal, lower roman, upper roman, lower letter, upper letter); the
 * first pattern that matches wins. A marker matching none of them is treated
 * as a bullet.
 *
 * @param prefix - The marker text, e.g. "3.", "iv." or "b.".
 * @returns `type` (`ol` or `ul`), `start` (number the list begins at, 1 by
 *   default) and `countType` (value for the list's `type` attribute, or null).
 */
function getListInfo(prefix: string) {
  // [pattern, value for the `type` attribute, converter for the matched counter]
  const candidates: Array<[RegExp, string | null, (counter: string) => number]> = [
    [listOrderRegex.number, null, (counter) => +counter],
    [listOrderRegex.romanLower, `i`, parseRomanNumber],
    [listOrderRegex.romanUpper, `I`, parseRomanNumber],
    [listOrderRegex.letterLower, `a`, parseLetterNumber],
    [listOrderRegex.letterUpper, `A`, parseLetterNumber],
  ];

  let type = `ul`;
  let countType: string | null = null;
  let start = 1;

  for (const [pattern, marker, toStart] of candidates) {
    const hit = pattern.exec(prefix);
    if (hit) {
      type = `ol`;
      countType = marker;
      // Drop the trailing dot before converting the counter.
      start = toStart(hit[0].replace(`.`, ``));
      break;
    }
  }

  return {
    type,
    start,
    countType,
  };
}

/**
 * Collapses every run of two or more whitespace characters into one space.
 *
 * Word pads its markup with a lot of superfluous whitespace. The trade-off is
 * that intentional runs of spaces in the text are collapsed as well,
 * e.g. "Hello        world" => "Hello world". Single whitespace characters
 * (including a lone newline or tab) are left as they are.
 *
 * @param html - HTML string to normalise.
 * @returns The string with whitespace runs collapsed.
 */
function cleanWhiteSpacing(html: string): string {
  const segments = html.split(/\s{2,}/);
  return segments.join(' ');
}

/**
 * Replaces a node with its own children: every child is moved in front of the
 * node (keeping their order) and the then-empty node is removed.
 *
 * @param el - The node to unwrap. A node without a parent (detached or already
 *   removed) is left untouched, since there is nothing to unwrap it into.
 */
function unwrapNode(el: Node): void {
  const parent = el.parentNode;
  if (!parent) {
    return;
  }

  // insertBefore MOVES the child, so `firstChild` advances on every iteration.
  while (el.firstChild) {
    parent.insertBefore(el.firstChild, el);
  }
  parent.removeChild(el);
}

/**
 * Strips Word bookmark artefacts from the markup.
 *
 * For every element styled with `mso-bookmark:<name>` the first anchor
 * `<a name="<name>">` in the document is removed, and the styled element is
 * unwrapped (replaced by its children).
 *
 * @param html - HTML string to transform.
 * @returns The serialised `<html>` element of the parsed and transformed
 *   document.
 */
function transformRemoveBookmarks(html: string): string {
  const parser = new DOMParser();
  const doc = parser.parseFromString(html, `text/html`);
  const bookmarks = doc.querySelectorAll(`[style*="mso-bookmark:"]`);
  bookmarks.forEach((node) => {
    const bookmark = parseStyleAttribute(node)[`mso-bookmark`];
    if (bookmark !== undefined) {
      // Compare the attribute value directly instead of interpolating the
      // (untrusted) name into a selector: a quote or backslash in the name
      // would make `querySelector` throw a SyntaxError.
      const bookmarkLink = Array.from(doc.querySelectorAll(`a[name]`)).find(
        (anchor) => anchor.getAttribute(`name`) === bookmark,
      );
      // NOTE(review): this removes the anchor together with its content;
      // confirm that Word never places visible text inside these anchors.
      bookmarkLink?.remove();
    }

    // The styled node may be the anchor that was just removed (or may have
    // been detached otherwise); only nodes that still have a parent can be
    // unwrapped.
    if (node.parentNode) {
      unwrapNode(node);
    }
  });

  return doc.documentElement.outerHTML;
}

/**
 * Heuristically detects markup produced by MS Word.
 *
 * @param html - HTML string to inspect.
 * @returns `true` when the string contains both the `microsoft-com` and the
 *   `office` marker, `false` otherwise.
 */
function isMsWordHtml(html: string): boolean {
  const markers = [`microsoft-com`, `office`];
  return markers.every((marker) => html.includes(marker));
}
