import { ClipboardTransformerBase } from './clipboard-transformer.base';

// Matches the "<!--[if !supportLists]--> ... <!--[endif]-->" conditional block that MS Word
// emits around the literal bullet/number glyph of a list paragraph.
// The body is matched lazily so the pattern stops at the FIRST "<!--[endif]-->"; a greedy match
// would run to the last one and swallow real content when the paragraph holds another
// conditional block (for example "<!--[if !vml]-->...<!--[endif]-->").
const listSupportConditionPattern = /<!--\[if !supportLists]-->([\s\S]*?)<!--\[endif]-->/;
// Captures the single digit of a "levelN" token (read from a list paragraph's style attribute).
const listIndentPattern = /\blevel(\d)\b/;
// Captures the single digit of an "lfoN" token (read from a list paragraph's style attribute).
const listTypePattern = /\blfo(\d)\b/;
// Captures the heading level (1-6) from a node name such as "H2".
const headerPattern = /\b[hH]([1-6])\b/;

/**
 * MSWordAppTransformer
 * Converts pastes from the MS Word PC app to traditional HTML.
 * Scrubs some elements that end up adding white-space and duplicated data.
 *
 * Only the direct children of `<body>` are inspected:
 * - `<p>` elements whose class starts with `Mso` are kept (list paragraphs and titles are converted),
 * - `<h1>`-`<h6>` elements are kept and reduced to their plain text,
 * - every other child node is removed.
 */
export class MSWordAppTransformer extends ClipboardTransformerBase {
  /**
   * Rewrites `document` in place when it carries the MS Word office namespace.
   * Does nothing when the document does not match or has no `<body>`.
   *
   * @param document Parsed clipboard HTML document; mutated in place.
   */
  transform(document: Document): void {
    if (!this.documentMatchesWordPCSchema(document)) {
      return;
    }

    console.debug('Detected paste from MSWord App...');

    const bodyMatch = this.filterNode(document.documentElement, (el) => el.nodeName === 'BODY');
    const body = bodyMatch.length > 0 ? bodyMatch[0] : null;
    if (!body) {
      return;
    }

    const msoElements: HTMLElement[] = [];
    const headerElements: HeaderItem[] = [];
    const trashElements: Node[] = [];

    // Iterate over a snapshot of the child list: `for..in` on a NodeList also enumerates
    // `length`, `item`, `forEach`, ... and the live list must not be walked while it is edited.
    for (const node of Array.from(body.childNodes)) {
      const child = node as HTMLElement;

      if (child.nodeName === 'P' && child.className.startsWith('Mso')) {
        msoElements.push(child);
      } else if (child instanceof Node) {
        const headerMatch = child.nodeName.match(headerPattern);
        if (headerMatch?.length === 2) {
          headerElements.push({
            content: child.textContent ?? '',
            element: child,
            level: parseInt(headerMatch[1], 10),
          });
          continue;
        }

        trashElements.push(child);
      }
    }

    // Drop everything that is neither an Mso paragraph nor a heading.
    for (const el of trashElements) {
      body.removeChild(el);
    }

    this.replaceMsoListItems(msoElements, body);
    this.replaceMsoTitles(msoElements, body);
    this.replaceMsoHeaders(headerElements);
  }

  /**
   * Replaces every paragraph whose class starts with `MsoTitle` by an `<h1>` with the same inner HTML.
   *
   * @param msoElements Mso paragraphs collected from `body`.
   * @param body Element whose children are replaced.
   */
  private replaceMsoTitles(msoElements: HTMLElement[], body: HTMLElement) {
    for (const el of msoElements) {
      if (el.className.startsWith('MsoTitle')) {
        const header = body.ownerDocument.createElement('H1');
        header.innerHTML = el.innerHTML;
        body.replaceChild(header, el);
      }
    }
  }

  /**
   * Strips nested markup from the collected headings, leaving only their text.
   *
   * @param headers Headings collected from the body.
   */
  private replaceMsoHeaders(headers: HeaderItem[]) {
    for (const header of headers) {
      // Assign as text, not as HTML: the value is plain text taken from pasted (untrusted)
      // content, and characters such as "<" or "&" must not be re-parsed as markup.
      header.element.textContent = header.element.innerText;
    }
  }

  /**
   * Converts runs of `MsoListParagraph` paragraphs into `<ol>`/`<ul>` elements.
   *
   * A run is continued only while the next list paragraph has the same list type and directly
   * follows the previous one in the DOM; each run is replaced in `body` by one generated list.
   *
   * @param msoElements Mso paragraphs collected from `body`, in document order.
   * @param body Parent of the paragraphs; the generated lists are inserted into it.
   */
  private replaceMsoListItems(msoElements: HTMLElement[], body: HTMLElement) {
    const listGroups: ListItem[][] = [];
    let currentGroup: ListItem[] | null = null;

    for (const msoEl of msoElements) {
      if (!msoEl.className.startsWith('MsoListParagraph')) {
        currentGroup = null;
        continue;
      }

      // Remove unsupported list html.
      msoEl.innerHTML = msoEl.innerHTML.replace(listSupportConditionPattern, '');
      const listItem = this.msoListParagraphToListItem(msoEl);
      if (!listItem) {
        currentGroup = null;
        continue;
      }

      const previousItem = currentGroup ? currentGroup[currentGroup.length - 1] : undefined;
      // `msoElements` skips the headings that stay in the body, so two list paragraphs can be
      // neighbours in the array while a heading sits between them in the DOM. Merging them
      // would move the later items above that heading, hence the sibling check.
      const continuesGroup =
        previousItem !== undefined &&
        previousItem.isOrdered === listItem.isOrdered &&
        msoEl.previousElementSibling === previousItem.msoElement;

      if (currentGroup && continuesGroup) {
        currentGroup.push(listItem);
      } else {
        currentGroup = [listItem];
        listGroups.push(currentGroup);
      }
    }

    for (const group of listGroups) {
      // Create the list in the document that owns `body`, not in the global `document`.
      const listElement = this.listItemGroupToListElements(group, body.ownerDocument);

      body.insertBefore(listElement, group[0].msoElement);
      for (const item of group) {
        body.removeChild(item.msoElement);
      }
    }
  }

  /**
   * Reads list metadata from the `style` attribute of a Word list paragraph.
   *
   * The `lfoN` token selects the list type (`1` and `3` are treated as ordered, `2` as
   * unordered) and the `levelN` token gives the indent level.
   * NOTE(review): the lfo-to-list-type mapping is hard-coded here; confirm it against real Word output.
   *
   * @param el List paragraph element.
   * @returns The parsed item, or `null` when the style is missing or not recognised.
   */
  private msoListParagraphToListItem(el: HTMLElement): ListItem | null {
    const style = el.getAttribute('style');
    if (!style) {
      return null;
    }

    const typeMatch = style.match(listTypePattern);
    if (!typeMatch || typeMatch.length !== 2) {
      return null;
    }

    let isOrdered: boolean;
    if (typeMatch[1] === '1' || typeMatch[1] === '3') {
      isOrdered = true;
    } else if (typeMatch[1] === '2') {
      isOrdered = false;
    } else {
      return null;
    }

    const indentMatch = style.match(listIndentPattern);
    if (!indentMatch || indentMatch.length !== 2) {
      return null;
    }

    return {
      isOrdered,
      indentLevel: parseInt(indentMatch[1], 10),
      content: el.innerText,
      msoElement: el,
    };
  }

  /**
   * Builds a (possibly nested) `<ol>`/`<ul>` element from a group of list items.
   *
   * Deeper indent levels are represented by child lists appended directly to the enclosing
   * list element; each item becomes an `<li>` holding the item's text.
   *
   * @param listGroup Items of one list; all must share `isOrdered` and have an indent level of 1-9.
   * @param doc Document used to create the elements.
   * @returns The root list element (not yet attached to any parent).
   * @throws Error with message `invalid list group` when the group is empty, mixes list types,
   *   or contains an indent level outside 1-9.
   */
  listItemGroupToListElements(listGroup: ListItem[], doc: Document): HTMLElement {
    if (
      listGroup.length === 0 ||
      listGroup.some((i) => i.isOrdered !== listGroup[0].isOrdered) ||
      listGroup.some((i) => i.indentLevel < 1 || i.indentLevel >= 10)
    ) {
      throw new Error('invalid list group');
    }

    const listTag = listGroup[0].isOrdered ? 'OL' : 'UL';
    const rootEl: HTMLElement = doc.createElement(listTag);
    let depth = 1;
    let list: HTMLElement = rootEl;
    for (const item of listGroup) {
      // Descend: open one nested list per missing level.
      while (depth < item.indentLevel) {
        const childList = doc.createElement(listTag);
        list.appendChild(childList);
        list = childList;
        depth++;
      }

      // Ascend: nested lists are direct children of their enclosing list, so the parent
      // element is the list one level up. The fallback is never expected to be used because
      // `depth` only exceeds 1 after a descent.
      while (depth > item.indentLevel) {
        list = list.parentElement ?? rootEl;
        depth--;
      }

      const newLi = doc.createElement('LI');
      // `content` is plain text from pasted (untrusted) content; set it as text so "<" and "&"
      // are kept literally instead of being parsed as markup.
      newLi.textContent = item.content;
      list.appendChild(newLi);
    }

    return rootEl;
  }

  /**
   * Tells whether the document root declares the MS Office namespace (`xmlns:o`).
   *
   * @param document Document to inspect; a missing document or root element yields `false`.
   * @returns `true` only when `xmlns:o` equals `urn:schemas-microsoft-com:office:office`.
   */
  documentMatchesWordPCSchema(document: Document): boolean {
    const schema = document?.documentElement?.getAttribute('xmlns:o');

    return schema === 'urn:schemas-microsoft-com:office:office';
  }
}

/** One MS Word list paragraph, parsed into the data needed to rebuild it as an HTML list item. */
export interface ListItem {
  /** `true` when the item belongs in an `<ol>`, `false` when it belongs in a `<ul>`. */
  isOrdered: boolean;
  /** Nesting level parsed from the paragraph's `levelN` style token; list building accepts 1-9. */
  indentLevel: number;
  /** Text of the paragraph (its `innerText`), used as the content of the generated `<li>`. */
  content: string;
  /** The original Word paragraph element that the generated list replaces. */
  msoElement: HTMLElement;
}

/** A heading element (`H1`-`H6`) found among the direct children of the pasted body. */
export interface HeaderItem {
  /** Text content of the heading at the time it was collected. */
  content: string;
  /** Heading level (1-6) parsed from the element's node name. */
  level: number;
  /** The heading element itself. */
  element: HTMLElement;
}
