// taken from https://gomakethings.com/how-to-sanitize-html-strings-with-vanilla-js-to-reduce-your-risk-of-xss-attacks/
/**
 * Parses an HTML string into a document via `DOMParser`, using the
 * `text/html` MIME type.
 *
 * @param {string} html - Markup to parse.
 * @returns {Document} Whatever `DOMParser#parseFromString` returns for the markup.
 */
const getDocument = (html) => {
  const parser = new DOMParser();
  return parser.parseFromString(html, 'text/html');
};

/**
 * Detaches every element matching the `script` selector below the given root.
 *
 * @param {ParentNode} html - Root to search; despite the name this is a node
 *   (anything exposing `querySelectorAll`), not an HTML string.
 * @returns {void}
 */
const removeScripts = (html) => {
  Array.from(html.querySelectorAll('script')).forEach((scriptElement) => {
    scriptElement.remove();
  });
};

// Attributes are allowed only when explicitly listed here; everything else is
// treated as unsafe.
// TODO: decide whether more attribute names need to be added to the list.
/**
 * @param {{ name: string }} attr - Attribute (or attribute-like object) to check.
 * @returns {boolean} `true` when the attribute's name is on the allow-list.
 */
const isSafe = (attr) => {
  const allowedNames = ['class'];
  return allowedNames.some((allowedName) => allowedName === attr.name);
};

/**
 * Strips every attribute from `node` that `isSafe` does not approve.
 *
 * @param {Element} node - Element whose attributes are filtered in place.
 * @returns {void}
 */
const removeAttributes = (node) => {
  // Work on a snapshot so removals do not disturb the iteration.
  Array.from(node.attributes).forEach((attribute) => {
    if (!isSafe(attribute)) {
      node.removeAttribute(attribute.name);
    }
  });
};

/**
 * Walks the element tree below `node` (the node itself is not touched) and
 * runs `removeAttributes` on every descendant element, parents before their
 * own children.
 *
 * @param {ParentNode} node - Root whose descendants are cleaned in place.
 * @returns {void}
 */
const clean = (node) => {
  Array.from(node.children).forEach((child) => {
    removeAttributes(child);
    clean(child);
  });
};

/**
 * Reduces an HTML string to plain text: the markup is parsed with
 * `getDocument` and the body's `textContent` is returned.
 *
 * @param {string} html - Markup to reduce to text.
 * @returns {string} The body's text content, or `''` when it is empty/falsy.
 */
export const sanitize = (html) => {
  const { body } = getDocument(html);
  const text = body.textContent;
  return text ? text : '';
};

/**
 * Parses an HTML string, removes `<script>` elements via `removeScripts`,
 * strips attributes from all descendants via `clean`, and returns the
 * remaining markup.
 *
 * @param {string} html - Markup to sanitize.
 * @returns {string} The `innerHTML` of the cleaned body.
 */
export const sanitizeAsHtml = (html) => {
  const parsed = getDocument(html);

  // Fall back to a fresh, empty <body> if the parsed document has none.
  let root = parsed.body;
  if (!root) {
    root = document.createElement('body');
  }

  removeScripts(root);
  clean(root);
  return root.innerHTML;
};

/**
 * Converts text into a run of `<p>` elements, one per newline-separated chunk
 * (consecutive newlines count as a single separator).
 *
 * Each chunk is reduced to plain text with `sanitize` and then HTML-escaped
 * before being wrapped. The escaping matters: `sanitize` returns decoded text
 * content, so entity-encoded input such as `&lt;img onerror=...&gt;` comes
 * back as literal `<img onerror=...>` and would otherwise be emitted as live
 * markup inside the paragraph.
 *
 * @param {string} text - Raw text, possibly containing markup and newlines.
 * @returns {string} Concatenated `<p>…</p>` markup with escaped text content.
 */
export const newLineToParagraph = (text) => {
  // Escape the characters that are significant inside element content.
  // `&` must be replaced first so already-produced entities are not re-escaped.
  const escapeHtml = (value) =>
    value.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');

  return text
    .split(/\n+/)
    .map((line) => sanitize(line)) // sanitize after the split, because the sanitizer would probably remove the newlines
    .map(escapeHtml)
    .map((p) => `<p>${p}</p>`)
    .join('');
};