import { TokenisationFunction } from './models';

/**
 * Upper-case accented vowels mapped to their unaccented base letter.
 * The trailing comments record which languages each entry was added for.
 * Letters that are absent from this table (for example Ñ and Ç) are left
 * untouched by `latinise`.
 */
const LATIN_BASE_LETTERS: ReadonlyMap<string, string> = new Map([
  ['Á', 'A'], // es
  ['Â', 'A'], // pt, fr
  ['Ã', 'A'], // pt
  ['À', 'A'], // pt, it
  ['Ä', 'A'], // nl
  ['É', 'E'], // es, it
  ['Ê', 'E'], // pt, fr
  ['È', 'E'], // pt, it
  ['Ë', 'E'], // fr, nl
  ['Í', 'I'], // es
  ['Ì', 'I'], // pt, it
  ['Î', 'I'], // fr
  ['Ï', 'I'], // fr, nl
  ['Ó', 'O'], // es, it
  ['Ô', 'O'], // pt, fr
  ['Õ', 'O'], // pt
  ['Ò', 'O'], // pt, it
  ['Ö', 'O'], // pt, nl
  ['Ú', 'U'], // es
  ['Û', 'U'], // fr
  ['Ü', 'U'], // es, fr, nl
  ['Ù', 'U'], // pt, it
]);

/**
 * Upper-cases `word` and strips the accents listed in `LATIN_BASE_LETTERS`.
 *
 * The input is first normalised to NFC so that a decomposed sequence such as
 * `e` + U+0301 (combining acute) is composed into `é` and therefore matches
 * the table; without that step the combining mark would survive in the output.
 *
 * @param word - The word to normalise.
 * @returns The upper-cased word with the listed accented vowels replaced by
 *   their base letter; every other character is passed through unchanged.
 */
const latinise = (word: string): string =>
  Array.from(
    // Compose before upper-casing so already-composed input takes the exact
    // same path it always did.
    word.normalize('NFC').toUpperCase(),
    (letter) => LATIN_BASE_LETTERS.get(letter) ?? letter,
  ).join('');

/**
 * Splits a word into Spanish letter tokens.
 *
 * The word is passed through `latinise` first, then scanned left to right.
 * The pairs `LL`, `RR` and `CH` are emitted as a single two-character token;
 * every other character becomes its own token. Matching is greedy, so `LLL`
 * yields `['LL', 'L']`.
 *
 * @param word - The word to tokenise.
 * @returns The tokens in order of appearance.
 */
export const tokeniseSpanish: TokenisationFunction = (word: string) => {
  // First letter of each digraph -> the letter that must follow it.
  const digraphPartner = new Map<string, string>([
    ['L', 'L'],
    ['R', 'R'],
    ['C', 'H'],
  ]);

  const upperWord = latinise(word);
  const tokens: string[] = [];

  let position = 0;
  while (position < upperWord.length) {
    const current = upperWord.charAt(position);
    const partner = digraphPartner.get(current);

    if (partner !== undefined && upperWord.charAt(position + 1) === partner) {
      // Consume both characters of the digraph at once.
      tokens.push(current + partner);
      position += 2;
    } else {
      tokens.push(current);
      position += 1;
    }
  }

  return tokens;
};

/**
 * Splits a word into French letter tokens.
 *
 * The word is passed through `latinise`, any `Ç` is then replaced with `C`,
 * and the result is split into one token per character.
 *
 * NOTE(review): the original author was unsure whether folding `Ç` into `C`
 * is the right rule — confirm against the intended alphabet.
 *
 * @param word - The word to tokenise.
 * @returns One single-character token per character of the normalised word.
 */
export const tokeniseFrench: TokenisationFunction = (word: string) => {
  const upperWord = latinise(word);
  const withoutCedilla = upperWord.replaceAll('Ç', 'C');
  return withoutCedilla.split('');
};

/**
 * Splits a word into letter tokens with no language-specific rules.
 *
 * The word is passed through `latinise` and then split into one token per
 * character.
 *
 * @param word - The word to tokenise.
 * @returns One single-character token per character of the normalised word.
 */
export const tokeniseBasic: TokenisationFunction = (word: string) => {
  const upperWord = latinise(word);
  return upperWord.split('');
};
