package nlp

import (
	"bufio"
	"errors"
	"fmt"
	"github.com/sajari/fuzzy"
	"os"
	"path"
	"path/filepath"
	"regexp"
	"sort"
	"strconv"
	"strings"
	"unicode/utf8"
)

// wordPattern matches a run of one or more ASCII letters. normalizeToken uses
// it to extract the first such run from a token.
var wordPattern = regexp.MustCompile(`[A-Za-z]+`)

// Dictionary bundles a word list with a fuzzy-matching model and answers
// word-membership, spelling-suggestion and abbreviation-expansion queries.
type Dictionary struct {
	// model is queried by Suggest for spelling suggestions.
	model                 *fuzzy.Model
	// words is the set of known words used for membership lookups.
	words                 map[string]struct{}
	// wordsByFirstCharacter groups the known words by their first rune; it is
	// the candidate pool scanned by AbbreviationExpansion.
	wordsByFirstCharacter map[rune][]string
}

// NewDictionary loads the word list and returns a Dictionary backed by it.
//
// The fuzzy model is taken from the on-disk cache when it can be loaded;
// otherwise a new model is trained on the word list and written back to the
// cache. An error is returned only when the word list itself cannot be loaded.
//
// NOTE(review): a cached model is used as-is and is not compared against the
// current word list, so it may be stale if the dictionary sources changed.
func NewDictionary() (*Dictionary, error) {
	words, err := loadWords()
	if err != nil {
		return nil, err
	}

	model, cacheErr := loadCachedModel()
	if cacheErr != nil {
		// No usable cache: train a new model from the word list.
		model = fuzzy.NewModel()
		model.SetThreshold(1)
		model.SetDepth(1)
		model.SetUseAutocomplete(false)
		model.Train(words)

		// Caching is best effort; a failed save only costs a retrain next time.
		_ = saveCachedModel(model)
	}

	return &Dictionary{
		model:                 model,
		words:                 makeWordSet(words),
		wordsByFirstCharacter: makeWordsByFirstCharacter(words),
	}, nil
}

// IsWord reports whether token, once normalized, is a known word or an
// inflection or spelling variant of one. A nil receiver or a token that
// normalizes to the empty string yields false.
func (dictionary *Dictionary) IsWord(token string) bool {
	if dictionary == nil {
		return false
	}

	normalized := normalizeToken(token)

	return normalized != "" && dictionary.isLexiconWord(normalized)
}

// Suggest returns the model's top spelling suggestion for token, or the empty
// string when there is nothing to suggest: the receiver or its model is nil,
// the token normalizes to nothing, the token is already a known word, or the
// model has no suggestion other than the token itself.
func (dictionary *Dictionary) Suggest(token string) string {
	if dictionary == nil || dictionary.model == nil {
		return ""
	}

	normalized := normalizeToken(token)
	if normalized == "" || dictionary.isLexiconWord(normalized) {
		return ""
	}

	suggestions := dictionary.model.SpellCheckSuggestions(normalized, 1)
	if len(suggestions) == 0 || suggestions[0] == normalized {
		return ""
	}

	return suggestions[0]
}

// isLexiconWord reports whether token is in the word set, either directly or
// through one of its derived forms: an inflection base form, a spelling
// variant, or a spelling variant of an inflection base form.
//
// token is expected to be normalized already. A nil receiver yields false.
func (dictionary *Dictionary) isLexiconWord(token string) bool {
	if dictionary == nil {
		return false
	}

	if _, ok := dictionary.words[token]; ok {
		return true
	}

	// known reports whether a derived candidate is in the word set. Empty
	// candidates and the token itself (already checked above) are ignored.
	known := func(candidate string) bool {
		if candidate == "" || candidate == token {
			return false
		}

		_, ok := dictionary.words[candidate]

		return ok
	}

	// Inflections are needed twice (directly and as input to the spelling
	// variants), so compute them once.
	inflections := inflectionCandidates(token)

	for _, candidate := range inflections {
		if known(candidate) {
			return true
		}
	}

	for _, candidate := range spellingVariantCandidates(token) {
		if known(candidate) {
			return true
		}
	}

	for _, inflection := range inflections {
		for _, candidate := range spellingVariantCandidates(inflection) {
			if known(candidate) {
				return true
			}
		}
	}

	return false
}

// AbbreviationExpansion looks for a known word that token plausibly
// abbreviates. Only normalized tokens of two to four characters are
// considered, and candidates are limited to known words that share the
// token's first character. The candidate with the lowest abbreviationScore
// wins; on a tie the first one encountered is kept.
//
// It returns the chosen word and true, or "" and false when nothing matches.
func (dictionary *Dictionary) AbbreviationExpansion(token string) (string, bool) {
	if dictionary == nil {
		return "", false
	}

	abbreviation := normalizeToken(token)

	// An empty token has length zero and is rejected here as well.
	if length := utf8.RuneCountInString(abbreviation); length < 2 || length > 4 {
		return "", false
	}

	initial, _ := utf8.DecodeRuneInString(abbreviation)
	expansion := ""
	lowestScore := 1 << 30

	for _, word := range dictionary.wordsByFirstCharacter[initial] {
		if !isLikelyAbbreviationForToken(abbreviation, word) {
			continue
		}

		if score := abbreviationScore(abbreviation, word); score < lowestScore {
			lowestScore, expansion = score, word
		}
	}

	return expansion, expansion != ""
}

// isLikelyAbbreviationForToken reports whether token looks like an
// abbreviation of candidate.
//
// candidate must be strictly longer than token and contain token's characters
// in order. Beyond that, either token (at most four characters) is a prefix of
// candidate, or token (at most five characters) has a non-empty consonant
// skeleton that is a subsequence of candidate's non-empty consonant skeleton.
func isLikelyAbbreviationForToken(token string, candidate string) bool {
	tokenRunes := utf8.RuneCountInString(token)
	candidateRunes := utf8.RuneCountInString(candidate)

	switch {
	case token == "", candidate == "", token == candidate:
		return false
	case tokenRunes >= candidateRunes:
		return false
	case !isSubsequence(token, candidate):
		return false
	case tokenRunes <= 4 && strings.HasPrefix(candidate, token):
		return true
	}

	tokenSkeleton := consonantSkeleton(token)
	candidateSkeleton := consonantSkeleton(candidate)

	return tokenSkeleton != "" &&
		candidateSkeleton != "" &&
		tokenRunes <= 5 &&
		isSubsequence(tokenSkeleton, candidateSkeleton)
}

// abbreviationScore ranks candidate as an expansion of token; lower is better.
// The score is ten points per character that candidate is longer than token
// (never negative), minus three when token is a prefix of candidate.
func abbreviationScore(token string, candidate string) int {
	gap := utf8.RuneCountInString(candidate) - utf8.RuneCountInString(token)
	penalty := 10 * max(gap, 0)

	if !strings.HasPrefix(candidate, token) {
		return penalty
	}

	return penalty - 3
}

// isSubsequence reports whether the runes of shorter appear in longer in the
// same order, not necessarily adjacent. An empty shorter always matches.
func isSubsequence(shorter string, longer string) bool {
	// pending holds the runes of shorter that have not been matched yet.
	pending := []rune(shorter)

	for _, r := range longer {
		if len(pending) == 0 {
			return true
		}

		if r == pending[0] {
			pending = pending[1:]
		}
	}

	return len(pending) == 0
}

// consonantSkeleton returns word with the lowercase vowels a, e, i, o and u
// removed. Every other rune, including uppercase vowels, is kept in order.
func consonantSkeleton(word string) string {
	const vowels = "aeiou"

	var skeleton strings.Builder

	for _, r := range word {
		if !strings.ContainsRune(vowels, r) {
			skeleton.WriteRune(r)
		}
	}

	return skeleton.String()
}

// inflectionCandidates returns possible base forms of token, produced by
// stripping common English suffixes. The results are guesses and may not be
// real words; callers are expected to check them against a word list.
//
// Handled suffixes:
//   - "ies" -> stem + "y"
//   - "es", "s" -> stem
//   - "ed", "ing", "er" -> stem, stem + "e", and, when the stem ends in a
//     doubled letter, the stem with one of the pair removed
//     ("stopped" -> "stop", "running" -> "run", "bigger" -> "big")
//   - "ize", "ized", "izing", "izer", "ization" -> stem
//
// A suffix is only stripped when it leaves a non-empty stem. Lengths are
// measured in bytes, so token is assumed to be ASCII.
func inflectionCandidates(token string) []string {
	candidates := make([]string, 0, 8)

	if stem, ok := strings.CutSuffix(token, "ies"); ok && stem != "" {
		candidates = append(candidates, stem+"y")
	}

	for _, suffix := range []string{"es", "s"} {
		if stem, ok := strings.CutSuffix(token, suffix); ok && stem != "" {
			candidates = append(candidates, stem)
		}
	}

	// These suffixes can hide a dropped silent "e" or a doubled final
	// consonant, so each stem is expanded into several forms.
	for _, suffix := range []string{"ed", "ing", "er"} {
		if stem, ok := strings.CutSuffix(token, suffix); ok && stem != "" {
			candidates = appendStemForms(candidates, stem)
		}
	}

	for _, suffix := range []string{"ize", "ized", "izing", "izer", "ization"} {
		if stem, ok := strings.CutSuffix(token, suffix); ok && stem != "" {
			candidates = append(candidates, stem)
		}
	}

	return candidates
}

// appendStemForms appends stem, stem + "e", and, when stem ends in a doubled
// letter, stem without its final letter to candidates and returns the result.
func appendStemForms(candidates []string, stem string) []string {
	candidates = append(candidates, stem, stem+"e")

	if n := len(stem); n >= 2 && stem[n-1] == stem[n-2] {
		candidates = append(candidates, stem[:n-1])
	}

	return candidates
}

// spellingVariantCandidates returns alternate spellings of token obtained by
// swapping its ending between paired suffix forms (-ise/-ize and their
// derived endings, -our/-or, -tre/-ter), in both directions. The results are
// guesses and may not be real words.
func spellingVariantCandidates(token string) []string {
	// Each entry is {suffix to replace, replacement}, tried in this order.
	suffixSwaps := [...][2]string{
		{"isation", "ization"},
		{"ization", "isation"},
		{"ising", "izing"},
		{"izing", "ising"},
		{"ised", "ized"},
		{"ized", "ised"},
		{"iser", "izer"},
		{"izer", "iser"},
		{"ise", "ize"},
		{"ize", "ise"},
		{"our", "or"},
		{"or", "our"},
		{"tre", "ter"},
		{"ter", "tre"},
	}

	variants := make([]string, 0, 8)

	for _, swap := range suffixSwaps {
		appendSuffixVariant(&variants, token, swap[0], swap[1])
	}

	return variants
}

// appendSuffixVariant appends token with fromSuffix replaced by toSuffix to
// *candidates. It does nothing when token does not end in fromSuffix or when
// removing the suffix would leave nothing.
func appendSuffixVariant(candidates *[]string, token string, fromSuffix string, toSuffix string) {
	stem, found := strings.CutSuffix(token, fromSuffix)
	if !found || stem == "" {
		return
	}

	*candidates = append(*candidates, stem+toSuffix)
}

// makeWordSet returns a set holding every distinct entry of words.
func makeWordSet(words []string) map[string]struct{} {
	members := make(map[string]struct{}, len(words))

	for index := range words {
		members[words[index]] = struct{}{}
	}

	return members
}

// makeWordsByFirstCharacter buckets words by their first rune and sorts each
// bucket in ascending order. Empty strings are skipped.
func makeWordsByFirstCharacter(words []string) map[rune][]string {
	buckets := make(map[rune][]string)

	for _, word := range words {
		if word == "" {
			continue
		}

		initial, _ := utf8.DecodeRuneInString(word)
		buckets[initial] = append(buckets[initial], word)
	}

	// Sorting works in place on each bucket's backing array.
	for _, bucket := range buckets {
		sort.Strings(bucket)
	}

	return buckets
}

func loadWords() ([]string, error) {
	configuredDictionaryPaths := parseDictionaryPaths(os.Getenv("KIVIA_DICTIONARY_PATH"))

	if len(configuredDictionaryPaths) > 0 {
		words, err := loadWordsFromPaths(configuredDictionaryPaths, true)

		if err != nil {
			return nil, err
		}

		if len(words) == 0 {
			return nil, errors.New("configured dictionary sources contain no usable words")
		}

		return words, nil
	}

	words, err := loadWordsFromPaths(defaultDictionaryPaths, false)

	if err != nil {
		return nil, err
	}

	if len(words) == 0 {
		return nil, errors.New("no usable dictionary words found; set KIVIA_DICTIONARY_PATH")
	}

	return words, nil
}

// readWordsFromFile reads the file at filePath line by line and returns its
// entries as a normalized, de-duplicated, sorted word list. Files with a
// ".dic" extension (any letter case) are treated as spell-checker
// dictionaries, which lets a numeric first line be skipped.
//
// It returns an error if the file cannot be opened or scanning fails.
func readWordsFromFile(filePath string) ([]string, error) {
	handle, err := os.Open(filePath)
	if err != nil {
		return nil, err
	}

	defer handle.Close()

	spellDictionary := strings.EqualFold(path.Ext(filePath), ".dic")
	lines := bufio.NewScanner(handle)
	collected := make([]string, 0, 1024)

	// Line numbers are 1-based and count every line, including skipped ones.
	for lineNumber := 1; lines.Scan(); lineNumber++ {
		if entry := normalizeDictionaryLine(lines.Text(), lineNumber, spellDictionary); entry != "" {
			collected = append(collected, entry)
		}
	}

	if scanErr := lines.Err(); scanErr != nil {
		return nil, scanErr
	}

	return normalizeWords(collected), nil
}

// parseDictionaryPaths splits value into individual paths. Entries may be
// separated by commas or by the operating system's path-list separator;
// surrounding whitespace is trimmed and empty entries are dropped. A blank
// value yields nil.
func parseDictionaryPaths(value string) []string {
	if strings.TrimSpace(value) == "" {
		return nil
	}

	isSeparator := func(r rune) bool {
		return r == ',' || r == os.PathListSeparator
	}

	fields := strings.FieldsFunc(value, isSeparator)
	paths := make([]string, 0, len(fields))

	for _, field := range fields {
		if trimmed := strings.TrimSpace(field); trimmed != "" {
			paths = append(paths, trimmed)
		}
	}

	return paths
}

// loadWordsFromPaths reads every dictionary file in paths and returns the
// merged, normalized word list. With strict set, the first file that cannot
// be read aborts the load with an error; otherwise unreadable files are
// skipped silently.
func loadWordsFromPaths(paths []string, strict bool) ([]string, error) {
	merged := make([]string, 0, 4096)

	for _, source := range paths {
		words, err := readWordsFromFile(source)

		switch {
		case err == nil:
			merged = append(merged, words...)
		case strict:
			return nil, fmt.Errorf("failed to read dictionary %q: %w", source, err)
		}
	}

	return normalizeWords(merged), nil
}

// normalizeDictionaryLine extracts the word part of one dictionary file line.
//
// Blank lines and lines starting with "#" yield "". In a spell-dictionary
// file, a first line that parses as an integer is also dropped. Otherwise the
// trimmed line up to, but not including, the first "/" is returned.
func normalizeDictionaryLine(line string, lineNumber int, isSpellDictionaryFile bool) string {
	entry := strings.TrimSpace(line)

	switch {
	case entry == "", strings.HasPrefix(entry, "#"):
		return ""
	case isSpellDictionaryFile && lineNumber == 1:
		if _, err := strconv.Atoi(entry); err == nil {
			return ""
		}
	}

	// Anything after the first slash (e.g. affix flags) is discarded.
	word, _, _ := strings.Cut(entry, "/")

	return word
}

// normalizeWords normalizes every entry of words, drops results shorter than
// two bytes, removes duplicates and returns what is left in sorted order.
func normalizeWords(words []string) []string {
	seen := make(map[string]struct{}, len(words))

	for _, raw := range words {
		// An empty result has length zero, so this one test covers it too.
		if token := normalizeToken(raw); len(token) > 1 {
			seen[token] = struct{}{}
		}
	}

	result := make([]string, 0, len(seen))

	for token := range seen {
		result = append(result, token)
	}

	sort.Strings(result)

	return result
}

// normalizeToken trims and lowercases token, then returns the first run of
// ASCII letters found in it. It returns "" when there is no such run.
func normalizeToken(token string) string {
	lowered := strings.ToLower(strings.TrimSpace(token))

	// FindString already yields "" for empty input and for no match.
	return wordPattern.FindString(lowered)
}

// cachePath returns the location of the serialized fuzzy model inside the
// user's cache directory, or an error when that directory cannot be
// determined.
func cachePath() (string, error) {
	const (
		cacheDirectory = "kivia"
		cacheFileName  = "fuzzy_model_v1.json"
	)

	cacheRoot, err := os.UserCacheDir()
	if err != nil {
		return "", err
	}

	return filepath.Join(cacheRoot, cacheDirectory, cacheFileName), nil
}

// loadCachedModel loads the fuzzy model stored at the cache location. It
// returns a nil model together with the error when the location cannot be
// determined or loading fails.
func loadCachedModel() (*fuzzy.Model, error) {
	cacheFile, err := cachePath()
	if err != nil {
		return nil, err
	}

	cached, loadErr := fuzzy.Load(cacheFile)
	if loadErr != nil {
		// Never hand back a model alongside an error.
		return nil, loadErr
	}

	return cached, nil
}

// saveCachedModel writes model to the cache location, creating the parent
// directory if needed. It returns an error when model is nil, when the cache
// location cannot be determined, or when creating the directory or saving the
// model fails.
func saveCachedModel(model *fuzzy.Model) error {
	if model == nil {
		return errors.New("Model cannot be nil.")
	}

	cacheFile, err := cachePath()
	if err != nil {
		return err
	}

	if mkdirErr := os.MkdirAll(filepath.Dir(cacheFile), 0o755); mkdirErr != nil {
		return mkdirErr
	}

	return model.Save(cacheFile)
}

// defaultDictionaryPaths lists the dictionary files loadWords tries when
// KIVIA_DICTIONARY_PATH does not supply any path. They are loaded in
// non-strict mode, so entries that cannot be read are skipped.
var defaultDictionaryPaths = []string{
	"/usr/share/dict/words",
	"/usr/dict/words",
	"/usr/share/dict/web2",
	"/usr/share/dict/web2a",
	"/usr/share/dict/propernames",
	"/usr/share/dict/connectives",
	"/usr/share/hunspell/en_US.dic",
	"/usr/share/hunspell/en_GB.dic",
	"/usr/share/hunspell/en_CA.dic",
	"/usr/share/hunspell/en_AU.dic",
	"/usr/share/myspell/en_US.dic",
	"/usr/share/myspell/en_GB.dic",
	"/opt/homebrew/share/hunspell/en_US.dic",
	"/opt/homebrew/share/hunspell/en_GB.dic",
	"/usr/local/share/hunspell/en_US.dic",
	"/usr/local/share/hunspell/en_GB.dic",
}