aboutsummaryrefslogtreecommitdiff
path: root/internal
diff options
context:
space:
mode:
Diffstat (limited to 'internal')
-rw-r--r--internal/analyze/analyze.go208
-rw-r--r--internal/analyze/analyze_test.go174
-rw-r--r--internal/analyze/resources.go26
-rw-r--r--internal/collect/collect.go331
-rw-r--r--internal/nlp/dictionary.go614
-rw-r--r--internal/nlp/dictionary_test.go165
-rw-r--r--internal/report/report.go80
7 files changed, 1598 insertions, 0 deletions
diff --git a/internal/analyze/analyze.go b/internal/analyze/analyze.go
new file mode 100644
index 0000000..315f086
--- /dev/null
+++ b/internal/analyze/analyze.go
@@ -0,0 +1,208 @@
+package analyze
+
+import (
+ "github.com/Fuwn/kivia/internal/collect"
+ "strings"
+ "unicode"
+ "unicode/utf8"
+)
+
+// Options configures a call to Run.
+type Options struct {
+ // MinEvaluationLength is the minimum number of runes an identifier, and
+ // each of its tokens, must have before it is evaluated. Run treats values
+ // of zero or less as 1.
+ MinEvaluationLength int
+}
+
+// Result is the outcome of Run.
+type Result struct {
+ // Violations lists every identifier that failed evaluation, in input order.
+ Violations []Violation `json:"violations"`
+}
+
+// Violation pairs a flagged identifier with a human-readable reason.
+type Violation struct {
+ // Identifier is the collected identifier that was flagged.
+ Identifier collect.Identifier `json:"identifier"`
+ // Reason names the first token of the identifier that failed evaluation.
+ Reason string `json:"reason"`
+}
+
+// Run evaluates identifiers against the dictionary resources and returns the
+// ones that violate the naming rules, in input order.
+//
+// Identifiers whose trimmed name is shorter (in runes) than
+// options.MinEvaluationLength are skipped; a non-positive minimum is treated
+// as 1. The same minimum is applied to the individual tokens of each name.
+// An error is returned only when the resources cannot be loaded.
+func Run(identifiers []collect.Identifier, options Options) (Result, error) {
+	loaded, err := getResources()
+	if err != nil {
+		return Result{}, err
+	}
+
+	threshold := options.MinEvaluationLength
+	if threshold < 1 {
+		threshold = 1
+	}
+
+	// Start from an empty, non-nil slice so the JSON form is [] rather than null.
+	found := []Violation{}
+
+	for _, candidate := range identifiers {
+		trimmedName := strings.TrimSpace(candidate.Name)
+
+		if utf8.RuneCountInString(trimmedName) >= threshold {
+			if outcome := evaluateIdentifier(candidate, loaded, threshold); outcome.isViolation {
+				found = append(found, Violation{Identifier: candidate, Reason: outcome.reason})
+			}
+		}
+	}
+
+	return Result{Violations: found}, nil
+}
+
+// evaluationResult is the verdict evaluateIdentifier reaches for one identifier.
+type evaluationResult struct {
+ // isViolation is true when a token of the identifier was rejected.
+ isViolation bool
+ // reason describes the rejected token; it is empty when isViolation is false.
+ reason string
+}
+
+// evaluateIdentifier splits the identifier's name into tokens and reports the
+// first token that is neither acceptable nor exempt.
+//
+// A token is exempt when it is shorter than minimumTokenLength runes, contains
+// a non-letter, is a dictionary word, or appears fully upper-cased inside the
+// identifier name. The first remaining token produces a violation: an
+// "abbreviation" reason when the dictionary offers an expansion for it, and a
+// "not found" reason otherwise. A blank name yields the zero result.
+func evaluateIdentifier(identifier collect.Identifier, resources resources, minimumTokenLength int) evaluationResult {
+	trimmed := strings.TrimSpace(identifier.Name)
+
+	// tokenize returns nil for a blank name, so no separate empty check is needed.
+	for _, word := range tokenize(trimmed) {
+		switch {
+		case utf8.RuneCountInString(word) < minimumTokenLength:
+			// Too short to judge.
+		case !isAlphabeticToken(word):
+			// Numeric or mixed tokens are not looked up.
+		case resources.dictionary.IsWord(word):
+			// Known word.
+		case isUpperCaseToken(trimmed, word):
+			// Written in capitals in the source; accepted as-is.
+		case isDisallowedAbbreviation(word, resources):
+			return evaluationResult{isViolation: true, reason: "Contains abbreviation: " + word + "."}
+		default:
+			return evaluationResult{isViolation: true, reason: "Term not found in dictionary: " + word + "."}
+		}
+	}
+
+	return evaluationResult{}
+}
+
+// isUpperCaseToken reports whether token, which is two to eight runes long,
+// occurs in identifierName in fully upper-cased form.
+func isUpperCaseToken(identifierName string, token string) bool {
+	runeCount := utf8.RuneCountInString(token)
+	withinBounds := runeCount >= 2 && runeCount <= 8
+
+	return withinBounds && strings.Contains(identifierName, strings.ToUpper(token))
+}
+
+// tokenize breaks an identifier name into lower-cased tokens. The name is
+// first split on '_', '-' and ' ', and each piece is then split further by
+// splitCamel. A blank name yields nil.
+func tokenize(name string) []string {
+	trimmed := strings.TrimSpace(name)
+	if len(trimmed) == 0 {
+		return nil
+	}
+
+	isSeparator := func(character rune) bool {
+		switch character {
+		case '_', '-', ' ':
+			return true
+		}
+
+		return false
+	}
+
+	segments := strings.FieldsFunc(trimmed, isSeparator)
+	if len(segments) == 0 {
+		return nil
+	}
+
+	tokens := make([]string, 0, 2*len(segments))
+
+	for index := range segments {
+		// splitCamel returns nil for an empty segment, so nothing is appended for it.
+		tokens = append(tokens, splitCamel(segments[index])...)
+	}
+
+	return tokens
+}
+
+// splitCamel splits input at camel-case and digit boundaries and returns the
+// pieces lower-cased. A boundary is placed
+//   - between a lower-case rune and an upper-case rune ("userName"),
+//   - wherever a digit meets a non-digit ("sha256"), and
+//   - before the last capital of a capital run that is followed by a
+//     lower-case rune ("HTTPServer" -> "http", "server").
+//
+// An empty input yields nil.
+func splitCamel(input string) []string {
+	letters := []rune(input)
+	if len(letters) == 0 {
+		return nil
+	}
+
+	pieces := make([]string, 0, 2)
+	segmentStart := 0
+	lastIndex := len(letters) - 1
+
+	for position := 1; position <= lastIndex; position++ {
+		before, here := letters[position-1], letters[position]
+		boundary := false
+
+		switch {
+		case unicode.IsLower(before) && unicode.IsUpper(here):
+			boundary = true
+		case unicode.IsDigit(before) != unicode.IsDigit(here):
+			boundary = true
+		case unicode.IsUpper(before) && unicode.IsUpper(here):
+			// Only split a run of capitals when a lower-case rune follows.
+			boundary = position < lastIndex && unicode.IsLower(letters[position+1])
+		}
+
+		if !boundary {
+			continue
+		}
+
+		pieces = append(pieces, strings.ToLower(string(letters[segmentStart:position])))
+		segmentStart = position
+	}
+
+	return append(pieces, strings.ToLower(string(letters[segmentStart:])))
+}
+
+// isDisallowedAbbreviation reports whether the dictionary can expand token
+// into a longer word, which marks the token as an abbreviation.
+func isDisallowedAbbreviation(token string, resources resources) bool {
+	if _, found := resources.dictionary.AbbreviationExpansion(token); found {
+		return true
+	}
+
+	return false
+}
+
+// isAlphabeticToken reports whether token is non-empty and consists solely of
+// Unicode letters.
+func isAlphabeticToken(token string) bool {
+	isNotLetter := func(character rune) bool {
+		return !unicode.IsLetter(character)
+	}
+
+	return token != "" && strings.IndexFunc(token, isNotLetter) < 0
+}
diff --git a/internal/analyze/analyze_test.go b/internal/analyze/analyze_test.go
new file mode 100644
index 0000000..8aebf8d
--- /dev/null
+++ b/internal/analyze/analyze_test.go
@@ -0,0 +1,174 @@
+package analyze_test
+
+import (
+ "github.com/Fuwn/kivia/internal/analyze"
+ "github.com/Fuwn/kivia/internal/collect"
+ "os"
+ "path/filepath"
+ "testing"
+)
+
+// dictionaryPathForTests returns the relative path of the word list used as
+// the dictionary fixture by the tests in this package.
+func dictionaryPathForTests(testingContext *testing.T) string {
+	testingContext.Helper()
+
+	segments := []string{"..", "..", "testdata", "dictionary", "words.txt"}
+
+	return filepath.Join(segments...)
+}
+
+// TestAnalyzeFlagsAbbreviations collects identifiers from the samplepkg
+// fixture and checks that the abbreviated names ctx, userNum and usr are
+// reported as violations.
+func TestAnalyzeFlagsAbbreviations(testingContext *testing.T) {
+ testingContext.Setenv("KIVIA_DICTIONARY_PATH", dictionaryPathForTests(testingContext))
+
+ root := filepath.Join("..", "..", "testdata", "samplepkg")
+ identifiers, err := collect.FromPath(root)
+
+ if err != nil {
+ testingContext.Fatalf("collect.FromPath returned an error: %v", err)
+ }
+
+ result, err := analyze.Run(identifiers, analyze.Options{})
+
+ if err != nil {
+ testingContext.Fatalf("analyze.Run returned an error: %v", err)
+ }
+
+ if len(result.Violations) == 0 {
+ testingContext.Fatalf("Expected at least one violation, got none.")
+ }
+
+ mustContainViolation(testingContext, result, "ctx")
+ mustContainViolation(testingContext, result, "userNum")
+ mustContainViolation(testingContext, result, "usr")
+}
+
+// TestAnalyzeFlagsTechnicalTermsNotInDictionary checks that userID and
+// httpClient are both reported when evaluated against the fixture dictionary.
+func TestAnalyzeFlagsTechnicalTermsNotInDictionary(testingContext *testing.T) {
+ testingContext.Setenv("KIVIA_DICTIONARY_PATH", dictionaryPathForTests(testingContext))
+
+ identifiers := []collect.Identifier{
+ {Name: "userID", Kind: "variable"},
+ {Name: "httpClient", Kind: "variable"},
+ }
+ result, err := analyze.Run(identifiers, analyze.Options{})
+
+ if err != nil {
+ testingContext.Fatalf("analyze.Run returned an error: %v", err)
+ }
+
+ if len(result.Violations) == 0 {
+ testingContext.Fatalf("Expected violations, got none.")
+ }
+
+ mustContainViolation(testingContext, result, "userID")
+ mustContainViolation(testingContext, result, "httpClient")
+}
+
+// TestAnalyzeDoesNotFlagNormalDictionaryWords checks that identifiers built
+// from the listed names (including camel-case and plural forms) produce no
+// violations with the fixture dictionary.
+func TestAnalyzeDoesNotFlagNormalDictionaryWords(testingContext *testing.T) {
+ testingContext.Setenv("KIVIA_DICTIONARY_PATH", dictionaryPathForTests(testingContext))
+
+ identifiers := []collect.Identifier{
+ {Name: "options", Kind: "variable"},
+ {Name: "parsedResource", Kind: "variable"},
+ {Name: "hasResources", Kind: "variable"},
+ {Name: "allowlist", Kind: "variable"},
+ }
+ result, err := analyze.Run(identifiers, analyze.Options{})
+
+ if err != nil {
+ testingContext.Fatalf("analyze.Run returned an error: %v", err)
+ }
+
+ if len(result.Violations) != 0 {
+ testingContext.Fatalf("Expected no violations, got %d.", len(result.Violations))
+ }
+}
+
+// TestAnalyzeMinEvaluationLengthSkipsSingleLetterIdentifiers checks that with
+// MinEvaluationLength set to 2 the one-letter names t and v are skipped and
+// only ctx is reported.
+func TestAnalyzeMinEvaluationLengthSkipsSingleLetterIdentifiers(testingContext *testing.T) {
+ testingContext.Setenv("KIVIA_DICTIONARY_PATH", dictionaryPathForTests(testingContext))
+
+ identifiers := []collect.Identifier{
+ {Name: "t", Kind: "parameter"},
+ {Name: "v", Kind: "receiver"},
+ {Name: "ctx", Kind: "parameter"},
+ }
+ result, err := analyze.Run(identifiers, analyze.Options{
+ MinEvaluationLength: 2,
+ })
+
+ if err != nil {
+ testingContext.Fatalf("analyze.Run returned an error: %v", err)
+ }
+
+ if len(result.Violations) != 1 {
+ testingContext.Fatalf("Expected one violation, got %d.", len(result.Violations))
+ }
+
+ if result.Violations[0].Identifier.Name != "ctx" {
+ testingContext.Fatalf("Expected only ctx to be evaluated, got %q.", result.Violations[0].Identifier.Name)
+ }
+}
+
+// TestAnalyzeFlagsExpressionAbbreviation checks that the single identifier
+// expr is reported as the one and only violation.
+func TestAnalyzeFlagsExpressionAbbreviation(testingContext *testing.T) {
+ testingContext.Setenv("KIVIA_DICTIONARY_PATH", dictionaryPathForTests(testingContext))
+
+ identifiers := []collect.Identifier{
+ {Name: "expr", Kind: "variable"},
+ }
+ result, err := analyze.Run(identifiers, analyze.Options{
+ MinEvaluationLength: 1,
+ })
+
+ if err != nil {
+ testingContext.Fatalf("analyze.Run returned an error: %v", err)
+ }
+
+ if len(result.Violations) != 1 {
+ testingContext.Fatalf("Expected one violation, got %d.", len(result.Violations))
+ }
+
+ if result.Violations[0].Identifier.Name != "expr" {
+ testingContext.Fatalf("Expected expr to be flagged, got %q.", result.Violations[0].Identifier.Name)
+ }
+}
+
+// TestAnalyzeAllowsUpperCaseTokens checks that an identifier written entirely
+// in capitals (JSON) is not reported.
+func TestAnalyzeAllowsUpperCaseTokens(testingContext *testing.T) {
+ testingContext.Setenv("KIVIA_DICTIONARY_PATH", dictionaryPathForTests(testingContext))
+
+ identifiers := []collect.Identifier{
+ {Name: "JSON", Kind: "variable"},
+ }
+ result, err := analyze.Run(identifiers, analyze.Options{})
+
+ if err != nil {
+ testingContext.Fatalf("analyze.Run returned an error: %v", err)
+ }
+
+ if len(result.Violations) != 0 {
+ testingContext.Fatalf("Expected no violations, got %d.", len(result.Violations))
+ }
+}
+
+// TestAnalyzeFailsWhenDictionaryIsUnavailable points KIVIA_DICTIONARY_PATH at
+// a file containing only a newline and checks that analyze.Run returns an
+// error instead of a result.
+func TestAnalyzeFailsWhenDictionaryIsUnavailable(testingContext *testing.T) {
+ emptyDictionaryPath := filepath.Join(testingContext.TempDir(), "empty.txt")
+
+ if err := os.WriteFile(emptyDictionaryPath, []byte("\n"), 0o644); err != nil {
+ testingContext.Fatalf("os.WriteFile returned an error: %v", err)
+ }
+
+ testingContext.Setenv("KIVIA_DICTIONARY_PATH", emptyDictionaryPath)
+
+ _, err := analyze.Run([]collect.Identifier{{Name: "ctx", Kind: "parameter"}}, analyze.Options{})
+
+ if err == nil {
+ testingContext.Fatalf("Expected analyze.Run to fail when dictionary data is unavailable.")
+ }
+}
+
+// mustContainViolation fails the test unless result holds a violation whose
+// identifier is called name.
+func mustContainViolation(testingContext *testing.T, result analyze.Result, name string) {
+	testingContext.Helper()
+
+	found := false
+
+	for index := range result.Violations {
+		if result.Violations[index].Identifier.Name == name {
+			found = true
+
+			break
+		}
+	}
+
+	if !found {
+		testingContext.Fatalf("Expected a violation for %q.", name)
+	}
+}
diff --git a/internal/analyze/resources.go b/internal/analyze/resources.go
new file mode 100644
index 0000000..f42c757
--- /dev/null
+++ b/internal/analyze/resources.go
@@ -0,0 +1,26 @@
+package analyze
+
+import (
+ "fmt"
+ "github.com/Fuwn/kivia/internal/nlp"
+)
+
+// resources bundles the lookup data the analyzer needs while evaluating
+// identifiers.
+type resources struct {
+ // dictionary answers word and abbreviation queries for individual tokens.
+ dictionary *nlp.Dictionary
+}
+
+// getResources returns the analyzer resources. It currently delegates
+// straight to loadResources, so every call loads them afresh.
+func getResources() (resources, error) {
+	loaded, err := loadResources()
+
+	return loaded, err
+}
+
+func loadResources() (resources, error) {
+ dictionary, err := nlp.NewDictionary()
+
+ if err != nil {
+ return resources{}, fmt.Errorf("Failed to load dictionary: %w", err)
+ }
+
+ return resources{
+ dictionary: dictionary,
+ }, nil
+}
diff --git a/internal/collect/collect.go b/internal/collect/collect.go
new file mode 100644
index 0000000..ccb3b46
--- /dev/null
+++ b/internal/collect/collect.go
@@ -0,0 +1,331 @@
+package collect
+
+import (
+ "bytes"
+ "fmt"
+ "go/ast"
+ "go/parser"
+ "go/printer"
+ "go/token"
+ "io/fs"
+ "os"
+ "path/filepath"
+ "sort"
+ "strings"
+)
+
+// Context carries optional details about where and how an identifier was
+// declared. Every field may be empty.
+type Context struct {
+ // EnclosingFunction is the name of the innermost function declaration
+ // being visited when the identifier was recorded.
+ EnclosingFunction string `json:"enclosingFunction,omitempty"`
+ // Type is the rendered declared type, or for ":=" assignments the name of
+ // the called function used as a stand-in.
+ Type string `json:"type,omitempty"`
+ // ValueExpression is the rendered right-hand side or ranged-over expression.
+ ValueExpression string `json:"valueExpression,omitempty"`
+ // ParentType is the name of the type a field or interface method belongs to.
+ ParentType string `json:"parentType,omitempty"`
+}
+
+// Identifier describes one declared name found in a Go source file.
+type Identifier struct {
+ // Name is the identifier as written in the source.
+ Name string `json:"name"`
+ // Kind classifies the declaration, e.g. "function", "type", "variable",
+ // "parameter", "receiver", "result", "field", "interfaceMethod",
+ // "rangeKey" or "rangeValue".
+ Kind string `json:"kind"`
+ // File is the path of the file the identifier was found in.
+ File string `json:"file"`
+ // Line and Column locate the identifier's name within File.
+ Line int `json:"line"`
+ Column int `json:"column"`
+ // Context holds additional declaration details.
+ Context Context `json:"context"`
+}
+
+func FromPath(path string) ([]Identifier, error) {
+ files, err := discoverFiles(path)
+
+ if err != nil {
+ return nil, err
+ }
+
+ fileSet := token.NewFileSet()
+ identifiers := make([]Identifier, 0, 128)
+
+ for _, filePath := range files {
+ fileNode, parseErr := parser.ParseFile(fileSet, filePath, nil, parser.SkipObjectResolution)
+
+ if parseErr != nil {
+ return nil, fmt.Errorf("Failed to parse %s: %w", filePath, parseErr)
+ }
+
+ collector := visitor{
+ fileSet: fileSet,
+ file: filePath,
+ }
+
+ ast.Walk(&collector, fileNode)
+
+ identifiers = append(identifiers, collector.identifiers...)
+ }
+
+ return identifiers, nil
+}
+
+// visitor is the ast.Visitor that records declared identifiers while one file
+// is walked.
+type visitor struct {
+ // fileSet resolves token positions and is used when rendering expressions.
+ fileSet *token.FileSet
+ // file is the path stored in each recorded Identifier.
+ file string
+ // identifiers accumulates the declarations found so far.
+ identifiers []Identifier
+ // functionStack holds the names of the function declarations currently
+ // being traversed, innermost last.
+ functionStack []string
+ // typeStack holds the names of the type specs currently being traversed,
+ // innermost last.
+ typeStack []string
+}
+
+// Visit implements ast.Visitor. It records the identifiers introduced by
+// function declarations, type specs, var/const specs, ":=" assignments and
+// ":=" range statements, and ignores every other node kind.
+//
+// For function declarations and type specs it returns a scope-exit visitor
+// (see leaveScope) so the matching stack entry is popped once the node's
+// children have been walked; for everything else it returns itself.
+func (identifierVisitor *visitor) Visit(node ast.Node) ast.Visitor {
+ switch typedNode := node.(type) {
+ case *ast.FuncDecl:
+ // The function's own name is recorded before the push, so its
+ // EnclosingFunction is whatever was on the stack beforehand.
+ identifierVisitor.addIdentifier(typedNode.Name, "function", Context{})
+
+ identifierVisitor.functionStack = append(identifierVisitor.functionStack, typedNode.Name.Name)
+
+ identifierVisitor.captureFieldList(typedNode.Recv, "receiver")
+ identifierVisitor.captureFieldList(typedNode.Type.Params, "parameter")
+ identifierVisitor.captureFieldList(typedNode.Type.Results, "result")
+
+ return leaveScope(identifierVisitor, func() {
+ identifierVisitor.functionStack = identifierVisitor.functionStack[:len(identifierVisitor.functionStack)-1]
+ })
+ case *ast.TypeSpec:
+ identifierVisitor.addIdentifier(typedNode.Name, "type", Context{})
+
+ identifierVisitor.typeStack = append(identifierVisitor.typeStack, typedNode.Name.Name)
+
+ // Struct fields and interface methods are captured here directly.
+ identifierVisitor.captureTypeMembers(typedNode.Name.Name, typedNode.Type)
+
+ return leaveScope(identifierVisitor, func() { identifierVisitor.typeStack = identifierVisitor.typeStack[:len(identifierVisitor.typeStack)-1] })
+ case *ast.ValueSpec:
+ // Every name in the spec shares the same rendered type and value list.
+ declaredType := renderExpression(identifierVisitor.fileSet, typedNode.Type)
+ rightHandValue := renderExpressionList(identifierVisitor.fileSet, typedNode.Values)
+
+ for _, name := range typedNode.Names {
+ identifierVisitor.addIdentifier(name, "variable", Context{Type: declaredType, ValueExpression: rightHandValue})
+ }
+ case *ast.AssignStmt:
+ // Only short variable declarations introduce names.
+ if typedNode.Tok != token.DEFINE {
+ break
+ }
+
+ rightHandValue := renderExpressionList(identifierVisitor.fileSet, typedNode.Rhs)
+
+ for index, left := range typedNode.Lhs {
+ identifierNode, ok := left.(*ast.Ident)
+
+ if !ok {
+ continue
+ }
+
+ assignmentContext := Context{ValueExpression: rightHandValue}
+
+ // A type hint is only derived when the left side has a right-hand
+ // expression at the same position.
+ if index < len(typedNode.Rhs) {
+ assignmentContext.Type = inferTypeFromExpression(typedNode.Rhs[index])
+ }
+
+ identifierVisitor.addIdentifier(identifierNode, "variable", assignmentContext)
+ }
+ case *ast.RangeStmt:
+ // Only "for k, v := range x" introduces names.
+ if typedNode.Tok != token.DEFINE {
+ break
+ }
+
+ if keyIdentifier, ok := typedNode.Key.(*ast.Ident); ok {
+ identifierVisitor.addIdentifier(keyIdentifier, "rangeKey", Context{ValueExpression: renderExpression(identifierVisitor.fileSet, typedNode.X)})
+ }
+
+ if valueIdentifier, ok := typedNode.Value.(*ast.Ident); ok {
+ identifierVisitor.addIdentifier(valueIdentifier, "rangeValue", Context{ValueExpression: renderExpression(identifierVisitor.fileSet, typedNode.X)})
+ }
+ }
+
+ return identifierVisitor
+}
+
+// scopeExit is an ast.Visitor that stands in for a visitor while the children
+// of one node are walked, so that a callback can run when that walk ends.
+type scopeExit struct {
+ // parent is the visitor that continues the traversal below the node.
+ parent *visitor
+ // onLeave is invoked when Visit receives a nil node.
+ onLeave func()
+}
+
+// leaveScope returns a visitor that hands traversal back to parent and calls
+// onLeave when it is told that the current node's children are finished.
+func leaveScope(parent *visitor, onLeave func()) ast.Visitor {
+	exit := new(scopeExit)
+	exit.parent = parent
+	exit.onLeave = onLeave
+
+	return exit
+}
+
+// Visit implements ast.Visitor. For a real node it returns the parent visitor
+// without calling the parent's Visit on that node. For the nil node it runs
+// onLeave and returns nil.
+func (scopeExitVisitor *scopeExit) Visit(node ast.Node) ast.Visitor {
+	if node != nil {
+		return scopeExitVisitor.parent
+	}
+
+	scopeExitVisitor.onLeave()
+
+	return nil
+}
+
+// captureFieldList records every named entry of fields under the given kind,
+// with the entry's rendered type as context. A nil list and unnamed entries
+// contribute nothing.
+func (identifierVisitor *visitor) captureFieldList(fields *ast.FieldList, kind string) {
+	if fields == nil {
+		return
+	}
+
+	for fieldIndex := range fields.List {
+		entry := fields.List[fieldIndex]
+		typeText := renderExpression(identifierVisitor.fileSet, entry.Type)
+
+		for nameIndex := range entry.Names {
+			identifierVisitor.addIdentifier(entry.Names[nameIndex], kind, Context{Type: typeText})
+		}
+	}
+}
+
+// captureTypeMembers records the named fields of a struct type as "field" and
+// the named methods of an interface type as "interfaceMethod", each with its
+// rendered type and typeName as the parent. Other type expressions are
+// ignored.
+func (identifierVisitor *visitor) captureTypeMembers(typeName string, typeExpression ast.Expr) {
+	var (
+		members *ast.FieldList
+		kind    string
+	)
+
+	switch typedType := typeExpression.(type) {
+	case *ast.StructType:
+		members, kind = typedType.Fields, "field"
+	case *ast.InterfaceType:
+		members, kind = typedType.Methods, "interfaceMethod"
+	default:
+		return
+	}
+
+	if members == nil {
+		return
+	}
+
+	for _, member := range members.List {
+		memberType := renderExpression(identifierVisitor.fileSet, member.Type)
+
+		for _, memberName := range member.Names {
+			identifierVisitor.addIdentifier(memberName, kind, Context{Type: memberType, ParentType: typeName})
+		}
+	}
+}
+
+// addIdentifier appends identifier to the collected list with its position,
+// the given kind and context, and the current enclosing function. A nil
+// identifier and the blank identifier "_" are ignored.
+func (identifierVisitor *visitor) addIdentifier(identifier *ast.Ident, kind string, context Context) {
+	if identifier == nil {
+		return
+	}
+
+	if identifier.Name == "_" {
+		return
+	}
+
+	context.EnclosingFunction = currentFunction(identifierVisitor.functionStack)
+	location := identifierVisitor.fileSet.Position(identifier.NamePos)
+
+	record := Identifier{
+		Name:    identifier.Name,
+		Kind:    kind,
+		File:    identifierVisitor.file,
+		Line:    location.Line,
+		Column:  location.Column,
+		Context: context,
+	}
+
+	identifierVisitor.identifiers = append(identifierVisitor.identifiers, record)
+}
+
+// currentFunction returns the innermost (last) entry of stack, or "" when the
+// stack is empty.
+func currentFunction(stack []string) string {
+	if depth := len(stack); depth > 0 {
+		return stack[depth-1]
+	}
+
+	return ""
+}
+
+// discoverFiles resolves path into a sorted list of Go source files.
+//
+// path may be a single ".go" file, a directory (searched without descending
+// into subdirectories), or a directory followed by "/..." (searched
+// recursively). An empty search root falls back to ".".
+//
+// Directories named ".git", "vendor" or "node_modules" found below the search
+// root are skipped. The search root itself is always searched, even when it
+// carries one of those names, because the caller asked for it explicitly;
+// previously such a request silently produced no files.
+//
+// The os.Stat or walk error is returned unchanged; a regular file without a
+// ".go" suffix yields an error.
+func discoverFiles(path string) ([]string, error) {
+	searchRoot := path
+	recursive := false
+
+	if strings.HasSuffix(path, "/...") {
+		searchRoot = strings.TrimSuffix(path, "/...")
+		recursive = true
+	}
+
+	if searchRoot == "" {
+		searchRoot = "."
+	}
+
+	pathFileDetails, err := os.Stat(searchRoot)
+
+	if err != nil {
+		return nil, err
+	}
+
+	if !pathFileDetails.IsDir() {
+		if strings.HasSuffix(searchRoot, ".go") {
+			return []string{searchRoot}, nil
+		}
+
+		return nil, fmt.Errorf("Path %q is not a Go file.", searchRoot)
+	}
+
+	files := make([]string, 0, 64)
+	walkErr := filepath.WalkDir(searchRoot, func(candidate string, entry fs.DirEntry, walkError error) error {
+		if walkError != nil {
+			return walkError
+		}
+
+		if entry.IsDir() {
+			// WalkDir reports the root first, with the path it was given.
+			// Never prune it: the exclusions below are for nested directories.
+			if candidate == searchRoot {
+				return nil
+			}
+
+			switch entry.Name() {
+			case ".git", "vendor", "node_modules":
+				return filepath.SkipDir
+			}
+
+			if !recursive {
+				return filepath.SkipDir
+			}
+
+			return nil
+		}
+
+		if strings.HasSuffix(candidate, ".go") {
+			files = append(files, candidate)
+		}
+
+		return nil
+	})
+
+	if walkErr != nil {
+		return nil, walkErr
+	}
+
+	sort.Strings(files)
+
+	return files, nil
+}
+
+func renderExpression(fileSet *token.FileSet, expression ast.Expr) string {
+ if expression == nil {
+ return ""
+ }
+
+ var buffer bytes.Buffer
+
+ if err := printer.Fprint(&buffer, fileSet, expression); err != nil {
+ return ""
+ }
+
+ return buffer.String()
+}
+
+// renderExpressionList renders each expression with renderExpression and
+// joins the results with ", ". An empty list yields "".
+func renderExpressionList(fileSet *token.FileSet, expressions []ast.Expr) string {
+	if len(expressions) == 0 {
+		return ""
+	}
+
+	rendered := make([]string, len(expressions))
+
+	for index, expression := range expressions {
+		rendered[index] = renderExpression(fileSet, expression)
+	}
+
+	return strings.Join(rendered, ", ")
+}
+
+func inferTypeFromExpression(expression ast.Expr) string {
+ switch typedExpression := expression.(type) {
+ case *ast.CallExpr:
+ switch functionExpression := typedExpression.Fun.(type) {
+ case *ast.Ident:
+ return functionExpression.Name
+ case *ast.SelectorExpr:
+ return functionExpression.Sel.Name
+ }
+
+ return ""
+ default:
+ return ""
+ }
+}
diff --git a/internal/nlp/dictionary.go b/internal/nlp/dictionary.go
new file mode 100644
index 0000000..e7db37e
--- /dev/null
+++ b/internal/nlp/dictionary.go
@@ -0,0 +1,614 @@
+package nlp
+
+import (
+ "bufio"
+ "errors"
+ "fmt"
+ "github.com/sajari/fuzzy"
+ "os"
+ "path"
+ "path/filepath"
+ "regexp"
+ "sort"
+ "strconv"
+ "strings"
+ "unicode/utf8"
+)
+
+var wordPattern = regexp.MustCompile(`[A-Za-z]+`)
+
+// Dictionary answers word-membership, spelling-suggestion and
+// abbreviation-expansion queries over a loaded word list.
+type Dictionary struct {
+ // model is the fuzzy-matching model consulted by Suggest.
+ model *fuzzy.Model
+ // words is the set of normalized dictionary words.
+ words map[string]struct{}
+ // wordsByFirstCharacter groups the words by their first rune, each group
+ // sorted, for abbreviation lookups.
+ wordsByFirstCharacter map[rune][]string
+}
+
+// NewDictionary loads the word list and builds a Dictionary around it.
+//
+// The fuzzy model is taken from the on-disk cache when one can be loaded;
+// otherwise a new model is trained on the words and written back to the
+// cache. An error is returned only when the words cannot be loaded.
+//
+// NOTE(review): the cached model is loaded without reference to the word list
+// just read, so a cache written for a different dictionary appears to be
+// reused as-is. Confirm whether that is intended.
+func NewDictionary() (*Dictionary, error) {
+	words, err := loadWords()
+	if err != nil {
+		return nil, err
+	}
+
+	dictionary := &Dictionary{
+		words:                 makeWordSet(words),
+		wordsByFirstCharacter: makeWordsByFirstCharacter(words),
+	}
+
+	if cached, cacheErr := loadCachedModel(); cacheErr == nil {
+		dictionary.model = cached
+
+		return dictionary, nil
+	}
+
+	trained := fuzzy.NewModel()
+
+	trained.SetThreshold(1)
+	trained.SetDepth(1)
+	trained.SetUseAutocomplete(false)
+	trained.Train(words)
+
+	// Caching is best effort: a failed save only costs retraining next time.
+	_ = saveCachedModel(trained)
+
+	dictionary.model = trained
+
+	return dictionary, nil
+}
+
+// IsWord reports whether token, once normalized, is a dictionary word or an
+// inflection or spelling variant of one. It is false for a nil receiver and
+// for tokens that normalize to nothing.
+func (dictionary *Dictionary) IsWord(token string) bool {
+	normalized := normalizeToken(token)
+
+	return normalized != "" && dictionary != nil && dictionary.isLexiconWord(normalized)
+}
+
+// Suggest returns the fuzzy model's top spelling suggestion for token, or ""
+// when the token is empty after normalization, the dictionary or its model is
+// missing, the token is already a known word, or the model offers nothing
+// different from the token itself.
+func (dictionary *Dictionary) Suggest(token string) string {
+	normalized := normalizeToken(token)
+
+	if normalized == "" || dictionary == nil || dictionary.model == nil || dictionary.isLexiconWord(normalized) {
+		return ""
+	}
+
+	suggestions := dictionary.model.SpellCheckSuggestions(normalized, 1)
+
+	if len(suggestions) > 0 && suggestions[0] != normalized {
+		return suggestions[0]
+	}
+
+	return ""
+}
+
+// isLexiconWord reports whether token is in the word set, either directly or
+// through one of its inflection candidates, spelling variants, or spelling
+// variants of its inflection candidates. token is expected to be normalized
+// already.
+func (dictionary *Dictionary) isLexiconWord(token string) bool {
+	if dictionary == nil {
+		return false
+	}
+
+	if _, known := dictionary.words[token]; known {
+		return true
+	}
+
+	inflections := inflectionCandidates(token)
+
+	candidates := make([]string, 0, 16)
+	candidates = append(candidates, inflections...)
+	candidates = append(candidates, spellingVariantCandidates(token)...)
+
+	for _, inflection := range inflections {
+		candidates = append(candidates, spellingVariantCandidates(inflection)...)
+	}
+
+	// checked avoids looking the same candidate up twice.
+	checked := make(map[string]struct{}, len(candidates))
+
+	for _, candidate := range candidates {
+		skip := candidate == "" || candidate == token
+
+		if !skip {
+			_, skip = checked[candidate]
+		}
+
+		if skip {
+			continue
+		}
+
+		checked[candidate] = struct{}{}
+
+		if _, known := dictionary.words[candidate]; known {
+			return true
+		}
+	}
+
+	return false
+}
+
+func (dictionary *Dictionary) AbbreviationExpansion(token string) (string, bool) {
+ token = normalizeToken(token)
+
+ if token == "" || dictionary == nil {
+ return "", false
+ }
+
+ tokenLength := utf8.RuneCountInString(token)
+
+ if tokenLength <= 1 || tokenLength > 4 {
+ return "", false
+ }
+
+ firstCharacter, _ := utf8.DecodeRuneInString(token)
+ candidates := dictionary.wordsByFirstCharacter[firstCharacter]
+
+ if len(candidates) == 0 {
+ return "", false
+ }
+
+ bestCandidate := ""
+ bestScore := 1 << 30
+
+ for _, candidate := range candidates {
+ if !isLikelyAbbreviationForToken(token, candidate) {
+ continue
+ }
+
+ score := abbreviationScore(token, candidate)
+
+ if score < bestScore {
+ bestScore = score
+ bestCandidate = candidate
+ }
+ }
+
+ if bestCandidate == "" {
+ return "", false
+ }
+
+ return bestCandidate, true
+}
+
+// isLikelyAbbreviationForToken reports whether token plausibly abbreviates
+// candidate. The candidate must be longer than the token and contain the
+// token's runes in order. Beyond that, either the token (at most four runes)
+// is a prefix of the candidate, or the token's consonants (token at most five
+// runes) appear in order among the candidate's consonants.
+func isLikelyAbbreviationForToken(token string, candidate string) bool {
+	if candidate == "" || token == "" || token == candidate {
+		return false
+	}
+
+	tokenLength := utf8.RuneCountInString(token)
+
+	if utf8.RuneCountInString(candidate) <= tokenLength || !isSubsequence(token, candidate) {
+		return false
+	}
+
+	if tokenLength <= 4 && strings.HasPrefix(candidate, token) {
+		return true
+	}
+
+	tokenSkeleton := consonantSkeleton(token)
+	candidateSkeleton := consonantSkeleton(candidate)
+
+	if tokenSkeleton == "" || candidateSkeleton == "" {
+		return false
+	}
+
+	return tokenLength <= 5 && isSubsequence(tokenSkeleton, candidateSkeleton)
+}
+
+// abbreviationScore ranks candidate as an expansion of token; lower is
+// better. Each rune by which the candidate is longer costs 10, and a
+// candidate that starts with the token earns a bonus of 3.
+func abbreviationScore(token string, candidate string) int {
+	gap := utf8.RuneCountInString(candidate) - utf8.RuneCountInString(token)
+	if gap < 0 {
+		gap = 0
+	}
+
+	score := 10 * gap
+
+	if strings.HasPrefix(candidate, token) {
+		return score - 3
+	}
+
+	return score
+}
+
+// isSubsequence reports whether the runes of shorter occur in longer in the
+// same order, not necessarily adjacent. An empty shorter is a subsequence of
+// anything.
+func isSubsequence(shorter string, longer string) bool {
+	remaining := []rune(shorter)
+
+	for _, character := range longer {
+		if len(remaining) == 0 {
+			break
+		}
+
+		if character == remaining[0] {
+			remaining = remaining[1:]
+		}
+	}
+
+	return len(remaining) == 0
+}
+
+// consonantSkeleton returns word with the lower-case vowels a, e, i, o and u
+// removed. All other runes, upper-case vowels included, are kept.
+func consonantSkeleton(word string) string {
+	var skeleton strings.Builder
+
+	for _, character := range word {
+		if !strings.ContainsRune("aeiou", character) {
+			skeleton.WriteRune(character)
+		}
+	}
+
+	return skeleton.String()
+}
+
+// inflectionCandidates returns possible base forms of token obtained by
+// undoing common English suffixes. The results are guesses for the caller to
+// look up in the dictionary; many will not be real words.
+//
+// Handled suffixes, in the order their candidates are produced:
+//   - "ies" becomes "y"; "es" and "s" are dropped;
+//   - "ed", "ing" and "er" are dropped, also offering the stem with a
+//     restored final "e" and, when the stem ends in a doubled letter, the
+//     stem with that doubling undone ("stopped" and "running" give "stop"
+//     and "run");
+//   - "ize", "ized", "izing", "izer" and "ization" are dropped.
+//
+// A suffix is only removed when something remains. Undoing the doubling for
+// "ing" brings that suffix in line with "ed" and "er", which already did so.
+func inflectionCandidates(token string) []string {
+	candidates := make([]string, 0, 8)
+
+	// stripped returns token without suffix, provided a non-empty stem is left.
+	stripped := func(suffix string) (string, bool) {
+		if len(token) > len(suffix) && strings.HasSuffix(token, suffix) {
+			return token[:len(token)-len(suffix)], true
+		}
+
+		return "", false
+	}
+
+	// appendStemForms adds the stem, the stem plus a silent "e", and the stem
+	// with a trailing doubled letter reduced to one.
+	appendStemForms := func(stem string) {
+		candidates = append(candidates, stem, stem+"e")
+
+		if length := len(stem); length >= 2 && stem[length-1] == stem[length-2] {
+			candidates = append(candidates, stem[:length-1])
+		}
+	}
+
+	if stem, ok := stripped("ies"); ok {
+		candidates = append(candidates, stem+"y")
+	}
+
+	for _, suffix := range []string{"es", "s"} {
+		if stem, ok := stripped(suffix); ok {
+			candidates = append(candidates, stem)
+		}
+	}
+
+	for _, suffix := range []string{"ed", "ing", "er"} {
+		if stem, ok := stripped(suffix); ok {
+			appendStemForms(stem)
+		}
+	}
+
+	for _, suffix := range []string{"ize", "ized", "izing", "izer", "ization"} {
+		if stem, ok := stripped(suffix); ok {
+			candidates = append(candidates, stem)
+		}
+	}
+
+	return candidates
+}
+
+// spellingVariantCandidates returns token respelled between British and
+// American suffix conventions (-ise/-ize families, -our/-or, -tre/-ter), in
+// both directions. Only suffixes that token actually ends with contribute.
+func spellingVariantCandidates(token string) []string {
+	suffixPairs := [...][2]string{
+		{"isation", "ization"},
+		{"ization", "isation"},
+		{"ising", "izing"},
+		{"izing", "ising"},
+		{"ised", "ized"},
+		{"ized", "ised"},
+		{"iser", "izer"},
+		{"izer", "iser"},
+		{"ise", "ize"},
+		{"ize", "ise"},
+		{"our", "or"},
+		{"or", "our"},
+		{"tre", "ter"},
+		{"ter", "tre"},
+	}
+
+	variants := make([]string, 0, 8)
+
+	for _, pair := range suffixPairs {
+		appendSuffixVariant(&variants, token, pair[0], pair[1])
+	}
+
+	return variants
+}
+
+// appendSuffixVariant appends token with fromSuffix replaced by toSuffix to
+// *candidates. Nothing is appended when token does not end in fromSuffix or
+// consists of the suffix alone.
+func appendSuffixVariant(candidates *[]string, token string, fromSuffix string, toSuffix string) {
+	if strings.HasSuffix(token, fromSuffix) {
+		if stem := token[:len(token)-len(fromSuffix)]; stem != "" {
+			*candidates = append(*candidates, stem+toSuffix)
+		}
+	}
+}
+
+// makeWordSet returns a set holding every entry of words.
+func makeWordSet(words []string) map[string]struct{} {
+	lookup := make(map[string]struct{}, len(words))
+
+	for index := range words {
+		lookup[words[index]] = struct{}{}
+	}
+
+	return lookup
+}
+
+// makeWordsByFirstCharacter groups words by their first rune and sorts each
+// group. Empty strings are left out.
+func makeWordsByFirstCharacter(words []string) map[rune][]string {
+	groups := make(map[rune][]string)
+
+	for _, word := range words {
+		if word == "" {
+			continue
+		}
+
+		initial, _ := utf8.DecodeRuneInString(word)
+		groups[initial] = append(groups[initial], word)
+	}
+
+	for _, group := range groups {
+		sort.Strings(group)
+	}
+
+	return groups
+}
+
+// loadWords returns the normalized dictionary words.
+//
+// When KIVIA_DICTIONARY_PATH names at least one path, only those files are
+// read and any unreadable file is an error. Otherwise the default system
+// dictionary locations are tried and unreadable ones are skipped. In both
+// cases an empty result is reported as an error.
+func loadWords() ([]string, error) {
+	paths := defaultDictionaryPaths
+	strict := false
+	emptyMessage := "no usable dictionary words found; set KIVIA_DICTIONARY_PATH"
+
+	if configured := parseDictionaryPaths(os.Getenv("KIVIA_DICTIONARY_PATH")); len(configured) > 0 {
+		paths = configured
+		strict = true
+		emptyMessage = "configured dictionary sources contain no usable words"
+	}
+
+	words, err := loadWordsFromPaths(paths, strict)
+	if err != nil {
+		return nil, err
+	}
+
+	if len(words) == 0 {
+		return nil, errors.New(emptyMessage)
+	}
+
+	return words, nil
+}
+
+// readWordsFromFile reads filePath line by line and returns its normalized
+// words. Files whose extension is ".dic" (in any letter case) are treated as
+// spell-checker dictionaries, whose numeric first line is skipped by
+// normalizeDictionaryLine. Open and read errors are returned unchanged.
+func readWordsFromFile(filePath string) ([]string, error) {
+	handle, err := os.Open(filePath)
+	if err != nil {
+		return nil, err
+	}
+
+	defer handle.Close()
+
+	spellFormat := strings.EqualFold(path.Ext(filePath), ".dic")
+	lines := bufio.NewScanner(handle)
+	collected := make([]string, 0, 1024)
+
+	for lineNumber := 1; lines.Scan(); lineNumber++ {
+		if entry := normalizeDictionaryLine(lines.Text(), lineNumber, spellFormat); entry != "" {
+			collected = append(collected, entry)
+		}
+	}
+
+	if scanErr := lines.Err(); scanErr != nil {
+		return nil, scanErr
+	}
+
+	return normalizeWords(collected), nil
+}
+
+// parseDictionaryPaths splits value into dictionary paths. Entries may be
+// separated by commas or by the operating system's path-list separator;
+// surrounding whitespace is trimmed and empty entries are dropped. A blank
+// value yields nil.
+func parseDictionaryPaths(value string) []string {
+	cleaned := strings.TrimSpace(value)
+	if cleaned == "" {
+		return nil
+	}
+
+	entries := strings.FieldsFunc(cleaned, func(character rune) bool {
+		return character == ',' || character == os.PathListSeparator
+	})
+
+	paths := make([]string, 0, len(entries))
+
+	for _, entry := range entries {
+		if candidate := strings.TrimSpace(entry); candidate != "" {
+			paths = append(paths, candidate)
+		}
+	}
+
+	return paths
+}
+
+// loadWordsFromPaths reads every dictionary in paths and returns the merged,
+// normalized word list. When strict is true the first unreadable file aborts
+// with a wrapped error; otherwise unreadable files are skipped.
+func loadWordsFromPaths(paths []string, strict bool) ([]string, error) {
+	merged := make([]string, 0, 4096)
+
+	for _, dictionaryPath := range paths {
+		words, err := readWordsFromFile(dictionaryPath)
+
+		switch {
+		case err == nil:
+			merged = append(merged, words...)
+		case strict:
+			return nil, fmt.Errorf("failed to read dictionary %q: %w", dictionaryPath, err)
+		}
+	}
+
+	return normalizeWords(merged), nil
+}
+
+// normalizeDictionaryLine extracts the word from one dictionary line. Blank
+// lines and lines starting with "#" yield "", as does a purely numeric first
+// line of a spell-checker dictionary. Anything from the first "/" onward is
+// cut off.
+func normalizeDictionaryLine(line string, lineNumber int, isSpellDictionaryFile bool) string {
+	entry := strings.TrimSpace(line)
+
+	switch {
+	case entry == "", strings.HasPrefix(entry, "#"):
+		return ""
+	case isSpellDictionaryFile && lineNumber == 1:
+		if _, err := strconv.Atoi(entry); err == nil {
+			return ""
+		}
+	}
+
+	word, _, _ := strings.Cut(entry, "/")
+
+	return word
+}
+
+// normalizeWords normalizes every entry with normalizeToken, drops results of
+// one byte or less, removes duplicates and returns what remains in sorted
+// order.
+func normalizeWords(words []string) []string {
+	seen := make(map[string]struct{}, len(words))
+
+	for _, raw := range words {
+		if cleaned := normalizeToken(raw); len(cleaned) > 1 {
+			seen[cleaned] = struct{}{}
+		}
+	}
+
+	sorted := make([]string, 0, len(seen))
+
+	for word := range seen {
+		sorted = append(sorted, word)
+	}
+
+	sort.Strings(sorted)
+
+	return sorted
+}
+
+// normalizeToken trims and lower-cases token and returns its first run of
+// ASCII letters, or "" when it contains none.
+func normalizeToken(token string) string {
+	lowered := strings.ToLower(strings.TrimSpace(token))
+
+	// FindString returns "" when there is no match, which also covers an
+	// empty input.
+	return wordPattern.FindString(lowered)
+}
+
+// cachePath returns the location of the cached fuzzy model inside the user's
+// cache directory, or the error from os.UserCacheDir.
+func cachePath() (string, error) {
+	cacheRoot, err := os.UserCacheDir()
+	if err == nil {
+		return filepath.Join(cacheRoot, "kivia", "fuzzy_model_v1.json"), nil
+	}
+
+	return "", err
+}
+
+// loadCachedModel loads the fuzzy model from the cache file. It returns nil
+// and the error when the cache location cannot be determined or the file
+// cannot be loaded.
+func loadCachedModel() (*fuzzy.Model, error) {
+	location, err := cachePath()
+	if err != nil {
+		return nil, err
+	}
+
+	cached, loadErr := fuzzy.Load(location)
+	if loadErr != nil {
+		return nil, loadErr
+	}
+
+	return cached, nil
+}
+
+// saveCachedModel writes model to the cache file, creating the containing
+// directory when needed. A nil model is rejected with an error.
+func saveCachedModel(model *fuzzy.Model) error {
+	if model == nil {
+		return errors.New("Model cannot be nil.")
+	}
+
+	location, err := cachePath()
+	if err != nil {
+		return err
+	}
+
+	if mkdirErr := os.MkdirAll(filepath.Dir(location), 0o755); mkdirErr != nil {
+		return mkdirErr
+	}
+
+	return model.Save(location)
+}
+
+// defaultDictionaryPaths lists the dictionary files that loadWords tries when
+// KIVIA_DICTIONARY_PATH is not set. Files that cannot be read are skipped;
+// the words of all readable files are merged.
+var defaultDictionaryPaths = []string{
+ "/usr/share/dict/words",
+ "/usr/dict/words",
+ "/usr/share/dict/web2",
+ "/usr/share/dict/web2a",
+ "/usr/share/dict/propernames",
+ "/usr/share/dict/connectives",
+ "/usr/share/hunspell/en_US.dic",
+ "/usr/share/hunspell/en_GB.dic",
+ "/usr/share/hunspell/en_CA.dic",
+ "/usr/share/hunspell/en_AU.dic",
+ "/usr/share/myspell/en_US.dic",
+ "/usr/share/myspell/en_GB.dic",
+ "/opt/homebrew/share/hunspell/en_US.dic",
+ "/opt/homebrew/share/hunspell/en_GB.dic",
+ "/usr/local/share/hunspell/en_US.dic",
+ "/usr/local/share/hunspell/en_GB.dic",
+}
diff --git a/internal/nlp/dictionary_test.go b/internal/nlp/dictionary_test.go
new file mode 100644
index 0000000..c24e332
--- /dev/null
+++ b/internal/nlp/dictionary_test.go
@@ -0,0 +1,165 @@
+package nlp_test
+
+import (
+ "github.com/Fuwn/kivia/internal/nlp"
+ "os"
+ "path/filepath"
+ "strings"
+ "testing"
+)
+
+// TestDictionaryRecognizesLexiconWords points KIVIA_DICTIONARY_PATH at the
+// repository's testdata word list and checks that IsWord accepts "options",
+// "has", and "resources".
+func TestDictionaryRecognizesLexiconWords(testingContext *testing.T) {
+	testingContext.Setenv(
+		"KIVIA_DICTIONARY_PATH",
+		filepath.Join("..", "..", "testdata", "dictionary", "words.txt"),
+	)
+
+	dictionary, err := nlp.NewDictionary()
+	if err != nil {
+		testingContext.Fatalf("NewDictionary returned an error: %v", err)
+	}
+
+	// Checked in order; the first miss aborts the test with its own message.
+	expectations := []struct {
+		word    string
+		failure string
+	}{
+		{"options", "Expected options to be recognized."},
+		{"has", "Expected has to be recognized."},
+		{"resources", "Expected resources to be recognized through plural inflection."},
+	}
+
+	for _, expectation := range expectations {
+		if !dictionary.IsWord(expectation.word) {
+			testingContext.Fatal(expectation.failure)
+		}
+	}
+}
+
+// TestDictionaryFindsAbbreviationExpansions loads the testdata word list and
+// checks that AbbreviationExpansion maps "expr", "ctx", and "err" to
+// "expression", "context", and "error" respectively.
+func TestDictionaryFindsAbbreviationExpansions(testingContext *testing.T) {
+	wordList := filepath.Join("..", "..", "testdata", "dictionary", "words.txt")
+	testingContext.Setenv("KIVIA_DICTIONARY_PATH", wordList)
+
+	dictionary, err := nlp.NewDictionary()
+	if err != nil {
+		testingContext.Fatalf("NewDictionary returned an error: %v", err)
+	}
+
+	expectedExpansions := map[string]string{
+		"expr": "expression",
+		"ctx":  "context",
+		"err":  "error",
+	}
+
+	for abbreviation, want := range expectedExpansions {
+		got, found := dictionary.AbbreviationExpansion(abbreviation)
+
+		switch {
+		case !found:
+			testingContext.Fatalf("Expected an abbreviation expansion for %q.", abbreviation)
+		case got != want:
+			testingContext.Fatalf("Expected %q to expand to %q, got %q.", abbreviation, want, got)
+		}
+	}
+}
+
+// TestDictionaryLoadsFromMultipleDictionaryFiles writes two one-word files,
+// joins their paths with os.PathListSeparator in KIVIA_DICTIONARY_PATH, and
+// checks that the word from each file is recognized.
+func TestDictionaryLoadsFromMultipleDictionaryFiles(testingContext *testing.T) {
+	directory := testingContext.TempDir()
+
+	// Each fixture file contributes exactly one word.
+	fixtures := []struct {
+		path     string
+		contents string
+	}{
+		{filepath.Join(directory, "first.txt"), "alpha\n"},
+		{filepath.Join(directory, "second.txt"), "beta\n"},
+	}
+
+	paths := make([]string, 0, len(fixtures))
+
+	for _, fixture := range fixtures {
+		if err := os.WriteFile(fixture.path, []byte(fixture.contents), 0o644); err != nil {
+			testingContext.Fatalf("os.WriteFile returned an error: %v", err)
+		}
+
+		paths = append(paths, fixture.path)
+	}
+
+	testingContext.Setenv("KIVIA_DICTIONARY_PATH", strings.Join(paths, string(os.PathListSeparator)))
+
+	dictionary, err := nlp.NewDictionary()
+	if err != nil {
+		testingContext.Fatalf("NewDictionary returned an error: %v", err)
+	}
+
+	if !dictionary.IsWord("alpha") {
+		testingContext.Fatal("Expected alpha to be recognized.")
+	}
+
+	if !dictionary.IsWord("beta") {
+		testingContext.Fatal("Expected beta to be recognized.")
+	}
+}
+
+// TestDictionaryFailsWhenConfiguredPathHasNoWords configures a dictionary file
+// that contains only a newline and expects NewDictionary to return an error.
+func TestDictionaryFailsWhenConfiguredPathHasNoWords(testingContext *testing.T) {
+	blankFile := filepath.Join(testingContext.TempDir(), "empty.txt")
+
+	writeErr := os.WriteFile(blankFile, []byte("\n"), 0o644)
+	if writeErr != nil {
+		testingContext.Fatalf("os.WriteFile returned an error: %v", writeErr)
+	}
+
+	testingContext.Setenv("KIVIA_DICTIONARY_PATH", blankFile)
+
+	if _, err := nlp.NewDictionary(); err == nil {
+		testingContext.Fatal("Expected NewDictionary to fail when configured dictionary has no usable words.")
+	}
+}
+
+// TestDictionaryRecognizesDerivedForms loads a dictionary holding only "trim"
+// and "token" and checks that IsWord also accepts "trimmed" and "tokenize".
+func TestDictionaryRecognizesDerivedForms(testingContext *testing.T) {
+	baseWordsFile := filepath.Join(testingContext.TempDir(), "base_words.txt")
+
+	writeErr := os.WriteFile(baseWordsFile, []byte("trim\ntoken\n"), 0o644)
+	if writeErr != nil {
+		testingContext.Fatalf("os.WriteFile returned an error: %v", writeErr)
+	}
+
+	testingContext.Setenv("KIVIA_DICTIONARY_PATH", baseWordsFile)
+
+	dictionary, err := nlp.NewDictionary()
+	if err != nil {
+		testingContext.Fatalf("NewDictionary returned an error: %v", err)
+	}
+
+	// Checked in order; the first miss aborts the test with its own message.
+	derivedForms := []struct {
+		word    string
+		failure string
+	}{
+		{"trimmed", "Expected trimmed to be recognized from trim."},
+		{"tokenize", "Expected tokenize to be recognized from token."},
+	}
+
+	for _, derived := range derivedForms {
+		if !dictionary.IsWord(derived.word) {
+			testingContext.Fatal(derived.failure)
+		}
+	}
+}
+
+// TestDictionaryRecognizesBritishAndAmericanVariants loads a dictionary holding
+// the British spellings "normalise", "colour", and "centre" and checks that
+// IsWord accepts "normalize", "color", and "center".
+func TestDictionaryRecognizesBritishAndAmericanVariants(testingContext *testing.T) {
+	britishWordsFile := filepath.Join(testingContext.TempDir(), "british_words.txt")
+
+	writeErr := os.WriteFile(britishWordsFile, []byte("normalise\ncolour\ncentre\n"), 0o644)
+	if writeErr != nil {
+		testingContext.Fatalf("os.WriteFile returned an error: %v", writeErr)
+	}
+
+	testingContext.Setenv("KIVIA_DICTIONARY_PATH", britishWordsFile)
+
+	dictionary, err := nlp.NewDictionary()
+	if err != nil {
+		testingContext.Fatalf("NewDictionary returned an error: %v", err)
+	}
+
+	// Checked in order; the first miss aborts the test with its own message.
+	americanSpellings := []struct {
+		word    string
+		failure string
+	}{
+		{"normalize", "Expected normalize to be recognized from normalise."},
+		{"color", "Expected color to be recognized from colour."},
+		{"center", "Expected center to be recognized from centre."},
+	}
+
+	for _, spelling := range americanSpellings {
+		if !dictionary.IsWord(spelling.word) {
+			testingContext.Fatal(spelling.failure)
+		}
+	}
+}
diff --git a/internal/report/report.go b/internal/report/report.go
new file mode 100644
index 0000000..a97039e
--- /dev/null
+++ b/internal/report/report.go
@@ -0,0 +1,80 @@
+package report
+
+import (
+ "encoding/json"
+ "fmt"
+ "github.com/Fuwn/kivia/internal/analyze"
+ "github.com/Fuwn/kivia/internal/collect"
+ "io"
+ "strings"
+)
+
+// Render writes result to writer in the requested format. The format is
+// matched case-insensitively: "json" selects JSON output, "text" or the empty
+// string selects plain text, and any other value yields an error that quotes
+// the format as given. includeContext is forwarded to the chosen renderer.
+func Render(writer io.Writer, result analyze.Result, format string, includeContext bool) error {
+	normalizedFormat := strings.ToLower(format)
+
+	if normalizedFormat == "json" {
+		return renderJSON(writer, result, includeContext)
+	}
+
+	if normalizedFormat == "text" || normalizedFormat == "" {
+		return renderText(writer, result, includeContext)
+	}
+
+	return fmt.Errorf("Unsupported output format %q. Use \"text\" or \"json\".", format)
+}
+
+// renderText writes one "file:line:column kind "name": reason" line per
+// violation to writer, or a single "No naming violations found." line when
+// there are none. When includeContext is true and a violation has at least one
+// non-empty context field, an extra "context:" line follows it. The first
+// write error stops rendering and is returned.
+func renderText(writer io.Writer, result analyze.Result, includeContext bool) error {
+	if len(result.Violations) == 0 {
+		_, err := fmt.Fprintln(writer, "No naming violations found.")
+
+		return err
+	}
+
+	for _, violation := range result.Violations {
+		identifier := violation.Identifier
+
+		_, err := fmt.Fprintf(writer, "%s:%d:%d %s %q: %s\n",
+			identifier.File,
+			identifier.Line,
+			identifier.Column,
+			identifier.Kind,
+			identifier.Name,
+			violation.Reason,
+		)
+		if err != nil {
+			return err
+		}
+
+		if !includeContext {
+			continue
+		}
+
+		details := contextDetails(identifier.Context)
+		if len(details) == 0 {
+			continue
+		}
+
+		if _, err := fmt.Fprintf(writer, " context: %s\n", strings.Join(details, ", ")); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// contextDetails lists the non-empty fields of context as "key=value" strings
+// in the fixed order type, value, function.
+func contextDetails(context collect.Context) []string {
+	fields := []struct {
+		prefix string
+		value  string
+	}{
+		{"type=", context.Type},
+		{"value=", context.ValueExpression},
+		{"function=", context.EnclosingFunction},
+	}
+
+	details := make([]string, 0, 3)
+
+	for _, field := range fields {
+		if field.value != "" {
+			details = append(details, field.prefix+field.value)
+		}
+	}
+
+	return details
+}
+
+// renderJSON writes result to writer as indented JSON. When includeContext is
+// false, every violation's identifier context is replaced by the zero
+// collect.Context before encoding.
+//
+// The blanking is applied to a private copy of the violations slice. result is
+// passed by value, but its Violations slice still shares a backing array with
+// the caller's slice, so clearing the contexts in place would silently erase
+// the caller's data.
+func renderJSON(writer io.Writer, result analyze.Result, includeContext bool) error {
+	// Skip the copy for a nil or empty slice so such a value encodes exactly
+	// as it was given.
+	if !includeContext && len(result.Violations) > 0 {
+		stripped := make([]analyze.Violation, len(result.Violations))
+
+		copy(stripped, result.Violations)
+
+		for index := range stripped {
+			stripped[index].Identifier.Context = collect.Context{}
+		}
+
+		result.Violations = stripped
+	}
+
+	encoder := json.NewEncoder(writer)
+
+	encoder.SetIndent("", " ")
+
+	return encoder.Encode(result)
+}