diff options
| author | Stefan Boberg <[email protected]> | 2026-04-22 10:36:24 +0200 |
|---|---|---|
| committer | GitHub Enterprise <[email protected]> | 2026-04-22 10:36:24 +0200 |
| commit | 213e53c4d603c51037f43c86b16fd90d2ac48c4a (patch) | |
| tree | 9bdc5374d5828a69cfd0a7b473c924461a97c2d8 /src/zenutil/suggest.cpp | |
| parent | fix NamedEvent ftok() race on Linux/macOS (#1001) (diff) | |
| download | archived-zen-213e53c4d603c51037f43c86b16fd90d2ac48c4a.tar.xz archived-zen-213e53c4d603c51037f43c86b16fd90d2ac48c4a.zip | |
zen CLI: suggest similar commands on typos (#1000)
Surface "did you mean?" suggestions when the `zen` CLI is invoked with an unknown command or subcommand, so users don't have to dig through `zen --help` every time they mistype.
```
$ zen stauts
Unknown command specified: 'stauts'
The most similar commands are:
status
Run 'zen --help' for the full list of commands.
```
```
$ zen cache statz
Unknown subcommand: 'statz'
The most similar subcommands are:
stats
```
## Algorithm
- Damerau-Levenshtein edit distance with case-insensitive ASCII comparison — handles insertions, deletions, substitutions, and adjacent transpositions (e.g. `versoin` → `version`).
- Small prefix-match bonus so short inputs like `ca` still surface longer commands like `cache` without having to relax the distance threshold to the point where it admits noise.
- Distance threshold scales with input length (`clamp(len/2, 1, 3)`). Very short inputs rely on the prefix bonus; longer inputs tolerate up to three edits.
- Top 5 results by distance, stable-sorted.
- Hidden commands (deprecated shims like `cache-stats`) are excluded from the candidate set so we don't advertise them.
Diffstat (limited to 'src/zenutil/suggest.cpp')
| -rw-r--r-- | src/zenutil/suggest.cpp | 230 |
1 files changed, 230 insertions, 0 deletions
diff --git a/src/zenutil/suggest.cpp b/src/zenutil/suggest.cpp new file mode 100644 index 000000000..15582e4f8 --- /dev/null +++ b/src/zenutil/suggest.cpp @@ -0,0 +1,230 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include <zenutil/suggest.h> + +#include <zencore/string.h> + +#if ZEN_WITH_TESTS +# include <zencore/testing.h> +# include <ostream> // needed for doctest to stringify std::string_view on CHECK failure +#endif + +#include <algorithm> + +namespace zen { + +namespace { + + constexpr size_t kMaxSuggestionInputLen = 64; + + // Damerau-Levenshtein distance with case-insensitive ASCII comparison. Returns the + // raw edit count (insertions, deletions, substitutions, adjacent transpositions). + // Falls back to max(len) for pathologically long inputs so the scratch buffers can + // stay stack-allocated. + size_t ComputeEditDistance(std::string_view Source, std::string_view Target) + { + if (Source.size() > kMaxSuggestionInputLen || Target.size() > kMaxSuggestionInputLen) + { + return std::max(Source.size(), Target.size()); + } + + auto AsciiToLower = [](unsigned char Ch) -> unsigned char { + return (Ch >= 'A' && Ch <= 'Z') ? static_cast<unsigned char>(Ch + ('a' - 'A')) : Ch; + }; + + const size_t SourceLen = Source.size(); + const size_t TargetLen = Target.size(); + if (SourceLen == 0) + { + return TargetLen; + } + if (TargetLen == 0) + { + return SourceLen; + } + + size_t Buf0[kMaxSuggestionInputLen + 1]; + size_t Buf1[kMaxSuggestionInputLen + 1]; + size_t Buf2[kMaxSuggestionInputLen + 1]; + size_t* PrevPrev = Buf0; + size_t* Prev = Buf1; + size_t* Curr = Buf2; + + for (size_t j = 0; j <= TargetLen; ++j) + { + Prev[j] = j; + } + + for (size_t i = 1; i <= SourceLen; ++i) + { + Curr[0] = i; + for (size_t j = 1; j <= TargetLen; ++j) + { + const size_t Cost = (AsciiToLower(Source[i - 1]) == AsciiToLower(Target[j - 1])) ? 0 : 1; + Curr[j] = std::min({Prev[j] + 1, // deletion from Source + Curr[j - 1] + 1, // insertion into Source + Prev[j - 1] + Cost}); // substitution + if (i > 1 && j > 1 && AsciiToLower(Source[i - 1]) == AsciiToLower(Target[j - 2]) && + AsciiToLower(Source[i - 2]) == AsciiToLower(Target[j - 1])) + { + Curr[j] = std::min(Curr[j], PrevPrev[j - 2] + 1); // transposition + } + } + // Rotate buffers: (PrevPrev, Prev, Curr) -> (Prev, Curr, PrevPrev) + size_t* Tmp = PrevPrev; + PrevPrev = Prev; + Prev = Curr; + Curr = Tmp; + } + return Prev[TargetLen]; + } + +} // namespace + +std::vector<std::string_view> +SuggestSimilarCommands(std::string_view Typed, std::span<const std::string_view> Candidates) +{ + constexpr size_t kMaxSuggestions = 5; + + if (Typed.empty()) + { + return {}; + } + + struct Scored + { + std::string_view Name; + size_t Distance; + }; + + std::vector<Scored> Ranked; + Ranked.reserve(Candidates.size()); + + for (std::string_view Name : Candidates) + { + size_t Distance = ComputeEditDistance(Typed, Name); + + // Prefix bonus so short inputs ("ca") still surface longer commands ("cache") + // without us having to use a distance threshold that would admit unrelated matches. + if (Name.size() >= Typed.size() && StrCaseCompare(Name.substr(0, Typed.size()), Typed) == 0) + { + Distance = std::min<size_t>(Distance, 1); + } + + Ranked.push_back({Name, Distance}); + } + + // Scale with typed length: very short inputs get a tighter bound so we don't + // flood the output with every short command that happens to be 2 edits away. + // Short inputs rely on the prefix bonus instead. Cap at 3 for longer strings + // since beyond that the suggestions stop being plausible. + const size_t MaxDistance = std::clamp<size_t>(Typed.size() / 2, 1, 3); + + std::stable_sort(Ranked.begin(), Ranked.end(), [](const Scored& Lhs, const Scored& Rhs) { return Lhs.Distance < Rhs.Distance; }); + + std::vector<std::string_view> Result; + for (const Scored& Entry : Ranked) + { + if (Entry.Distance > MaxDistance) + { + break; + } + Result.push_back(Entry.Name); + if (Result.size() >= kMaxSuggestions) + { + break; + } + } + return Result; +} + +#if ZEN_WITH_TESTS + +void +suggest_forcelink() +{ +} + +TEST_SUITE_BEGIN("util.suggest"); + +TEST_CASE("transposition_is_distance_one") +{ + // "stauts" -> "status" is a single adjacent transposition under Damerau-Levenshtein. + const std::string_view Candidates[] = {"status", "start", "stop"}; + std::vector<std::string_view> Result = SuggestSimilarCommands("stauts", Candidates); + + REQUIRE(!Result.empty()); + CHECK(Result.front() == "status"); +} + +TEST_CASE("short_prefix_surfaces_longer_candidate") +{ + // "ca" is raw distance 3 from "cache", outside the default threshold for a 2-char + // input. The prefix bonus is what brings it back into the result set. + const std::string_view Candidates[] = {"cache", "status", "version"}; + std::vector<std::string_view> Result = SuggestSimilarCommands("ca", Candidates); + + REQUIRE(!Result.empty()); + CHECK(Result.front() == "cache"); +} + +TEST_CASE("gibberish_returns_no_suggestions") +{ + // Nothing in the candidate set is plausibly close. + const std::string_view Candidates[] = {"cache", "status", "version"}; + std::vector<std::string_view> Result = SuggestSimilarCommands("xyzzyqqqq", Candidates); + + CHECK(Result.empty()); +} + +TEST_CASE("empty_input_returns_no_suggestions") +{ + const std::string_view Candidates[] = {"cache", "status"}; + std::vector<std::string_view> Result = SuggestSimilarCommands("", Candidates); + + CHECK(Result.empty()); +} + +TEST_CASE("case_insensitive_match") +{ + const std::string_view Candidates[] = {"cache", "status"}; + std::vector<std::string_view> Result = SuggestSimilarCommands("STAUTS", Candidates); + + REQUIRE(!Result.empty()); + CHECK(Result.front() == "status"); +} + +TEST_CASE("substitution_within_threshold") +{ + // "versoin" -> "version": one transposition. Within threshold for a 7-char input. + const std::string_view Candidates[] = {"version", "verify", "value"}; + std::vector<std::string_view> Result = SuggestSimilarCommands("versoin", Candidates); + + REQUIRE(!Result.empty()); + CHECK(Result.front() == "version"); +} + +TEST_CASE("results_sorted_by_distance") +{ + // "stats" is exact match; "stat" and "start" are both distance 1. Best-first ordering. + const std::string_view Candidates[] = {"start", "stat", "stats"}; + std::vector<std::string_view> Result = SuggestSimilarCommands("stats", Candidates); + + REQUIRE(Result.size() >= 1); + CHECK(Result.front() == "stats"); +} + +TEST_CASE("caps_at_five_results") +{ + // Eight prefix-matching candidates; only the first five should survive the cap. + const std::string_view Candidates[] = {"ca1", "ca2", "ca3", "ca4", "ca5", "ca6", "ca7", "ca8"}; + std::vector<std::string_view> Result = SuggestSimilarCommands("ca", Candidates); + + CHECK(Result.size() == 5); +} + +TEST_SUITE_END(); + +#endif // ZEN_WITH_TESTS + +} // namespace zen |