aboutsummaryrefslogtreecommitdiff
path: root/src/zenutil/suggest.cpp
diff options
context:
space:
mode:
authorStefan Boberg <[email protected]>2026-04-22 10:36:24 +0200
committerGitHub Enterprise <[email protected]>2026-04-22 10:36:24 +0200
commit213e53c4d603c51037f43c86b16fd90d2ac48c4a (patch)
tree9bdc5374d5828a69cfd0a7b473c924461a97c2d8 /src/zenutil/suggest.cpp
parentfix NamedEvent ftok() race on Linux/macOS (#1001) (diff)
downloadarchived-zen-213e53c4d603c51037f43c86b16fd90d2ac48c4a.tar.xz
archived-zen-213e53c4d603c51037f43c86b16fd90d2ac48c4a.zip
zen CLI: suggest similar commands on typos (#1000)
Surface "did you mean?" suggestions when the `zen` CLI is invoked with an unknown command or subcommand, so users don't have to dig through `zen --help` every time they mistype. ``` $ zen stauts Unknown command specified: 'stauts' The most similar commands are: status Run 'zen --help' for the full list of commands. ``` ``` $ zen cache statz Unknown subcommand: 'statz' The most similar subcommands are: stats ``` ## Algorithm - Damerau-Levenshtein edit distance with case-insensitive ASCII comparison — handles insertions, deletions, substitutions, and adjacent transpositions (e.g. `versoin` → `version`). - Small prefix-match bonus so short inputs like `ca` still surface longer commands like `cache` without having to relax the distance threshold to the point where it admits noise. - Distance threshold scales with input length (`clamp(len/2, 1, 3)`). Very short inputs rely on the prefix bonus; longer inputs tolerate up to three edits. - Top 5 results by distance, stable-sorted. - Hidden commands (deprecated shims like `cache-stats`) are excluded from the candidate set so we don't advertise them.
Diffstat (limited to 'src/zenutil/suggest.cpp')
-rw-r--r--src/zenutil/suggest.cpp230
1 files changed, 230 insertions, 0 deletions
diff --git a/src/zenutil/suggest.cpp b/src/zenutil/suggest.cpp
new file mode 100644
index 000000000..15582e4f8
--- /dev/null
+++ b/src/zenutil/suggest.cpp
@@ -0,0 +1,230 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#include <zenutil/suggest.h>
+
+#include <zencore/string.h>
+
+#if ZEN_WITH_TESTS
+# include <zencore/testing.h>
+# include <ostream> // needed for doctest to stringify std::string_view on CHECK failure
+#endif
+
+#include <algorithm>
+
+namespace zen {
+
+namespace {
+
+ constexpr size_t kMaxSuggestionInputLen = 64;
+
+ // Damerau-Levenshtein distance with case-insensitive ASCII comparison. Returns the
+ // raw edit count (insertions, deletions, substitutions, adjacent transpositions).
+ // Falls back to max(len) for pathologically long inputs so the scratch buffers can
+ // stay stack-allocated.
+ size_t ComputeEditDistance(std::string_view Source, std::string_view Target)
+ {
+ if (Source.size() > kMaxSuggestionInputLen || Target.size() > kMaxSuggestionInputLen)
+ {
+ return std::max(Source.size(), Target.size());
+ }
+
+ auto AsciiToLower = [](unsigned char Ch) -> unsigned char {
+ return (Ch >= 'A' && Ch <= 'Z') ? static_cast<unsigned char>(Ch + ('a' - 'A')) : Ch;
+ };
+
+ const size_t SourceLen = Source.size();
+ const size_t TargetLen = Target.size();
+ if (SourceLen == 0)
+ {
+ return TargetLen;
+ }
+ if (TargetLen == 0)
+ {
+ return SourceLen;
+ }
+
+ size_t Buf0[kMaxSuggestionInputLen + 1];
+ size_t Buf1[kMaxSuggestionInputLen + 1];
+ size_t Buf2[kMaxSuggestionInputLen + 1];
+ size_t* PrevPrev = Buf0;
+ size_t* Prev = Buf1;
+ size_t* Curr = Buf2;
+
+ for (size_t j = 0; j <= TargetLen; ++j)
+ {
+ Prev[j] = j;
+ }
+
+ for (size_t i = 1; i <= SourceLen; ++i)
+ {
+ Curr[0] = i;
+ for (size_t j = 1; j <= TargetLen; ++j)
+ {
+ const size_t Cost = (AsciiToLower(Source[i - 1]) == AsciiToLower(Target[j - 1])) ? 0 : 1;
+ Curr[j] = std::min({Prev[j] + 1, // deletion from Source
+ Curr[j - 1] + 1, // insertion into Source
+ Prev[j - 1] + Cost}); // substitution
+ if (i > 1 && j > 1 && AsciiToLower(Source[i - 1]) == AsciiToLower(Target[j - 2]) &&
+ AsciiToLower(Source[i - 2]) == AsciiToLower(Target[j - 1]))
+ {
+ Curr[j] = std::min(Curr[j], PrevPrev[j - 2] + 1); // transposition
+ }
+ }
+ // Rotate buffers: (PrevPrev, Prev, Curr) -> (Prev, Curr, PrevPrev)
+ size_t* Tmp = PrevPrev;
+ PrevPrev = Prev;
+ Prev = Curr;
+ Curr = Tmp;
+ }
+ return Prev[TargetLen];
+ }
+
+} // namespace
+
+std::vector<std::string_view>
+SuggestSimilarCommands(std::string_view Typed, std::span<const std::string_view> Candidates)
+{
+ constexpr size_t kMaxSuggestions = 5;
+
+ if (Typed.empty())
+ {
+ return {};
+ }
+
+ struct Scored
+ {
+ std::string_view Name;
+ size_t Distance;
+ };
+
+ std::vector<Scored> Ranked;
+ Ranked.reserve(Candidates.size());
+
+ for (std::string_view Name : Candidates)
+ {
+ size_t Distance = ComputeEditDistance(Typed, Name);
+
+ // Prefix bonus so short inputs ("ca") still surface longer commands ("cache")
+ // without us having to use a distance threshold that would admit unrelated matches.
+ if (Name.size() >= Typed.size() && StrCaseCompare(Name.substr(0, Typed.size()), Typed) == 0)
+ {
+ Distance = std::min<size_t>(Distance, 1);
+ }
+
+ Ranked.push_back({Name, Distance});
+ }
+
+ // Scale with typed length: very short inputs get a tighter bound so we don't
+ // flood the output with every short command that happens to be 2 edits away.
+ // Short inputs rely on the prefix bonus instead. Cap at 3 for longer strings
+ // since beyond that the suggestions stop being plausible.
+ const size_t MaxDistance = std::clamp<size_t>(Typed.size() / 2, 1, 3);
+
+ std::stable_sort(Ranked.begin(), Ranked.end(), [](const Scored& Lhs, const Scored& Rhs) { return Lhs.Distance < Rhs.Distance; });
+
+ std::vector<std::string_view> Result;
+ for (const Scored& Entry : Ranked)
+ {
+ if (Entry.Distance > MaxDistance)
+ {
+ break;
+ }
+ Result.push_back(Entry.Name);
+ if (Result.size() >= kMaxSuggestions)
+ {
+ break;
+ }
+ }
+ return Result;
+}
+
+#if ZEN_WITH_TESTS
+
+void
+suggest_forcelink()
+{
+}
+
+TEST_SUITE_BEGIN("util.suggest");
+
+TEST_CASE("transposition_is_distance_one")
+{
+ // "stauts" -> "status" is a single adjacent transposition under Damerau-Levenshtein.
+ const std::string_view Candidates[] = {"status", "start", "stop"};
+ std::vector<std::string_view> Result = SuggestSimilarCommands("stauts", Candidates);
+
+ REQUIRE(!Result.empty());
+ CHECK(Result.front() == "status");
+}
+
+TEST_CASE("short_prefix_surfaces_longer_candidate")
+{
+ // "ca" is raw distance 3 from "cache", outside the default threshold for a 2-char
+ // input. The prefix bonus is what brings it back into the result set.
+ const std::string_view Candidates[] = {"cache", "status", "version"};
+ std::vector<std::string_view> Result = SuggestSimilarCommands("ca", Candidates);
+
+ REQUIRE(!Result.empty());
+ CHECK(Result.front() == "cache");
+}
+
+TEST_CASE("gibberish_returns_no_suggestions")
+{
+ // Nothing in the candidate set is plausibly close.
+ const std::string_view Candidates[] = {"cache", "status", "version"};
+ std::vector<std::string_view> Result = SuggestSimilarCommands("xyzzyqqqq", Candidates);
+
+ CHECK(Result.empty());
+}
+
+TEST_CASE("empty_input_returns_no_suggestions")
+{
+ const std::string_view Candidates[] = {"cache", "status"};
+ std::vector<std::string_view> Result = SuggestSimilarCommands("", Candidates);
+
+ CHECK(Result.empty());
+}
+
+TEST_CASE("case_insensitive_match")
+{
+ const std::string_view Candidates[] = {"cache", "status"};
+ std::vector<std::string_view> Result = SuggestSimilarCommands("STAUTS", Candidates);
+
+ REQUIRE(!Result.empty());
+ CHECK(Result.front() == "status");
+}
+
+TEST_CASE("substitution_within_threshold")
+{
+ // "versoin" -> "version": one transposition. Within threshold for a 7-char input.
+ const std::string_view Candidates[] = {"version", "verify", "value"};
+ std::vector<std::string_view> Result = SuggestSimilarCommands("versoin", Candidates);
+
+ REQUIRE(!Result.empty());
+ CHECK(Result.front() == "version");
+}
+
+TEST_CASE("results_sorted_by_distance")
+{
+ // "stats" is exact match; "stat" and "start" are both distance 1. Best-first ordering.
+ const std::string_view Candidates[] = {"start", "stat", "stats"};
+ std::vector<std::string_view> Result = SuggestSimilarCommands("stats", Candidates);
+
+ REQUIRE(Result.size() >= 1);
+ CHECK(Result.front() == "stats");
+}
+
+TEST_CASE("caps_at_five_results")
+{
+ // Eight prefix-matching candidates; only the first five should survive the cap.
+ const std::string_view Candidates[] = {"ca1", "ca2", "ca3", "ca4", "ca5", "ca6", "ca7", "ca8"};
+ std::vector<std::string_view> Result = SuggestSimilarCommands("ca", Candidates);
+
+ CHECK(Result.size() == 5);
+}
+
+TEST_SUITE_END();
+
+#endif // ZEN_WITH_TESTS
+
+} // namespace zen