From d6a2b1c247778e1bf2a847adba230ad20f44d21d Mon Sep 17 00:00:00 2001 From: Stefan Boberg Date: Mon, 24 Nov 2025 10:32:52 +0100 Subject: Add regex-free route matching support (#662) This change adds support for non-regex matching of routes. Instead of using regex patterns you can associate matcher functions with pattern names and string literal components are identified and matched directly. Also implemented tests for `HttpRequestRouter` class. --- src/zenhttp/httpserver.cpp | 410 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 393 insertions(+), 17 deletions(-) (limited to 'src/zenhttp/httpserver.cpp') diff --git a/src/zenhttp/httpserver.cpp b/src/zenhttp/httpserver.cpp index f48c22367..b28682375 100644 --- a/src/zenhttp/httpserver.cpp +++ b/src/zenhttp/httpserver.cpp @@ -698,24 +698,124 @@ void HttpRequestRouter::AddPattern(const char* Id, const char* Regex) { ZEN_ASSERT(m_PatternMap.find(Id) == m_PatternMap.end()); + ZEN_ASSERT(!m_IsFinalized); m_PatternMap.insert({Id, Regex}); } void -HttpRequestRouter::RegisterRoute(const char* Regex, HttpRequestRouter::HandlerFunc_t&& HandlerFunc, HttpVerb SupportedVerbs) +HttpRequestRouter::AddMatcher(const char* Id, std::function&& Matcher) { - ExtendableStringBuilder<128> ExpandedRegex; - ProcessRegexSubstitutions(Regex, ExpandedRegex); + ZEN_ASSERT(m_MatcherNameMap.find(Id) == m_MatcherNameMap.end()); + ZEN_ASSERT(!m_IsFinalized); - m_Handlers.emplace_back(ExpandedRegex.c_str(), SupportedVerbs, std::move(HandlerFunc), Regex); + const int MatcherIndex = gsl::narrow_cast(m_MatcherFunctions.size()); + m_MatcherFunctions.push_back(Matcher); + m_MatcherNameMap.insert({Id, MatcherIndex}); } void +HttpRequestRouter::RegisterRoute(const char* UriPattern, HttpRequestRouter::HandlerFunc_t&& HandlerFunc, HttpVerb SupportedVerbs) +{ + ZEN_ASSERT(!m_IsFinalized); + + if (ExtendableStringBuilder<128> ExpandedRegex; ProcessRegexSubstitutions(UriPattern, ExpandedRegex)) + { + // Regex route + m_RegexHandlers.emplace_back(ExpandedRegex.c_str(), SupportedVerbs, std::move(HandlerFunc), UriPattern); + } + else + { + // New-style regex-free route. More efficient and should be used for everything eventually + + int RegexLen = gsl::narrow_cast(strlen(UriPattern)); + + int i = 0; + + std::vector MatcherIndices; + + while (i < RegexLen) + { + if (UriPattern[i] == '{') + { + bool IsComplete = false; + int PatternStart = i + 1; + while (++i < RegexLen) + { + if (UriPattern[i] == '}') + { + std::string_view Pattern(&UriPattern[PatternStart], i - PatternStart); + if (auto it = m_MatcherNameMap.find(std::string(Pattern)); it != m_MatcherNameMap.end()) + { + // It's a match + MatcherIndices.push_back(it->second); + IsComplete = true; + ++i; + break; + } + else + { + throw std::runtime_error(fmt::format("unknown matcher pattern '{}' in URI pattern '{}'", Pattern, UriPattern)); + } + } + } + if (!IsComplete) + { + throw std::runtime_error(fmt::format("unterminated matcher pattern in URI pattern '{}'", UriPattern)); + } + } + else + { + if (UriPattern[i] == '/') + { + throw std::runtime_error(fmt::format("unexpected '/' in literal segment of URI pattern '{}'", UriPattern)); + } + + int SegmentStart = i; + while (++i < RegexLen && UriPattern[i] != '/') + ; + + std::string_view Segment(&UriPattern[SegmentStart], (i - SegmentStart)); + int LiteralIndex = gsl::narrow_cast(m_Literals.size()); + m_Literals.push_back(std::string(Segment)); + MatcherIndices.push_back(-1 - LiteralIndex); + } + + if (i < RegexLen && UriPattern[i] == '/') + { + ++i; // skip slash + } + } + + m_MatcherEndpoints.emplace_back(std::move(MatcherIndices), SupportedVerbs, std::move(HandlerFunc), UriPattern); + } +} + +std::string_view +HttpRouterRequest::GetCapture(uint32_t Index) const +{ + if (!m_CapturedSegments.empty()) + { + ZEN_ASSERT(Index < m_CapturedSegments.size()); + return m_CapturedSegments[Index]; + } + + ZEN_ASSERT(Index < m_Match.size()); + + const auto& Match = m_Match[Index]; + + return std::string_view(&*Match.first, Match.second - Match.first); +} + +bool HttpRequestRouter::ProcessRegexSubstitutions(const char* Regex, StringBuilderBase& OutExpandedRegex) { size_t RegexLen = strlen(Regex); + bool HasRegex = false; + + std::vector UnknownPatterns; + for (size_t i = 0; i < RegexLen;) { bool matched = false; @@ -733,12 +833,11 @@ HttpRequestRouter::ProcessRegexSubstitutions(const char* Regex, StringBuilderBas if (auto it = m_PatternMap.find(Pattern); it != m_PatternMap.end()) { OutExpandedRegex.Append(it->second.c_str()); + HasRegex = true; } else { - // Default to anything goes (or should this just be an error?) - - OutExpandedRegex.Append("(.+?)"); + UnknownPatterns.push_back(Pattern); } // skip ahead @@ -756,17 +855,127 @@ HttpRequestRouter::ProcessRegexSubstitutions(const char* Regex, StringBuilderBas OutExpandedRegex.Append(Regex[i++]); } } + + if (HasRegex) + { + if (UnknownPatterns.size() > 0) + { + std::string UnknownList; + for (const auto& Pattern : UnknownPatterns) + { + if (!UnknownList.empty()) + { + UnknownList += ", "; + } + UnknownList += "'"; + UnknownList += Pattern; + UnknownList += "'"; + } + + throw std::runtime_error(fmt::format("unknown pattern(s) {} in regex route '{}'", UnknownList, Regex)); + } + + return true; + } + + return false; } bool HttpRequestRouter::HandleRequest(zen::HttpServerRequest& Request) { + if (!m_IsFinalized) + { + m_IsFinalized = true; + } + const HttpVerb Verb = Request.RequestVerb(); std::string_view Uri = Request.RelativeUri(); HttpRouterRequest RouterRequest(Request); - for (const auto& Handler : m_Handlers) + // First try new-style matcher routes + + for (const auto& Handler : m_MatcherEndpoints) + { + if ((Handler.Verbs & Verb) == Verb) + { + size_t UriPos = 0; + const size_t UriLen = Uri.length(); + const std::vector& Matchers = Handler.ComponentIndices; + bool IsMatch = true; + + std::vector CapturedSegments; + + CapturedSegments.emplace_back(Uri); + + for (int MatcherIndex : Matchers) + { + if (UriPos >= UriLen) + { + IsMatch = false; + break; + } + + if (MatcherIndex < 0) + { + // Literal match + int LitIndex = -MatcherIndex - 1; + const std::string& LitStr = m_Literals[LitIndex]; + size_t LitLen = LitStr.length(); + + if (Uri.substr(UriPos, LitLen) == LitStr) + { + UriPos += LitLen; + } + else + { + IsMatch = false; + break; + } + } + else + { + // Matcher function + size_t SegmentStart = UriPos; + while (UriPos < UriLen && Uri[UriPos] != '/') + { + ++UriPos; + } + + std::string_view Segment = Uri.substr(SegmentStart, UriPos - SegmentStart); + + if (m_MatcherFunctions[MatcherIndex](Segment)) + { + CapturedSegments.push_back(Segment); + } + else + { + IsMatch = false; + break; + } + } + + // Skip slash + if (UriPos < UriLen && Uri[UriPos] == '/') + { + ++UriPos; + } + } + + if (IsMatch && UriPos == UriLen) + { + RouterRequest.m_CapturedSegments = std::move(CapturedSegments); + Handler.Handler(RouterRequest); + + return true; // Route matched + } + } + } + + // Old-style regex routes + + for (const auto& Handler : m_RegexHandlers) { if ((Handler.Verbs & Verb) == Verb && regex_match(begin(Uri), end(Uri), RouterRequest.m_Match, Handler.RegEx)) { @@ -1023,22 +1232,189 @@ TEST_CASE("http.common") { using namespace std::literals; - SUBCASE("router") + struct TestHttpServerRequest : public HttpServerRequest + { + TestHttpServerRequest(std::string_view Uri) { m_Uri = Uri; } + virtual IoBuffer ReadPayload() override { return IoBuffer(); } + virtual void WriteResponse(HttpResponseCode ResponseCode, HttpContentType ContentType, std::span Blobs) override + { + ZEN_UNUSED(ResponseCode, ContentType, Blobs); + } + virtual void WriteResponse(HttpResponseCode ResponseCode) override { ZEN_UNUSED(ResponseCode); } + virtual void WriteResponse(HttpResponseCode ResponseCode, HttpContentType ContentType, std::u8string_view ResponseString) override + { + ZEN_UNUSED(ResponseCode, ContentType, ResponseString); + } + virtual void WriteResponseAsync(std::function&& ContinuationHandler) override + { + ZEN_UNUSED(ContinuationHandler); + } + virtual Oid ParseSessionId() const override { return Oid(); } + virtual uint32_t ParseRequestId() const override { return 0; } + }; + + SUBCASE("router-regex") + { + bool HandledA = false; + bool HandledAA = false; + std::vector Captures; + auto Reset = [&] { + Captures.clear(); + HandledA = HandledAA = false; + }; + + HttpRequestRouter r; + r.AddPattern("a", "([[:alpha:]]+)"); + r.RegisterRoute( + "{a}", + [&](auto& Req) { + HandledA = true; + Captures = {std::string(Req.GetCapture(1))}; + }, + HttpVerb::kGet); + + r.RegisterRoute( + "{a}/{a}", + [&](auto& Req) { + HandledAA = true; + Captures = {std::string(Req.GetCapture(1)), std::string(Req.GetCapture(2))}; + }, + HttpVerb::kGet); + + { + Reset(); + TestHttpServerRequest req{"abc"sv}; + r.HandleRequest(req); + CHECK(HandledA); + CHECK(!HandledAA); + REQUIRE_EQ(Captures.size(), 1); + CHECK_EQ(Captures[0], "abc"sv); + } + + { + Reset(); + TestHttpServerRequest req{"abc/def"sv}; + r.HandleRequest(req); + CHECK(!HandledA); + CHECK(HandledAA); + REQUIRE_EQ(Captures.size(), 2); + CHECK_EQ(Captures[0], "abc"sv); + CHECK_EQ(Captures[1], "def"sv); + } + + { + Reset(); + TestHttpServerRequest req{"123"sv}; + r.HandleRequest(req); + CHECK(!HandledA); + } + + { + Reset(); + TestHttpServerRequest req{"a123"sv}; + r.HandleRequest(req); + CHECK(!HandledA); + } + } + + SUBCASE("router-matcher") { + bool HandledA = false; + bool HandledAA = false; + bool HandledAB = false; + bool HandledAandB = false; + std::vector Captures; + auto Reset = [&] { + HandledA = HandledAA = HandledAB = HandledAandB = false; + Captures.clear(); + }; + HttpRequestRouter r; - r.AddPattern("a", "[[:alpha:]]+"); + r.AddMatcher("a", [](std::string_view In) -> bool { return In.length() % 2 == 0; }); + r.AddMatcher("b", [](std::string_view In) -> bool { return In.length() % 3 == 0; }); r.RegisterRoute( "{a}", - [&](auto) {}, + [&](auto& Req) { + HandledA = true; + Captures = {std::string(Req.GetCapture(1))}; + }, + HttpVerb::kGet); + r.RegisterRoute( + "{a}/{a}", + [&](auto& Req) { + HandledAA = true; + Captures = {std::string(Req.GetCapture(1)), std::string(Req.GetCapture(2))}; + }, + HttpVerb::kGet); + r.RegisterRoute( + "{a}/{b}", + [&](auto& Req) { + HandledAB = true; + Captures = {std::string(Req.GetCapture(1)), std::string(Req.GetCapture(2))}; + }, HttpVerb::kGet); + r.RegisterRoute( + "{a}/and/{b}", + [&](auto& Req) { + HandledAandB = true; + Captures = {std::string(Req.GetCapture(1)), std::string(Req.GetCapture(2))}; + }, + HttpVerb::kGet); + + { + Reset(); + TestHttpServerRequest req{"ab"sv}; + r.HandleRequest(req); + CHECK(HandledA); + CHECK(!HandledAA); + CHECK(!HandledAB); + + REQUIRE_EQ(Captures.size(), 1); + CHECK_EQ(Captures[0], "ab"sv); + } - // struct TestHttpServerRequest : public HttpServerRequest - //{ - // TestHttpServerRequest(std::string_view Uri) : m_uri{Uri} {} - //}; + { + Reset(); + TestHttpServerRequest req{"ab/def"sv}; + r.HandleRequest(req); + CHECK(!HandledA); + CHECK(!HandledAA); + CHECK(HandledAB); + REQUIRE_EQ(Captures.size(), 2); + CHECK_EQ(Captures[0], "ab"sv); + CHECK_EQ(Captures[1], "def"sv); + } + + { + Reset(); + TestHttpServerRequest req{"ab/and/def"sv}; + r.HandleRequest(req); + CHECK(!HandledA); + CHECK(!HandledAA); + CHECK(!HandledAB); + CHECK(HandledAandB); + REQUIRE_EQ(Captures.size(), 2); + CHECK_EQ(Captures[0], "ab"sv); + CHECK_EQ(Captures[1], "def"sv); + } - // TestHttpServerRequest req{}; - // r.HandleRequest(req); + { + Reset(); + TestHttpServerRequest req{"123"sv}; + r.HandleRequest(req); + CHECK(!HandledA); + CHECK(!HandledAA); + CHECK(!HandledAB); + } + + { + Reset(); + TestHttpServerRequest req{"a123"sv}; + r.HandleRequest(req); + CHECK(HandledA); + CHECK(!HandledAA); + CHECK(!HandledAB); + } } SUBCASE("content-type") -- cgit v1.2.3