From 768887011a72ad0fc00aaee66031ee36df21bdae Mon Sep 17 00:00:00 2001 From: Harpreet Sangar Date: Fri, 6 Oct 2023 16:30:43 +0530 Subject: [PATCH] Parse reference facet_by. --- include/collection.h | 2 + include/field.h | 2 + src/collection.cpp | 38 ++++++++++- src/string_utils.cpp | 32 ++++++++- test/collection_faceting_test.cpp | 107 ++++++++++++++++++++++++++++++ test/string_utils_test.cpp | 12 ++++ 6 files changed, 190 insertions(+), 3 deletions(-) diff --git a/include/collection.h b/include/collection.h index 920b4597e..720c6f169 100644 --- a/include/collection.h +++ b/include/collection.h @@ -526,6 +526,8 @@ class Collection { bool get_enable_nested_fields(); + Option parse_facet_with_lock(const std::string& facet_field, std::vector& facets) const; + Option parse_facet(const std::string& facet_field, std::vector& facets) const; // Override operations diff --git a/include/field.h b/include/field.h index 34664a62d..69fdb42d6 100644 --- a/include/field.h +++ b/include/field.h @@ -649,6 +649,8 @@ struct facet { std::string sort_field=""; + std::string reference_collection_name; + bool get_range(int64_t key, std::pair& range_pair) { if(facet_range_map.empty()) { LOG (ERROR) << "Facet range is not defined!!!"; diff --git a/src/collection.cpp b/src/collection.cpp index f699edad3..a4f22f5d8 100644 --- a/src/collection.cpp +++ b/src/collection.cpp @@ -5258,12 +5258,48 @@ bool Collection::get_enable_nested_fields() { return enable_nested_fields; } +Option Collection::parse_facet_with_lock(const std::string& facet_field, std::vector& facets) const { + std::shared_lock lock(mutex); + return parse_facet(facet_field, facets); +} + Option Collection::parse_facet(const std::string& facet_field, std::vector& facets) const { const std::regex base_pattern(".+\\(.*\\)"); const std::regex range_pattern("[[a-zA-Z]+:\\[([0-9]+)\\,\\s*([0-9]+)\\]"); const std::string _alpha = "_alpha"; - if ((facet_field.find(":") != std::string::npos) + if (facet_field[0] == '$') { // Reference facet_by + std::string error_message = "Error parsing reference facet: "; + auto open_paren_pos = facet_field.find('('); + if (open_paren_pos == std::string::npos) { + return Option(400, error_message + "`" + facet_field + "`."); + } + + std::string ref_collection_name = facet_field.substr(1, open_paren_pos - 1); + auto &cm = CollectionManager::get_instance(); + auto ref_collection = cm.get_collection(ref_collection_name); + if (ref_collection == nullptr) { + return Option(400, error_message + "Referenced collection `" + ref_collection_name + "` not found."); + } + + std::string ref_facet_expression = facet_field.substr(open_paren_pos + 1, facet_field.size() - open_paren_pos - 2); + std::vector ref_facet_strings; + StringUtils::split_facet(ref_facet_expression, ref_facet_strings); + + std::vector ref_facets; + for (const auto &ref_facet: ref_facet_strings) { + auto parse_op = ref_collection->parse_facet_with_lock(ref_facet, ref_facets); + if (!parse_op.ok()) { + return Option(parse_op.code(), error_message + parse_op.error()); + } + } + + for (auto &ref_facet: ref_facets) { + ref_facet.reference_collection_name = ref_collection_name; + facets.emplace_back(std::move(ref_facet)); + } + } + else if ((facet_field.find(":") != std::string::npos) && (facet_field.find("sort") == std::string::npos)) { //range based facet if (!std::regex_match(facet_field, base_pattern)) { diff --git a/src/string_utils.cpp b/src/string_utils.cpp index a9296cd5f..b845dc96d 100644 --- a/src/string_utils.cpp +++ b/src/string_utils.cpp @@ -543,16 +543,42 @@ Option StringUtils::split_include_fields(const std::string& include_fields size_t StringUtils::split_facet(const std::string &s, std::vector &result, const bool keep_empty, const size_t start_index, const size_t max_values) { - std::string::const_iterator substart = s.begin()+start_index, subend; size_t end_index = start_index; std::string delim(""), temp(""); std::string current_str=s; + trim(current_str); + while (true) { auto range_pos = current_str.find("("); auto normal_pos = current_str.find(","); - if(range_pos == std::string::npos && normal_pos == std::string::npos){ + if(current_str[0] == '$'){ // Reference facet_by + if(range_pos == std::string::npos){ + break; + } + + auto index = range_pos + 1; + int paren_count = 1; + while (++index < s.size() && paren_count > 0) { + if (s[index] == '(') { + paren_count++; + } else if (s[index] == ')') { + paren_count--; + } + } + + if (paren_count != 0) { + return 0; + } + + temp = delim = current_str.substr(0, index); + subend = substart + delim.size(); + + while (subend != s.end() && *(subend++) != ','); + delim.clear(); + } + else if(range_pos == std::string::npos && normal_pos == std::string::npos){ if(!current_str.empty()){ result.push_back(trim(current_str)); } @@ -584,6 +610,8 @@ size_t StringUtils::split_facet(const std::string &s, std::vector & break; } substart = subend + delim.size(); + while (*substart == ' ' && ++substart != s.end()); + current_str = std::string(substart, s.end()); } diff --git a/test/collection_faceting_test.cpp b/test/collection_faceting_test.cpp index 3d95a45f2..f0f8176c4 100644 --- a/test/collection_faceting_test.cpp +++ b/test/collection_faceting_test.cpp @@ -1248,6 +1248,113 @@ TEST_F(CollectionFacetingTest, FacetParseTest){ ASSERT_EQ("rank", mixed_facets_ptr[2]->field_name); ASSERT_EQ("range", mixed_facets_ptr[1]->field_name); + + fields = { + field("ref_score", field_types::INT32, true), + field("ref_grade", field_types::INT32, true), + field("ref_rank", field_types::INT32, true), + field("ref_range", field_types::INT32, true), + field("ref_scale", field_types::INT32, false), + }; + + collectionManager.create_collection("ref_coll", 1, fields).get(); + + range_facet_fields = { + "$ref_coll(ref_score(fail:[0, 40], pass:[40, 100]), ref_grade(A:[80, 100], B:[60, 80], C:[40, 60]))" + }; + range_facets.clear(); + for(const std::string & facet_field: range_facet_fields) { + coll1->parse_facet(facet_field, range_facets); + } + ASSERT_EQ(2, range_facets.size()); + + ASSERT_EQ("ref_score", range_facets[0].field_name); + ASSERT_TRUE(range_facets[0].is_range_query); + ASSERT_EQ(2, range_facets[0].facet_range_map.size()); + ASSERT_EQ("ref_coll", range_facets[0].reference_collection_name); + + ASSERT_EQ("ref_grade", range_facets[1].field_name); + ASSERT_TRUE(range_facets[1].is_range_query); + ASSERT_EQ(3, range_facets[1].facet_range_map.size()); + ASSERT_EQ("ref_coll", range_facets[1].reference_collection_name); + + normal_facet_fields = { + "$ref_coll(ref_score, ref_grade)" + }; + normal_facets.clear(); + for(const std::string & facet_field: normal_facet_fields) { + coll1->parse_facet(facet_field, normal_facets); + } + ASSERT_EQ(2, normal_facets.size()); + + ASSERT_EQ("ref_score", normal_facets[0].field_name); + ASSERT_EQ("ref_coll", normal_facets[0].reference_collection_name); + ASSERT_EQ("ref_grade", normal_facets[1].field_name); + ASSERT_EQ("ref_coll", normal_facets[1].reference_collection_name); + + wildcard_facet_fields = { + "$ref_coll(ref_ran*, ref_sc*)", + }; + wildcard_facets.clear(); + for(const std::string & facet_field: wildcard_facet_fields) { + coll1->parse_facet(facet_field, wildcard_facets); + } + + ASSERT_EQ(3, wildcard_facets.size()); + + expected = {"ref_range", "ref_rank", "ref_score"}; + for (size_t i = 0; i < wildcard_facets.size(); i++) { + ASSERT_TRUE(expected.count(wildcard_facets[i].field_name) == 1); + ASSERT_EQ("ref_coll", wildcard_facets[i].reference_collection_name); + } + + wildcard_facets.clear(); + coll1->parse_facet("$ref_coll(*)", wildcard_facets); + + // Last field is not a facet. + ASSERT_EQ(fields.size() - 1, wildcard_facets.size()); + + expected.clear(); + for (size_t i = 0; i < fields.size() - 1; i++) { + expected.insert(fields[i].name); + } + + for (size_t i = 0; i < wildcard_facets.size(); i++) { + ASSERT_TRUE(expected.count(wildcard_facets[i].field_name) == 1); + ASSERT_EQ("ref_coll", wildcard_facets[i].reference_collection_name); + } + + mixed_facet_fields = { + "$ref_coll(ref_score, ref_grade(A:[80, 100], B:[60, 80], C:[40,60]), ref_ra*)", + }; + + mixed_facets.clear(); + for(const std::string & facet_field: mixed_facet_fields) { + coll1->parse_facet(facet_field, mixed_facets); + } + ASSERT_EQ(4, mixed_facets.size()); + + mixed_facets_ptr.clear(); + for(auto& f: mixed_facets) { + mixed_facets_ptr.push_back(&f); + } + + std::sort(mixed_facets_ptr.begin(), mixed_facets_ptr.end(), [](const facet* f1, const facet* f2) { + return f1->field_name < f2->field_name; + }); + + ASSERT_EQ("ref_score", mixed_facets_ptr[3]->field_name); + ASSERT_EQ("ref_coll", mixed_facets_ptr[3]->reference_collection_name); + + ASSERT_EQ("ref_grade", mixed_facets_ptr[0]->field_name); + ASSERT_TRUE(mixed_facets_ptr[0]->is_range_query); + ASSERT_GT(mixed_facets_ptr[0]->facet_range_map.size(), 0); + ASSERT_EQ("ref_coll", mixed_facets_ptr[0]->reference_collection_name); + + ASSERT_EQ("ref_rank", mixed_facets_ptr[2]->field_name); + ASSERT_EQ("ref_coll", mixed_facets_ptr[2]->reference_collection_name); + ASSERT_EQ("ref_range", mixed_facets_ptr[1]->field_name); + ASSERT_EQ("ref_coll", mixed_facets_ptr[1]->reference_collection_name); } TEST_F(CollectionFacetingTest, RangeFacetTest) { diff --git a/test/string_utils_test.cpp b/test/string_utils_test.cpp index bd33a1696..b37d54566 100644 --- a/test/string_utils_test.cpp +++ b/test/string_utils_test.cpp @@ -338,6 +338,18 @@ TEST(StringUtilsTest, ShouldSplitRangeFacet){ ASSERT_EQ("grade(A:[80,100], B:[60, 80], C:[40, 60])", mixed_facets[1]); ASSERT_EQ("rank", mixed_facets[2]); + std::string reference_facets_string = " $Collection(score(fail:[0, 40], pass:[40, 100]), city) , grade,"; + std::vector reference_facets; + StringUtils::split_facet(reference_facets_string, reference_facets); + ASSERT_EQ("$Collection(score(fail:[0, 40], pass:[40, 100]), city)", reference_facets[0]); + ASSERT_EQ("grade", reference_facets[1]); + + reference_facets_string = "score(fail:[0, 40], pass:[40, 100]), $Collection(city)"; + reference_facets.clear(); + StringUtils::split_facet(reference_facets_string, reference_facets); + ASSERT_EQ("score(fail:[0, 40], pass:[40, 100])", reference_facets[0]); + ASSERT_EQ("$Collection(city)", reference_facets[1]); + // empty string should produce empty list std::vector lines_empty; StringUtils::split_facet("", lines_empty);