Skip to content

Commit

Permalink
Parse reference facet_by.
Browse files Browse the repository at this point in the history
  • Loading branch information
happy-san committed Oct 6, 2023
1 parent a4ab04c commit 7688870
Show file tree
Hide file tree
Showing 6 changed files with 190 additions and 3 deletions.
2 changes: 2 additions & 0 deletions include/collection.h
Expand Up @@ -526,6 +526,8 @@ class Collection {

bool get_enable_nested_fields();

// Acquires a shared lock on `mutex` and then calls parse_facet() with the same arguments,
// returning its result unchanged.
Option<bool> parse_facet_with_lock(const std::string& facet_field, std::vector<facet>& facets) const;

// Parses the facet expression `facet_field` into `facets`.
// An expression starting with `$` is treated as a reference facet of the form
// `$collection_name(expr, ...)`: each inner expression is parsed by the referenced collection and
// every resulting facet is tagged with that collection's name before being appended to `facets`.
// In that case a 400 error is returned when the expression has no `(` or when the referenced
// collection is not found.
Option<bool> parse_facet(const std::string& facet_field, std::vector<facet>& facets) const;

// Override operations
Expand Down
2 changes: 2 additions & 0 deletions include/field.h
Expand Up @@ -649,6 +649,8 @@ struct facet {

std::string sort_field="";

// Name of the referenced collection for a facet that came from a `$collection(...)` expression;
// remains an empty string unless explicitly assigned.
std::string reference_collection_name;

bool get_range(int64_t key, std::pair<int64_t, std::string>& range_pair) {
if(facet_range_map.empty()) {
LOG (ERROR) << "Facet range is not defined!!!";
Expand Down
38 changes: 37 additions & 1 deletion src/collection.cpp
Expand Up @@ -5258,12 +5258,48 @@ bool Collection::get_enable_nested_fields() {
return enable_nested_fields;
}

/**
 * Locking wrapper around parse_facet().
 *
 * Takes a shared lock on `mutex`, forwards both arguments to parse_facet()
 * and hands its result back unchanged.
 *
 * @param facet_field facet expression, passed through as-is.
 * @param facets      output vector, passed through as-is.
 * @return the Option<bool> produced by parse_facet().
 */
Option<bool> Collection::parse_facet_with_lock(const std::string& facet_field, std::vector<facet>& facets) const {
    // The guard lives until the function returns, so the lock spans the whole parse_facet() call.
    std::shared_lock read_guard{mutex};
    return parse_facet(facet_field, facets);
}

Option<bool> Collection::parse_facet(const std::string& facet_field, std::vector<facet>& facets) const {
const std::regex base_pattern(".+\\(.*\\)");
const std::regex range_pattern("[[a-zA-Z]+:\\[([0-9]+)\\,\\s*([0-9]+)\\]");
const std::string _alpha = "_alpha";

if ((facet_field.find(":") != std::string::npos)
if (facet_field[0] == '$') { // Reference facet_by
std::string error_message = "Error parsing reference facet: ";
auto open_paren_pos = facet_field.find('(');
if (open_paren_pos == std::string::npos) {
return Option<bool>(400, error_message + "`" + facet_field + "`.");
}

std::string ref_collection_name = facet_field.substr(1, open_paren_pos - 1);
auto &cm = CollectionManager::get_instance();
auto ref_collection = cm.get_collection(ref_collection_name);
if (ref_collection == nullptr) {
return Option<bool>(400, error_message + "Referenced collection `" + ref_collection_name + "` not found.");
}

std::string ref_facet_expression = facet_field.substr(open_paren_pos + 1, facet_field.size() - open_paren_pos - 2);
std::vector<std::string> ref_facet_strings;
StringUtils::split_facet(ref_facet_expression, ref_facet_strings);

std::vector<facet> ref_facets;
for (const auto &ref_facet: ref_facet_strings) {
auto parse_op = ref_collection->parse_facet_with_lock(ref_facet, ref_facets);
if (!parse_op.ok()) {
return Option<bool>(parse_op.code(), error_message + parse_op.error());
}
}

for (auto &ref_facet: ref_facets) {
ref_facet.reference_collection_name = ref_collection_name;
facets.emplace_back(std::move(ref_facet));
}
}
else if ((facet_field.find(":") != std::string::npos)
&& (facet_field.find("sort") == std::string::npos)) { //range based facet

if (!std::regex_match(facet_field, base_pattern)) {
Expand Down
32 changes: 30 additions & 2 deletions src/string_utils.cpp
Expand Up @@ -543,16 +543,42 @@ Option<bool> StringUtils::split_include_fields(const std::string& include_fields
size_t StringUtils::split_facet(const std::string &s, std::vector<std::string> &result, const bool keep_empty,
const size_t start_index, const size_t max_values) {


std::string::const_iterator substart = s.begin()+start_index, subend;
size_t end_index = start_index;
std::string delim(""), temp("");
std::string current_str=s;
trim(current_str);

while (true) {
auto range_pos = current_str.find("(");
auto normal_pos = current_str.find(",");

if(range_pos == std::string::npos && normal_pos == std::string::npos){
if(current_str[0] == '$'){ // Reference facet_by
if(range_pos == std::string::npos){
break;
}

auto index = range_pos + 1;
int paren_count = 1;
while (++index < s.size() && paren_count > 0) {
if (s[index] == '(') {
paren_count++;
} else if (s[index] == ')') {
paren_count--;
}
}

if (paren_count != 0) {
return 0;
}

temp = delim = current_str.substr(0, index);
subend = substart + delim.size();

while (subend != s.end() && *(subend++) != ',');
delim.clear();
}
else if(range_pos == std::string::npos && normal_pos == std::string::npos){
if(!current_str.empty()){
result.push_back(trim(current_str));
}
Expand Down Expand Up @@ -584,6 +610,8 @@ size_t StringUtils::split_facet(const std::string &s, std::vector<std::string> &
break;
}
substart = subend + delim.size();
while (*substart == ' ' && ++substart != s.end());

current_str = std::string(substart, s.end());
}

Expand Down
107 changes: 107 additions & 0 deletions test/collection_faceting_test.cpp
Expand Up @@ -1248,6 +1248,113 @@ TEST_F(CollectionFacetingTest, FacetParseTest){

ASSERT_EQ("rank", mixed_facets_ptr[2]->field_name);
ASSERT_EQ("range", mixed_facets_ptr[1]->field_name);

fields = {
field("ref_score", field_types::INT32, true),
field("ref_grade", field_types::INT32, true),
field("ref_rank", field_types::INT32, true),
field("ref_range", field_types::INT32, true),
field("ref_scale", field_types::INT32, false),
};

collectionManager.create_collection("ref_coll", 1, fields).get();

range_facet_fields = {
"$ref_coll(ref_score(fail:[0, 40], pass:[40, 100]), ref_grade(A:[80, 100], B:[60, 80], C:[40, 60]))"
};
range_facets.clear();
for(const std::string & facet_field: range_facet_fields) {
coll1->parse_facet(facet_field, range_facets);
}
ASSERT_EQ(2, range_facets.size());

ASSERT_EQ("ref_score", range_facets[0].field_name);
ASSERT_TRUE(range_facets[0].is_range_query);
ASSERT_EQ(2, range_facets[0].facet_range_map.size());
ASSERT_EQ("ref_coll", range_facets[0].reference_collection_name);

ASSERT_EQ("ref_grade", range_facets[1].field_name);
ASSERT_TRUE(range_facets[1].is_range_query);
ASSERT_EQ(3, range_facets[1].facet_range_map.size());
ASSERT_EQ("ref_coll", range_facets[1].reference_collection_name);

normal_facet_fields = {
"$ref_coll(ref_score, ref_grade)"
};
normal_facets.clear();
for(const std::string & facet_field: normal_facet_fields) {
coll1->parse_facet(facet_field, normal_facets);
}
ASSERT_EQ(2, normal_facets.size());

ASSERT_EQ("ref_score", normal_facets[0].field_name);
ASSERT_EQ("ref_coll", normal_facets[0].reference_collection_name);
ASSERT_EQ("ref_grade", normal_facets[1].field_name);
ASSERT_EQ("ref_coll", normal_facets[1].reference_collection_name);

wildcard_facet_fields = {
"$ref_coll(ref_ran*, ref_sc*)",
};
wildcard_facets.clear();
for(const std::string & facet_field: wildcard_facet_fields) {
coll1->parse_facet(facet_field, wildcard_facets);
}

ASSERT_EQ(3, wildcard_facets.size());

expected = {"ref_range", "ref_rank", "ref_score"};
for (size_t i = 0; i < wildcard_facets.size(); i++) {
ASSERT_TRUE(expected.count(wildcard_facets[i].field_name) == 1);
ASSERT_EQ("ref_coll", wildcard_facets[i].reference_collection_name);
}

wildcard_facets.clear();
coll1->parse_facet("$ref_coll(*)", wildcard_facets);

// Last field is not a facet.
ASSERT_EQ(fields.size() - 1, wildcard_facets.size());

expected.clear();
for (size_t i = 0; i < fields.size() - 1; i++) {
expected.insert(fields[i].name);
}

for (size_t i = 0; i < wildcard_facets.size(); i++) {
ASSERT_TRUE(expected.count(wildcard_facets[i].field_name) == 1);
ASSERT_EQ("ref_coll", wildcard_facets[i].reference_collection_name);
}

mixed_facet_fields = {
"$ref_coll(ref_score, ref_grade(A:[80, 100], B:[60, 80], C:[40,60]), ref_ra*)",
};

mixed_facets.clear();
for(const std::string & facet_field: mixed_facet_fields) {
coll1->parse_facet(facet_field, mixed_facets);
}
ASSERT_EQ(4, mixed_facets.size());

mixed_facets_ptr.clear();
for(auto& f: mixed_facets) {
mixed_facets_ptr.push_back(&f);
}

std::sort(mixed_facets_ptr.begin(), mixed_facets_ptr.end(), [](const facet* f1, const facet* f2) {
return f1->field_name < f2->field_name;
});

ASSERT_EQ("ref_score", mixed_facets_ptr[3]->field_name);
ASSERT_EQ("ref_coll", mixed_facets_ptr[3]->reference_collection_name);

ASSERT_EQ("ref_grade", mixed_facets_ptr[0]->field_name);
ASSERT_TRUE(mixed_facets_ptr[0]->is_range_query);
ASSERT_GT(mixed_facets_ptr[0]->facet_range_map.size(), 0);
ASSERT_EQ("ref_coll", mixed_facets_ptr[0]->reference_collection_name);

ASSERT_EQ("ref_rank", mixed_facets_ptr[2]->field_name);
ASSERT_EQ("ref_coll", mixed_facets_ptr[2]->reference_collection_name);
ASSERT_EQ("ref_range", mixed_facets_ptr[1]->field_name);
ASSERT_EQ("ref_coll", mixed_facets_ptr[1]->reference_collection_name);
}

TEST_F(CollectionFacetingTest, RangeFacetTest) {
Expand Down
12 changes: 12 additions & 0 deletions test/string_utils_test.cpp
Expand Up @@ -338,6 +338,18 @@ TEST(StringUtilsTest, ShouldSplitRangeFacet){
ASSERT_EQ("grade(A:[80,100], B:[60, 80], C:[40, 60])", mixed_facets[1]);
ASSERT_EQ("rank", mixed_facets[2]);

std::string reference_facets_string = " $Collection(score(fail:[0, 40], pass:[40, 100]), city) , grade,";
std::vector<std::string> reference_facets;
StringUtils::split_facet(reference_facets_string, reference_facets);
ASSERT_EQ("$Collection(score(fail:[0, 40], pass:[40, 100]), city)", reference_facets[0]);
ASSERT_EQ("grade", reference_facets[1]);

reference_facets_string = "score(fail:[0, 40], pass:[40, 100]), $Collection(city)";
reference_facets.clear();
StringUtils::split_facet(reference_facets_string, reference_facets);
ASSERT_EQ("score(fail:[0, 40], pass:[40, 100])", reference_facets[0]);
ASSERT_EQ("$Collection(city)", reference_facets[1]);

// empty string should produce empty list
std::vector<std::string> lines_empty;
StringUtils::split_facet("", lines_empty);
Expand Down

0 comments on commit 7688870

Please sign in to comment.