diff --git a/freesound/settings.py b/freesound/settings.py index 712b4227a..275719ad0 100644 --- a/freesound/settings.py +++ b/freesound/settings.py @@ -626,6 +626,10 @@ SEARCH_SOUNDS_FIELD_LICENSE_NAME: {'limit': 10}, } +SEARCH_SOUNDS_BETA_FACETS = { + 'fsdsinet_detected_class': {'limit': 30}, +} + SEARCH_FORUM_SORT_OPTION_THREAD_DATE_FIRST = "Thread creation (newest first)" SEARCH_FORUM_SORT_OPTION_DATE_NEW_FIRST = "Post creation (newest first)" SEARCH_FORUM_SORT_OPTIONS_WEB = [ diff --git a/search/templatetags/search.py b/search/templatetags/search.py index a399bf4cf..e62817c86 100644 --- a/search/templatetags/search.py +++ b/search/templatetags/search.py @@ -24,23 +24,32 @@ from sounds.models import License from utils.search import search_query_processor_options +from utils.search.backends.solr555pysolr import FIELD_NAMES_MAP from utils.tags import annotate_tags register = template.Library() @register.inclusion_tag('search/facet.html', takes_context=True) -def display_facet(context, facet_name): +def display_facet(context, facet_name, facet_title=None, facet_type='list'): sqp = context['sqp'] facets = context['facets'] - facet_type = {'tag': 'cloud', 'username': 'cloud'}.get(facet_name, 'list') - facet_title = { - 'tag': 'Related tags', - 'username': 'Related users', - 'grouping_pack': 'Packs', - 'license': 'Licenses' - }.get(facet_name, facet_name.capitalize()) + if facet_title is None: + facet_title = facet_name.capitalize() + + solr_fieldname = FIELD_NAMES_MAP.get(facet_name, facet_name) + if facet_name in facets: + # If a facet contains a value which is already used in a filter (this can happen with facets with multiple values like + # tags), then we remove it from the list of options so we don't show redundant information + facet_values_to_skip = [] + for field_name_value in sqp.get_active_filters(): + if field_name_value.startswith(solr_fieldname + ':'): + facet_values_to_skip.append(field_name_value.split(':')[1].replace('"', '')) + if 
facet_values_to_skip: + facets[facet_name] = [f for f in facets[facet_name] if f[0] not in facet_values_to_skip] + + # Annotate facet elements with size values used in the tag cloud (this is not useful for all facets) facet = annotate_tags([dict(value=f[0], count=f[1]) for f in facets[facet_name] if f[0] != "0"], sort="value", small_size=0.7, large_size=2.0) else: @@ -90,13 +99,13 @@ def display_facet(context, facet_name): # Set the URL to add facet values as filters if element["value"].startswith('('): # If the filter value is a "complex" operation , don't wrap it in quotes - filter_str = f'{facet_name}:{element["value"]}' + filter_str = f'{solr_fieldname}:{element["value"]}' elif element["value"].isdigit(): # If the filter value is a digit, also don't wrap it in quotes - filter_str = f'{facet_name}:{element["value"]}' + filter_str = f'{solr_fieldname}:{element["value"]}' else: # Otherwise wrap in quotes - filter_str = f'{facet_name}:"{element["value"]}"' + filter_str = f'{solr_fieldname}:"{element["value"]}"' element['add_filter_url'] = sqp.get_url(add_filters=[filter_str]) # We sort the facets by count. Also, we apply an opacity filter on "could" type facets diff --git a/search/views.py b/search/views.py index e49896c21..c61f9499e 100644 --- a/search/views.py +++ b/search/views.py @@ -153,12 +153,6 @@ def search_view_helper(request): 'query_time': results.q_time })) - # For the facets of fields that could have mulitple values (i.e. 
currently, only "tags" facet), make - # sure to remove the filters for the corresponding facet field that are already active (so we remove - # redundant information) - if 'tag' in results.facets: - results.facets['tag'] = [(tag, count) for tag, count in results.facets['tag'] if tag not in sqp.get_tags_in_filters()] - # Compile template variables return { 'sqp': sqp, diff --git a/templates/search/search.html b/templates/search/search.html index de3f026a3..9598b67a4 100644 --- a/templates/search/search.html +++ b/templates/search/search.html @@ -165,13 +165,14 @@

+
{% display_search_option "include_audio_problems" %}
{% display_search_option "compute_clusters" %}
{% display_search_option "similar_to" %}
-
-
{% display_search_option "include_audio_problems" %}
+
+
FSDSINET class:
+ {% display_facet "fsdsinet_detected_class" "" "cloud" %}
-
{% endif %}
@@ -210,19 +211,19 @@

{% comment %}facets{% endcomment%} diff --git a/utils/search/backends/solr555pysolr.py b/utils/search/backends/solr555pysolr.py index 8a6dbbc2b..0b70428ec 100644 --- a/utils/search/backends/solr555pysolr.py +++ b/utils/search/backends/solr555pysolr.py @@ -57,6 +57,8 @@ settings.SEARCH_SOUNDS_FIELD_LICENSE_NAME: 'license' } +REVERSE_FIELD_NAMES_MAP = {value: key for key, value in FIELD_NAMES_MAP.items()} + # Map "web" sorting options to solr sorting options SORT_OPTIONS_MAP = { @@ -309,11 +311,10 @@ def convert_post_to_search_engine_document(self, post): "has_posts": False if post.thread.num_posts == 0 else True } return document - - def add_solr_suffix_to_dynamic_fieldname(self, fieldname): - """Add the corresponding SOLR dynamic field suffix to the given fieldname. If the fieldname does not correspond - to a dynamic field, leave it unchanged. See docstring in 'add_solr_suffix_to_dynamic_fieldnames_in_filter' for - more information""" + + def get_dynamic_fields_map(self): + if hasattr(self, '_dynamic_fields_map'): + return self._dynamic_fields_map dynamic_fields_map = {} for analyzer, analyzer_data in settings.ANALYZERS_CONFIGURATION.items(): if 'descriptors_map' in analyzer_data: @@ -322,7 +323,14 @@ def add_solr_suffix_to_dynamic_fieldname(self, fieldname): if descriptor_type is not None: dynamic_fields_map[db_descriptor_key] = '{}{}'.format( db_descriptor_key, SOLR_DYNAMIC_FIELDS_SUFFIX_MAP[descriptor_type]) - return dynamic_fields_map.get(fieldname, fieldname) + self._dynamic_fields_map = dynamic_fields_map + return dynamic_fields_map + + def add_solr_suffix_to_dynamic_fieldname(self, fieldname): + """Add the corresponding SOLR dynamic field suffix to the given fieldname. If the fieldname does not correspond + to a dynamic field, leave it unchanged. E.g. 'ac_tonality' -> 'ac_tonality_s'. 
See docstring in + 'add_solr_suffix_to_dynamic_fieldnames_in_filter' for more information""" + return self.get_dynamic_fields_map().get(fieldname, fieldname) def add_solr_suffix_to_dynamic_fieldnames_in_filter(self, query_filter): """Processes a filter string containing field names and replaces the occurrences of fieldnames that match with @@ -331,16 +339,25 @@ def add_solr_suffix_to_dynamic_fieldnames_in_filter(self, query_filter): fields which need to end with a specific suffi that SOLR uses to learn about the type of the field and how it should treat it. """ - for analyzer, analyzer_data in settings.ANALYZERS_CONFIGURATION.items(): - if 'descriptors_map' in analyzer_data: - descriptors_map = settings.ANALYZERS_CONFIGURATION[analyzer]['descriptors_map'] - for _, db_descriptor_key, descriptor_type in descriptors_map: - if descriptor_type is not None: - query_filter = query_filter.replace( - f'{db_descriptor_key}:','{}{}:'.format( - db_descriptor_key, SOLR_DYNAMIC_FIELDS_SUFFIX_MAP[descriptor_type])) + for raw_fieldname, solr_fieldname in self.get_dynamic_fields_map().items(): + query_filter = query_filter.replace( + f'{raw_fieldname}:', f'{solr_fieldname}:') return query_filter - + + def remove_solr_suffix_from_dynamic_fieldname(self, fieldname): + """Removes the solr dynamic field suffix from the given fieldname (if any). E.g. 
'ac_tonality_s' -> 'ac_tonality'""" + for suffix in SOLR_DYNAMIC_FIELDS_SUFFIX_MAP.values(): + if fieldname.endswith(suffix): + return fieldname[:-len(suffix)] + return fieldname + + def get_solr_fieldname(self, fieldname): + return self.add_solr_suffix_to_dynamic_fieldname(FIELD_NAMES_MAP.get(fieldname, fieldname)) + + def get_original_fieldname(self, solr_fieldname): + solr_fieldname_no_suffix = self.remove_solr_suffix_from_dynamic_fieldname(solr_fieldname) + return REVERSE_FIELD_NAMES_MAP.get(solr_fieldname_no_suffix, solr_fieldname_no_suffix) + def search_process_sort(self, sort, forum=False): """Translates sorting criteria to solr sort criteria and add extra criteria if sorting by ratings. @@ -382,51 +399,51 @@ def search_filter_make_intersection(self, query_filter): return query_filter def search_process_filter(self, query_filter, only_sounds_within_ids=False, only_sounds_with_pack=False): - """Process the filter to make a number of adjustments - - 1) Add type suffix to human-readable audio analyzer descriptor names (needed for dynamic solr field names). - 2) If only sounds with pack should be returned, add such a filter. - 3) Add filter for sound IDs if only_sounds_within_ids is passed. - 4) Rewrite geotag bounding box queries to use solr 5+ syntax - - Step 1) is used for the dynamic field names used in Solr (e.g. ac_tonality -> ac_tonality_s, ac_tempo -> - ac_tempo_i). The dynamic field names we define in Solr schema are '*_b' (for bool), '*_d' (for float), - '*_i' (for integer) and '*_s' (for string). At indexing time, we append these suffixes to the analyzer - descriptor names that need to be indexed so Solr can treat the types properly. Now we automatically append the - suffices to the filter names so users do not need to deal with that and Solr understands recognizes the field name. - - Args: - query_filter (str): query filter string. 
- only_sounds_with_pack (bool, optional): whether to only include sounds that belong to a pack - only_sounds_within_ids (List[int], optional): restrict search results to sounds with these IDs - - Returns: - str: processed filter query string. - """ - # Add type suffix to human-readable audio analyzer descriptor names which is needed for solr dynamic fields - query_filter = self.add_solr_suffix_to_dynamic_fieldnames_in_filter(query_filter) - - # If we only want sounds with packs and there is no pack filter, add one - if only_sounds_with_pack and not 'pack:' in query_filter: - query_filter += ' pack:*' - - if 'geotag:"Intersects(' in query_filter: - # Replace geotag:"Intersects( )" - # with geotag:[", " TO " "] - query_filter = re.sub('geotag:"Intersects\((.+?) (.+?) (.+?) (.+?)\)"', r'geotag:["\2,\1" TO "\4,\3"]', query_filter) - - query_filter = self.search_filter_make_intersection(query_filter) - - # When calculating results form clustering, the "only_sounds_within_ids" argument is passed and we filter - # our query to the sounds in that list of IDs. - if only_sounds_within_ids: - sounds_within_ids_filter = ' OR '.join(['id:{}'.format(sound_id) for sound_id in only_sounds_within_ids]) - if query_filter: - query_filter += ' AND ({})'.format(sounds_within_ids_filter) - else: - query_filter = '({})'.format(sounds_within_ids_filter) + """Process the filter to make a number of adjustments + + 1) Add type suffix to human-readable audio analyzer descriptor names (needed for dynamic solr field names). + 2) If only sounds with pack should be returned, add such a filter. + 3) Add filter for sound IDs if only_sounds_within_ids is passed. + 4) Rewrite geotag bounding box queries to use solr 5+ syntax + + Step 1) is used for the dynamic field names used in Solr (e.g. ac_tonality -> ac_tonality_s, ac_tempo -> + ac_tempo_i). The dynamic field names we define in Solr schema are '*_b' (for bool), '*_d' (for float), + '*_i' (for integer) and '*_s' (for string). 
At indexing time, we append these suffixes to the analyzer + descriptor names that need to be indexed so Solr can treat the types properly. Now we automatically append the + suffixes to the filter names so users do not need to deal with that and Solr recognizes the field name. + + Args: + query_filter (str): query filter string. + only_sounds_with_pack (bool, optional): whether to only include sounds that belong to a pack + only_sounds_within_ids (List[int], optional): restrict search results to sounds with these IDs + + Returns: + str: processed filter query string. + """ + # Add type suffix to human-readable audio analyzer descriptor names which is needed for solr dynamic fields + query_filter = self.add_solr_suffix_to_dynamic_fieldnames_in_filter(query_filter) - return query_filter + # If we only want sounds with packs and there is no pack filter, add one + if only_sounds_with_pack and not 'pack:' in query_filter: + query_filter += ' pack:*' + + if 'geotag:"Intersects(' in query_filter: + # Replace geotag:"Intersects( )" + # with geotag:[", " TO " "] + query_filter = re.sub('geotag:"Intersects\((.+?) (.+?) (.+?) (.+?)\)"', r'geotag:["\2,\1" TO "\4,\3"]', query_filter) + + query_filter = self.search_filter_make_intersection(query_filter) + + # When calculating results from clustering, the "only_sounds_within_ids" argument is passed and we filter + # our query to the sounds in that list of IDs. 
+ if only_sounds_within_ids: + sounds_within_ids_filter = ' OR '.join(['id:{}'.format(sound_id) for sound_id in only_sounds_within_ids]) + if query_filter: + query_filter += ' AND ({})'.format(sounds_within_ids_filter) + else: + query_filter = '({})'.format(sounds_within_ids_filter) + + return query_filter def force_sounds(self, query_dict): # Add an extra filter to the query parameters to make sure these return sound documents only @@ -509,11 +526,11 @@ def search_sounds(self, textual_query='', query_fields=None, query_filter='', fi # If no fields provided, use the default query_fields = settings.SEARCH_SOUNDS_DEFAULT_FIELD_WEIGHTS if isinstance(query_fields, list): - query_fields = [self.add_solr_suffix_to_dynamic_fieldname(FIELD_NAMES_MAP.get(field, field)) for field in query_fields] + query_fields = [self.get_solr_fieldname(field_name) for field_name in query_fields] elif isinstance(query_fields, dict): # Also remove fields with weight <= 0 - query_fields = [(self.add_solr_suffix_to_dynamic_fieldname(FIELD_NAMES_MAP.get(field, field)), weight) - for field, weight in query_fields.items() if weight > 0] + query_fields = [(self.get_solr_fieldname(field_name), weight) + for field_name, weight in query_fields.items() if weight > 0] # Set main query options query.set_dismax_query(textual_query, query_fields=query_fields) @@ -583,7 +600,7 @@ def search_sounds(self, textual_query='', query_fields=None, query_filter='', fi # Configure facets if facets is not None: json_facets = {} - facet_fields = [FIELD_NAMES_MAP[field_name] for field_name, _ in facets.items()] + facet_fields = [self.get_solr_fieldname(field_name) for field_name, _ in facets.items()] for field in facet_fields: json_facets[field] = SOLR_SOUND_FACET_DEFAULT_OPTIONS.copy() json_facets[field]['field'] = field @@ -591,7 +608,7 @@ def search_sounds(self, textual_query='', query_fields=None, query_filter='', fi # In similarity search we need to set the "domain" facet option to apply them to the parent 
documents of the child documents we will match json_facets[field]['domain'] = {'blockParent': f'content_type:{SOLR_DOC_CONTENT_TYPES["sound"]}'} for field_name, extra_options in facets.items(): - json_facets[FIELD_NAMES_MAP[field_name]].update(extra_options) + json_facets[self.get_solr_fieldname(field_name)].update(extra_options) query.set_facet_json_api(json_facets) # Configure grouping @@ -620,6 +637,10 @@ def search_sounds(self, textual_query='', query_fields=None, query_filter='', fi results = self.get_sounds_index().search( **(self.force_sounds(query.as_kwargs()) if similar_to is None else query.as_kwargs())) + # Facets returned in results use the corresponding solr fieldnames as keys. We want to convert them to the + # original fieldnames so that the rest of the code can use them without knowing about the solr fieldnames. + results.facets = {self.get_original_fieldname(facet_name): data for facet_name, data in results.facets.items()} + # Solr uses a string for the id field, but django uses an int. 
Convert the id in all results to int # before use to avoid issues for d in results.docs: diff --git a/utils/search/search_query_processor.py b/utils/search/search_query_processor.py index 1b02f3a3e..6602e4546 100644 --- a/utils/search/search_query_processor.py +++ b/utils/search/search_query_processor.py @@ -31,7 +31,7 @@ from utils.clustering_utilities import get_ids_in_cluster, get_clusters_for_query from utils.encryption import create_hash -from utils.search.backends.solr555pysolr import FIELD_NAMES_MAP +from utils.search.backends.solr555pysolr import Solr555PySolrSearchEngine from utils.search.search_sounds import allow_beta_search_features from .search_query_processor_options import SearchOptionStr, SearchOptionChoice, \ SearchOptionInt, SearchOptionBool, SearchOptionRange, SearchOptionMultipleChoice, \ @@ -167,6 +167,10 @@ def __init__(self, request, facets=None): else: self.facets = facets + # Add extra facets if in beta mode + if allow_beta_search_features(request): + self.facets.update(settings.SEARCH_SOUNDS_BETA_FACETS) + # Put all SearchOption objects in a self.options dictionary so we can easily iterate them and we can access them through self.options attribute # In this was SearchOption objects are accessible in a similar way as Django form fields are accessible in form objects # NOTE: even though we add references to the SearchOption objects in the self.options dictionary, we don't actually remove these references from @@ -278,9 +282,9 @@ def get_active_filters(self, include_filters_from_options=True, if fit is not None: ff.append(fit) if include_non_option_filters: + facet_search_engine_field_names = list(self.facets.keys()) for non_option_filter in self.non_option_filters: should_be_included = True - facet_search_engine_field_names = [FIELD_NAMES_MAP[f] for f in self.facets.keys()] if not include_filters_from_facets and non_option_filter[0] in facet_search_engine_field_names: should_be_included = False if should_be_included: @@ -331,12 +335,12 @@ 
def get_filters_data_to_display_in_search_results_page(self): return filters_data - def has_filter_with_name(self, filter_name): + def has_filter_with_name(self, field_name): """Returns True if the parsed filter has a filter with the given name. """ for node in self.f_parsed: if type(node) == luqum.tree.SearchField: - if node.name == filter_name: + if node.name == field_name: return True return False diff --git a/utils/tests/test_search_query_processor.py b/utils/tests/test_search_query_processor.py index c9b2c0a53..726a8c2f0 100644 --- a/utils/tests/test_search_query_processor.py +++ b/utils/tests/test_search_query_processor.py @@ -31,7 +31,7 @@ class SearchQueryProcessorTests(TestCase): default_expected_params = { 'current_page': 1, - 'facets': settings.SEARCH_SOUNDS_DEFAULT_FACETS, + 'facets': settings.SEARCH_SOUNDS_DEFAULT_FACETS | settings.SEARCH_SOUNDS_BETA_FACETS, # Combine all facets because we normally test with superuser 'field_list': ['id', 'score'], 'group_by_pack': True, 'num_sounds': settings.SOUNDS_PER_PAGE, @@ -195,7 +195,7 @@ def test_search_query_processor_as_query_params_and_make_url(self, fake_get_ids_ # With tags mode sqp, url = self.run_fake_search_query_processor(base_url=reverse('tags')) - expected_facets = settings.SEARCH_SOUNDS_DEFAULT_FACETS.copy() + expected_facets = settings.SEARCH_SOUNDS_DEFAULT_FACETS | settings.SEARCH_SOUNDS_BETA_FACETS expected_facets['tags']['limit'] = 50 self.assertExpectedParams(sqp.as_query_params(), {'facets': expected_facets}) self.assertGetUrlAsExpected(sqp, url) @@ -364,9 +364,9 @@ def test_search_query_processor_contains_active_advanced_search_options(self): def test_search_query_processor_as_query_params_exclude_facet_filters(self): for filter_name, is_facet in [ ('samplerate', True), - ('grouping_pack', True), + ('pack_grouping', True), ('username', True), - ('tag', True), + ('tags', True), ('bitrate', True), ('bitdepth', True), ('type', True),