/
searchFulltextStandard.scala.txt
172 lines (137 loc) · 5.89 KB
/
searchFulltextStandard.scala.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
@*
* Copyright © 2021 Data and Service Center for the Humanities and/or DaSCH Service Platform contributors.
* SPDX-License-Identifier: Apache-2.0
*@
@import org.knora.webapi.IRI
@import org.knora.webapi.exceptions.SparqlGenerationException
@import org.knora.webapi.util.ApacheLuceneSupport.LuceneQueryString
@*
* Performs a simple full-text search using standard SPARQL (except for the interface to Lucene, which is
* triplestore-specific), and without inference.
*
* The number of rows returned per matching resource is equal to the number of values that matched in the resource,
* plus one if the resource's label matched.
*
* This template is used only by searchFulltext.scala.txt.
*
* @param triplestore the name of the triplestore being used.
* @param searchTerms search terms.
* @param limitToProject limit search to the given project.
* @param limitToResourceClass limit search to given resource class.
* @param limitToStandoffClass limit the search to given standoff class.
* @param returnFiles if true, return any file value attached to each matching resource.
* @param separator the separator to be used in aggregation functions.
* @param limit the maximum number of rows to be returned.
* @param offset the offset for paging (starts at 0); applied only when countQuery is false.
* @param countQuery if true, return only the number of distinct matching resources instead of the resources themselves.
*@
@(triplestore: String,
  searchTerms: LuceneQueryString,
  limitToProject: Option[IRI],
  limitToResourceClass: Option[IRI],
  limitToStandoffClass: Option[IRI],
  returnFiles: Boolean,
  separator: Option[Char],
  limit: Int,
  offset: Int,
  countQuery: Boolean)

@* Namespaces referenced by the generated query. *@
PREFIX knora-base: <http://www.knora.org/ontology/knora-base#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

@*
 * Projection. A count query yields a single ?count of distinct resources. Otherwise each resource is returned
 * together with the string form of every bound ?valueObject in its group, joined with the given separator
 * (an unbound ?valueObject contributes an empty string). The separator is evaluated only in the non-count
 * branch, so a missing separator raises SparqlGenerationException only there.
 *@
@if(countQuery) {
    SELECT (COUNT(DISTINCT ?resource) AS ?count)
} else {
    SELECT DISTINCT ?resource
        (GROUP_CONCAT(IF(BOUND(?valueObject), STR(?valueObject), "");
            separator="@separator.getOrElse(throw SparqlGenerationException("Separator expected for non count query, but none given"))")
        AS ?valueObjectConcat)
}
WHERE {
    @* Innermost step: collect every subject that the full-text index matched for the search terms. *@
    {
        SELECT DISTINCT ?matchingSubject WHERE {
            ?matchingSubject <http://jena.apache.org/text#query> '@searchTerms.getQueryString' .

            @* standoff search *@
            @if(limitToStandoffClass.nonEmpty) {
                # ?matchingSubject is expected to be a TextValue
                ?matchingSubject a knora-base:TextValue ;
                    knora-base:valueHasString ?literal ;
                    knora-base:valueHasStandoff ?standoffNode .

                ?standoffNode a <@limitToStandoffClass.get> ;
                    knora-base:standoffTagHasStart ?start ;
                    knora-base:standoffTagHasEnd ?end .

                # https://www.w3.org/TR/xpath-functions/#func-substring
                # The first character of a string is located at position 1, not position 0. -> standoff uses a 0 based index
                BIND(SUBSTR(?literal, ?start+1, ?end - ?start) AS ?markedup)

                @* Loop over search terms and make sure they are all contained in the specified standoff markup *@
                @for(term <- searchTerms.getSingleTerms) {
                    @* TODO: Ignore Lucene operators *@
                    @* NOTE(review): the term is used as a regular expression inside a single-quoted literal; presumably it is already escaped by LuceneQueryString - confirm. *@
                    FILTER REGEX(?markedup, '@term', "i")
                }
            }
        }
    }

    @* Case 1: the match is a value object (any Value subclass except LinkValue and ListValue) attached to a resource. *@
    OPTIONAL {
        ?matchingSubject a ?valueObjectType .

        ?valueObjectType rdfs:subClassOf* knora-base:Value .

        FILTER(?valueObjectType != knora-base:LinkValue && ?valueObjectType != knora-base:ListValue)

        ?containingResource ?property ?matchingSubject .

        ?property rdfs:subPropertyOf* knora-base:hasValue .

        FILTER NOT EXISTS {
            ?matchingSubject knora-base:isDeleted true
        }

        # this variable will only be bound if the search matched a value object
        BIND(?matchingSubject AS ?valueObject)
    }

    @* Case 2: the match is a list node; find the resources whose list values point to it or to one of its sub-nodes. *@
    OPTIONAL {
        # get all list nodes that match the search term
        ?matchingSubject a knora-base:ListNode .

        # get sub-node(s) of that node(s) (recursively)
        ?matchingSubject knora-base:hasSubListNode* ?subListNode .

        # get all values that point to the node(s) and sub-node(s)
        ?listValue knora-base:valueHasListNode ?subListNode .

        # get all resources that have those values
        ?subjectWithListValue ?predicate ?listValue .

        FILTER NOT EXISTS {
            ?matchingSubject knora-base:isDeleted true
        }

        # a deleted list value must not make its resource a hit, just as deleted value objects are skipped above
        FILTER NOT EXISTS {
            ?listValue knora-base:isDeleted true
        }

        # this variable will only be bound if the search matched a list node
        BIND(?listValue AS ?valueObject)
    }

    # Determine the resource that this row belongs to:
    # - if the first OPTIONAL clause matched, ?matchingSubject is a value object and ?containingResource is used as ?resource;
    # - if the second OPTIONAL clause matched, ?matchingSubject is a list node and ?subjectWithListValue is used as ?resource;
    # - otherwise, ?matchingSubject is itself a resource (its rdfs:label matched the search pattern).
    BIND(
        COALESCE(
            ?containingResource,
            ?subjectWithListValue,
            ?matchingSubject)
        AS ?resource)

    @* Only subjects whose class is a subclass of knora-base:Resource are kept. *@
    ?resource a ?resourceClass .

    ?resourceClass rdfs:subClassOf* knora-base:Resource .

    @* Optional restriction to a resource class (including its subclasses). *@
    @limitToResourceClass match {
        case Some(definedLimitToResourceClass) => {
            ?resourceClass rdfs:subClassOf* <@definedLimitToResourceClass> .
        }

        case None => {}
    }

    @* Optional restriction to a single project. *@
    @limitToProject match {
        case Some(definedLimitToProject) => {
            ?resource knora-base:attachedToProject <@definedLimitToProject> .
        }

        case None => {}
    }

    @* When requested, match file values of the resource through ?valueObject so that they are part of the concatenated result. *@
    @if(returnFiles) {
        OPTIONAL {
            ?fileValueProp rdfs:subPropertyOf* knora-base:hasFileValue .

            ?resource ?fileValueProp ?valueObject .
        }
    }

    @* Resources marked as deleted are never returned. *@
    FILTER NOT EXISTS {
        ?resource knora-base:isDeleted true .
    }
}
@* Grouping, ordering and the paging offset are emitted only when resources are returned; LIMIT is emitted for both query kinds. *@
@if(!countQuery) {
    GROUP BY ?resource
    ORDER BY ?resource
    OFFSET @offset
}

LIMIT @limit