From 48d77cde965ce861c9b611f0a246509c70ea9ad0 Mon Sep 17 00:00:00 2001 From: Benjamin Geer Date: Mon, 25 Jan 2021 10:36:30 +0100 Subject: [PATCH] fix!(gravsearch): Handle UNION scopes with FILTER correctly (DSP-1240) (#1790) --- docs/03-apis/api-v2/query-language.md | 124 ++++++++++ docs/05-internals/design/api-v2/gravsearch.md | 3 + .../messages/util/search/QueryTraverser.scala | 21 +- .../messages/util/search/SparqlQuery.scala | 39 +++- .../util/search/SparqlTransformer.scala | 16 ++ .../prequery/AbstractPrequeryGenerator.scala | 116 ++++++++-- ...cificGravsearchToPrequeryTransformer.scala | 2 +- .../types/GravsearchTypeInspectionUtil.scala | 4 + .../responders/v2/SearchResponderV2.scala | 7 +- .../webapi/e2e/v2/SearchRouteV2R2RSpec.scala | 129 ++++++++--- ...cGravsearchToPrequeryTransformerSpec.scala | 215 ++++++++++++++++++ 11 files changed, 623 insertions(+), 53 deletions(-) diff --git a/docs/03-apis/api-v2/query-language.md b/docs/03-apis/api-v2/query-language.md index dea369c17c..83338d4487 100644 --- a/docs/03-apis/api-v2/query-language.md +++ b/docs/03-apis/api-v2/query-language.md @@ -51,6 +51,10 @@ It is certainly possible to write Gravsearch queries by hand, but we expect that in general, they will be automatically generated by client software, e.g. by a client user interface. +For a more detailed overview of Gravsearch, see +[Gravsearch: Transforming SPARQL to query humanities data](https://doi.org/10.3233/SW-200386). + + ## Submitting Gravsearch Queries The recommended way to submit a Gravsearch query is via HTTP POST: @@ -1087,3 +1091,123 @@ CONSTRUCT { FILTER(?pubdate = "JULIAN:1497-03-01"^^knora-api:Date) . } ``` + +## Scoping Issues + +SPARQL is evaluated [from the bottom up](https://sourceforge.net/p/bigdata/news/2015/09/understanding-sparqls-bottom-up-semantics/). +A `UNION` block therefore opens a new scope, in which variables bound at +higher levels are not necessarily in scope. This can cause unexpected results if queries +are not carefully designed. Gravsearch tries to prevent this by rejecting queries in the +following cases. + +### FILTER in UNION + +A `FILTER` in a `UNION` block can only use variables that are bound in the same block, otherwise the query will be rejected. This query is invalid because `?text` is not bound in the `UNION` block containing the `FILTER` where the variable is used: + +```sparql +PREFIX knora-api: +PREFIX mls: + +CONSTRUCT { + ?lemma knora-api:isMainResource true . + ?lemma mls:hasLemmaText ?text . +} WHERE { + ?lemma a mls:Lemma . + ?lemma mls:hasLemmaText ?text . + + { + ?lemma mls:hasPseudonym ?pseudo . + FILTER regex(?pseudo, "Abel", "i") . + } UNION { + FILTER regex(?text, "Abel", "i") . + } +} +ORDER BY ASC(?text) +OFFSET 0 +``` + +It can be corrected like this: + +```sparql +PREFIX knora-api: +PREFIX mls: + +CONSTRUCT { + ?lemma knora-api:isMainResource true . + ?lemma mls:hasLemmaText ?text . +} WHERE { + ?lemma a mls:Lemma . + ?lemma mls:hasLemmaText ?text . + + { + ?lemma mls:hasPseudonym ?pseudo . + FILTER regex(?pseudo, "Abel", "i") . + } UNION { + ?lemma mls:hasLemmaText ?text . + FILTER regex(?text, "Abel", "i") . + } +} +ORDER BY ASC(?text) +OFFSET 0 +``` + +### ORDER BY + +A variable used in `ORDER BY` must be bound at the top level of the `WHERE` clause. This query is invalid, because `?int` is not bound at the top level of the `WHERE` clause: + +```sparql +PREFIX knora-api: +PREFIX anything: + +CONSTRUCT { + ?thing knora-api:isMainResource true . + ?thing anything:hasInteger ?int . + ?thing anything:hasRichtext ?richtext . + ?thing anything:hasText ?text . +} WHERE { + ?thing a knora-api:Resource . + ?thing a anything:Thing . + + { + ?thing anything:hasRichtext ?richtext . + FILTER knora-api:matchText(?richtext, "test") + ?thing anything:hasInteger ?int . + } + UNION + { + ?thing anything:hasText ?text . + FILTER knora-api:matchText(?text, "test") + ?thing anything:hasInteger ?int . + } +} +ORDER BY (?int) +``` + +It can be corrected like this: + +```sparql +PREFIX knora-api: +PREFIX anything: + +CONSTRUCT { + ?thing knora-api:isMainResource true . + ?thing anything:hasInteger ?int . + ?thing anything:hasRichtext ?richtext . + ?thing anything:hasText ?text . +} WHERE { + ?thing a knora-api:Resource . + ?thing a anything:Thing . + ?thing anything:hasInteger ?int . + + { + ?thing anything:hasRichtext ?richtext . + FILTER knora-api:matchText(?richtext, "test") + } + UNION + { + ?thing anything:hasText ?text . + FILTER knora-api:matchText(?text, "test") + } +} +ORDER BY (?int) +``` diff --git a/docs/05-internals/design/api-v2/gravsearch.md b/docs/05-internals/design/api-v2/gravsearch.md index ecfa8cfa69..52c2e17c3a 100644 --- a/docs/05-internals/design/api-v2/gravsearch.md +++ b/docs/05-internals/design/api-v2/gravsearch.md @@ -19,6 +19,9 @@ License along with Knora. If not, see . # Gravsearch Design +For a detailed overview of Gravsearch, see +[Gravsearch: Transforming SPARQL to query humanities data](https://doi.org/10.3233/SW-200386). + ## Gravsearch Package The classes that process Gravsearch queries and results can be found in `org.knora.webapi.messages.util.search.gravsearch`. diff --git a/webapi/src/main/scala/org/knora/webapi/messages/util/search/QueryTraverser.scala b/webapi/src/main/scala/org/knora/webapi/messages/util/search/QueryTraverser.scala index a229fbd755..0bc2bad027 100644 --- a/webapi/src/main/scala/org/knora/webapi/messages/util/search/QueryTraverser.scala +++ b/webapi/src/main/scala/org/knora/webapi/messages/util/search/QueryTraverser.scala @@ -59,6 +59,16 @@ trait WhereTransformer { */ def optimiseQueryPatterns(patterns: Seq[QueryPattern]): Seq[QueryPattern] + /** + * Called before entering a UNION block. + */ + def enteringUnionBlock(): Unit + + /** + * Called before leaving a UNION block. + */ + def leavingUnionBlock(): Unit + /** * Transforms a [[StatementPattern]] in a WHERE clause into zero or more query patterns. * @@ -240,10 +250,13 @@ object QueryTraverser { Seq(OptionalPattern(patterns = transformedPatterns)) case unionPattern: UnionPattern => - val transformedBlocks: Seq[Seq[QueryPattern]] = unionPattern.blocks.map { blockPatterns => - transformWherePatterns(patterns = blockPatterns, - whereTransformer = whereTransformer, - inputOrderBy = inputOrderBy) + val transformedBlocks: Seq[Seq[QueryPattern]] = unionPattern.blocks.map { blockPatterns: Seq[QueryPattern] => + whereTransformer.enteringUnionBlock() + val transformedPatterns: Seq[QueryPattern] = transformWherePatterns(patterns = blockPatterns, + whereTransformer = whereTransformer, + inputOrderBy = inputOrderBy) + whereTransformer.leavingUnionBlock() + transformedPatterns } Seq(UnionPattern(blocks = transformedBlocks)) diff --git a/webapi/src/main/scala/org/knora/webapi/messages/util/search/SparqlQuery.scala b/webapi/src/main/scala/org/knora/webapi/messages/util/search/SparqlQuery.scala index 7f66cd2011..a039db084f 100644 --- a/webapi/src/main/scala/org/knora/webapi/messages/util/search/SparqlQuery.scala +++ b/webapi/src/main/scala/org/knora/webapi/messages/util/search/SparqlQuery.scala @@ -66,6 +66,8 @@ trait SelectQueryColumn extends Entity */ case class QueryVariable(variableName: String) extends SelectQueryColumn { override def toSparql: String = s"?$variableName" + + override def getVariables: Set[QueryVariable] = Set(this) } /** @@ -83,6 +85,8 @@ case class GroupConcat(inputVariable: QueryVariable, separator: Char, outputVari override def toSparql: String = { s"""(GROUP_CONCAT(DISTINCT(IF(BOUND(${inputVariable.toSparql}), STR(${inputVariable.toSparql}), "")); SEPARATOR='$separator') AS ${outputVariable.toSparql})""" } + + override def getVariables: Set[QueryVariable] = Set(inputVariable) } /** @@ -108,6 +112,7 @@ case class Count(inputVariable: QueryVariable, distinct: Boolean, outputVariable s"(COUNT($distinctAsStr ${inputVariable.toSparql}) AS ${outputVariable.toSparql})" } + override def getVariables: Set[QueryVariable] = Set(inputVariable, outputVariable) } /** @@ -135,6 +140,8 @@ case class IriRef(iri: SmartIri, propertyPathOperator: Option[Char] = None) exte def toOntologySchema(targetSchema: OntologySchema): IriRef = { copy(iri = iri.toOntologySchema(targetSchema)) } + + override def getVariables: Set[QueryVariable] = Set.empty } /** @@ -156,6 +163,8 @@ case class XsdLiteral(value: String, datatype: SmartIri) extends Entity { "\"" + value + "\"^^" + transformedDatatype.toSparql } + + override def getVariables: Set[QueryVariable] = Set.empty } /** @@ -198,7 +207,7 @@ case class StatementPattern(subj: Entity, pred: Entity, obj: Entity, namedGraph: private def entityToOntologySchema(entity: Entity, targetSchema: OntologySchema): Entity = { entity match { - case iriRef: IriRef => iriRef.toOntologySchema(InternalSchema) + case iriRef: IriRef => iriRef.toOntologySchema(targetSchema) case other => other } } @@ -314,7 +323,13 @@ object CompareExpressionOperator extends Enumeration { /** * Represents an expression that can be used in a FILTER. */ -sealed trait Expression extends SparqlGenerator +sealed trait Expression extends SparqlGenerator { + + /** + * Returns the set of query variables used in this expression. + */ + def getVariables: Set[QueryVariable] +} /** * Represents a comparison expression in a FILTER. @@ -326,6 +341,8 @@ sealed trait Expression extends SparqlGenerator case class CompareExpression(leftArg: Expression, operator: CompareExpressionOperator.Value, rightArg: Expression) extends Expression { override def toSparql: String = s"(${leftArg.toSparql} $operator ${rightArg.toSparql})" + + override def getVariables: Set[QueryVariable] = leftArg.getVariables ++ rightArg.getVariables } /** @@ -336,6 +353,8 @@ case class CompareExpression(leftArg: Expression, operator: CompareExpressionOpe */ case class AndExpression(leftArg: Expression, rightArg: Expression) extends Expression { override def toSparql: String = s"(${leftArg.toSparql} && ${rightArg.toSparql})" + + override def getVariables: Set[QueryVariable] = leftArg.getVariables ++ rightArg.getVariables } /** @@ -346,6 +365,8 @@ case class AndExpression(leftArg: Expression, rightArg: Expression) extends Expr */ case class OrExpression(leftArg: Expression, rightArg: Expression) extends Expression { override def toSparql: String = s"(${leftArg.toSparql} || ${rightArg.toSparql})" + + override def getVariables: Set[QueryVariable] = leftArg.getVariables ++ rightArg.getVariables } /** @@ -372,6 +393,8 @@ case object MinusOperator extends ArithmeticOperator { */ case class IntegerLiteral(value: Int) extends Expression { override def toSparql: String = value.toString + + override def getVariables: Set[QueryVariable] = Set.empty } /** @@ -380,6 +403,8 @@ case class IntegerLiteral(value: Int) extends Expression { case class ArithmeticExpression(leftArg: Expression, operator: ArithmeticOperator, rightArg: Expression) extends Expression { override def toSparql: String = s"""${leftArg.toSparql} $operator ${rightArg.toSparql}""" + + override def getVariables: Set[QueryVariable] = leftArg.getVariables ++ rightArg.getVariables } /** @@ -397,6 +422,8 @@ case class RegexFunction(textExpr: Expression, pattern: String, modifier: Option case None => s"""regex(${textExpr.toSparql}, "$pattern")""" } + + override def getVariables: Set[QueryVariable] = textExpr.getVariables } /** @@ -406,6 +433,8 @@ case class RegexFunction(textExpr: Expression, pattern: String, modifier: Option */ case class LangFunction(textValueVar: QueryVariable) extends Expression { override def toSparql: String = s"""lang(${textValueVar.toSparql})""" + + override def getVariables: Set[QueryVariable] = Set(textValueVar) } /** @@ -419,6 +448,8 @@ case class SubStrFunction(textLiteralVar: QueryVariable, startExpression: Expres extends Expression { override def toSparql: String = s"""substr(${textLiteralVar.toSparql}, ${startExpression.toSparql}, ${lengthExpression.toSparql})""" + + override def getVariables: Set[QueryVariable] = Set(textLiteralVar) } /** @@ -428,6 +459,8 @@ case class SubStrFunction(textLiteralVar: QueryVariable, startExpression: Expres */ case class StrFunction(textLiteralVar: QueryVariable) extends Expression { override def toSparql: String = s"""str(${textLiteralVar.toSparql})""" + + override def getVariables: Set[QueryVariable] = Set(textLiteralVar) } /** @@ -482,6 +515,8 @@ case class FunctionCallExpression(functionIri: IriRef, args: Seq[Entity]) extend } } + + override def getVariables: Set[QueryVariable] = args.toSet.flatMap((arg: Entity) => arg.getVariables) } /** diff --git a/webapi/src/main/scala/org/knora/webapi/messages/util/search/SparqlTransformer.scala b/webapi/src/main/scala/org/knora/webapi/messages/util/search/SparqlTransformer.scala index 93a66fadcb..f9c0e58078 100644 --- a/webapi/src/main/scala/org/knora/webapi/messages/util/search/SparqlTransformer.scala +++ b/webapi/src/main/scala/org/knora/webapi/messages/util/search/SparqlTransformer.scala @@ -63,6 +63,10 @@ object SparqlTransformer { Some(FromClause(IriRef(OntologyConstants.NamedGraphs.GraphDBExplicitNamedGraph.toSmartIri))) } } + + override def enteringUnionBlock(): Unit = {} + + override def leavingUnionBlock(): Unit = {} } /** @@ -91,6 +95,10 @@ object SparqlTransformer { transformLuceneQueryPatternForFuseki(luceneQueryPattern) override def getFromClause: Option[FromClause] = None + + override def enteringUnionBlock(): Unit = {} + + override def leavingUnionBlock(): Unit = {} } /** @@ -115,6 +123,10 @@ object SparqlTransformer { override def transformLuceneQueryPattern(luceneQueryPattern: LuceneQueryPattern): Seq[QueryPattern] = transformLuceneQueryPatternForGraphDB(luceneQueryPattern) + + override def enteringUnionBlock(): Unit = {} + + override def leavingUnionBlock(): Unit = {} } /** @@ -138,6 +150,10 @@ object SparqlTransformer { override def transformLuceneQueryPattern(luceneQueryPattern: LuceneQueryPattern): Seq[QueryPattern] = transformLuceneQueryPatternForFuseki(luceneQueryPattern) + + override def enteringUnionBlock(): Unit = {} + + override def leavingUnionBlock(): Unit = {} } /** diff --git a/webapi/src/main/scala/org/knora/webapi/messages/util/search/gravsearch/prequery/AbstractPrequeryGenerator.scala b/webapi/src/main/scala/org/knora/webapi/messages/util/search/gravsearch/prequery/AbstractPrequeryGenerator.scala index a31ad79000..71f4c2f5df 100644 --- a/webapi/src/main/scala/org/knora/webapi/messages/util/search/gravsearch/prequery/AbstractPrequeryGenerator.scala +++ b/webapi/src/main/scala/org/knora/webapi/messages/util/search/gravsearch/prequery/AbstractPrequeryGenerator.scala @@ -67,9 +67,16 @@ abstract class AbstractPrequeryGenerator(constructClause: ConstructClause, */ private case class GeneratedQueryVariable(variable: QueryVariable, useInOrderBy: Boolean) - // variables that are created when processing filter statements or for a value object var used as a sort criterion - // they represent the value of a literal pointed to by a value object - private val valueVariablesAutomaticallyGenerated = mutable.Map.empty[QueryVariable, Set[GeneratedQueryVariable]] + // Variables that are created when processing filter statements or for a value object var used as a sort criterion. + // They represent the value of a literal pointed to by a value object. There is a stack of collections of these + // variables, with an element for the top level of the WHERE clause, and an element for each level of UNION blocks, + // because we can't assume that variables at the top level will be bound in a UNION block. + private var valueVariablesAutomaticallyGenerated: List[Map[QueryVariable, Set[GeneratedQueryVariable]]] = + List(Map.empty[QueryVariable, Set[GeneratedQueryVariable]]) + + // Variables mentioned in the UNION block that is currently being processed, so we can ensure that a variable + // is bound before it is used in a FILTER. This is a stack of sets, with one element per level of union blocks. + private var variablesInUnionBlocks: List[Set[QueryVariable]] = List.empty // variables the represent resource metadata private val resourceMetadataVariables = mutable.Set.empty[QueryVariable] @@ -77,6 +84,31 @@ abstract class AbstractPrequeryGenerator(constructClause: ConstructClause, // The query can set this to false to disable inference. var useInference = true + /** + * When we enter a UNION block, pushes an empty collection of generated variables on to the stack + * valueVariablesAutomaticallyGenerated. + */ + override def enteringUnionBlock(): Unit = { + valueVariablesAutomaticallyGenerated = Map + .empty[QueryVariable, Set[GeneratedQueryVariable]] :: valueVariablesAutomaticallyGenerated + + variablesInUnionBlocks = Set.empty[QueryVariable] :: variablesInUnionBlocks + } + + /** + * When we leave a UNION block, pops that block's collection of generated variables off the + * stack valueVariablesAutomaticallyGenerated. + */ + override def leavingUnionBlock(): Unit = { + valueVariablesAutomaticallyGenerated = valueVariablesAutomaticallyGenerated.tail + + variablesInUnionBlocks = variablesInUnionBlocks.tail + } + + private def inUnionBlock: Boolean = { + variablesInUnionBlocks.nonEmpty + } + /** * Saves a generated variable representing a value literal, if it hasn't been saved already. * @@ -88,17 +120,24 @@ abstract class AbstractPrequeryGenerator(constructClause: ConstructClause, private def addGeneratedVariableForValueLiteral(valueVar: QueryVariable, generatedVar: QueryVariable, useInOrderBy: Boolean = true): Boolean = { - val currentGeneratedVars = - valueVariablesAutomaticallyGenerated.getOrElse(valueVar, Set.empty[GeneratedQueryVariable]) - - if (!currentGeneratedVars.exists(currentGeneratedVar => currentGeneratedVar.variable == generatedVar)) { - valueVariablesAutomaticallyGenerated.put( - valueVar, - currentGeneratedVars + GeneratedQueryVariable(generatedVar, useInOrderBy)) - true - } else { - false - } + val currentGeneratedVarsForBlock: Map[QueryVariable, Set[GeneratedQueryVariable]] = + valueVariablesAutomaticallyGenerated.head + + val currentGeneratedVarsForValueVar: Set[GeneratedQueryVariable] = + currentGeneratedVarsForBlock.getOrElse(valueVar, Set.empty[GeneratedQueryVariable]) + + val newGeneratedVarsForBlock = + if (!currentGeneratedVarsForValueVar.exists(currentGeneratedVar => currentGeneratedVar.variable == generatedVar)) { + currentGeneratedVarsForBlock + (valueVar -> (currentGeneratedVarsForValueVar + GeneratedQueryVariable( + generatedVar, + useInOrderBy))) + } else { + currentGeneratedVarsForBlock + } + + valueVariablesAutomaticallyGenerated = newGeneratedVarsForBlock :: valueVariablesAutomaticallyGenerated.tail + + newGeneratedVarsForBlock != currentGeneratedVarsForBlock } /** @@ -108,7 +147,7 @@ abstract class AbstractPrequeryGenerator(constructClause: ConstructClause, * @return a generated variable that represents a value literal and can be used in ORDER BY, or `None` if no such variable has been saved. */ protected def getGeneratedVariableForValueLiteralInOrderBy(valueVar: QueryVariable): Option[QueryVariable] = { - valueVariablesAutomaticallyGenerated.get(valueVar) match { + valueVariablesAutomaticallyGenerated.head.get(valueVar) match { case Some(generatedVars: Set[GeneratedQueryVariable]) => val generatedVarsForOrderBy: Set[QueryVariable] = generatedVars.filter(_.useInOrderBy).map(_.variable) @@ -492,6 +531,38 @@ abstract class AbstractPrequeryGenerator(constructClause: ConstructClause, } } + /** + * If we're in a UNION block, records any variables that are used in the specified statement, + * so we can make sure that they're defined before they're used in a FILTER pattern. + * + * @param statementPattern the statement pattern being processed. + */ + private def recordVariablesInUnionBlock(statementPattern: StatementPattern): Unit = { + def entityAsVariable(entity: Entity): Option[QueryVariable] = { + entity match { + case queryVariable: QueryVariable => Some(queryVariable) + case _ => None + } + } + + // Are we in a UNION block? + variablesInUnionBlocks match { + case variablesInCurrentBlock :: tail => + // Yes. Collect any variables in the statement. + val newVariablesInCurrentBlock: Set[QueryVariable] = variablesInCurrentBlock ++ entityAsVariable( + statementPattern.subj) ++ + entityAsVariable(statementPattern.pred) ++ + entityAsVariable(statementPattern.obj) + + // Record them. + variablesInUnionBlocks = newVariablesInCurrentBlock :: tail + + case Nil => + // No. Nothing to do here. + () + } + } + protected def processStatementPatternFromWhereClause(statementPattern: StatementPattern, inputOrderBy: Seq[OrderCriterion]): Seq[QueryPattern] = { @@ -529,6 +600,10 @@ abstract class AbstractPrequeryGenerator(constructClause: ConstructClause, conversionFuncForPropertyType = convertStatementForPropertyType(inputOrderBy) ) + // If we're in a UNION block, record any variables that are used in the statement, + // so we can make sure that they're defined before they're used in a FILTER pattern. + recordVariablesInUnionBlock(statementPattern) + additionalStatementsForSubj ++ additionalStatementsForWholeStatement ++ additionalStatementsForObj } } @@ -1875,11 +1950,22 @@ abstract class AbstractPrequeryGenerator(constructClause: ConstructClause, * * @param filterExpression the `FILTER` expression to be transformed. * @param typeInspectionResult the results of type inspection. + * @param isTopLevel `true` if this is the top-level expression in the filter. * @return a [[TransformedFilterPattern]]. */ protected def transformFilterPattern(filterExpression: Expression, typeInspectionResult: GravsearchTypeInspectionResult, isTopLevel: Boolean): TransformedFilterPattern = { + // Are we looking at a top-level filter expression in a UNION block? + if (isTopLevel && inUnionBlock) { + // Yes. Make sure that all the variables used in the FILTER have already been bound in the same block. + val unboundVariables: Set[QueryVariable] = filterExpression.getVariables -- variablesInUnionBlocks.head + + if (unboundVariables.nonEmpty) { + throw GravsearchException( + s"One or more variables used in a filter have not been bound in the same UNION block: ${unboundVariables.map(_.toSparql).mkString(", ")}") + } + } filterExpression match { diff --git a/webapi/src/main/scala/org/knora/webapi/messages/util/search/gravsearch/prequery/NonTriplestoreSpecificGravsearchToPrequeryTransformer.scala b/webapi/src/main/scala/org/knora/webapi/messages/util/search/gravsearch/prequery/NonTriplestoreSpecificGravsearchToPrequeryTransformer.scala index 6173e3d843..e1861c65ef 100644 --- a/webapi/src/main/scala/org/knora/webapi/messages/util/search/gravsearch/prequery/NonTriplestoreSpecificGravsearchToPrequeryTransformer.scala +++ b/webapi/src/main/scala/org/knora/webapi/messages/util/search/gravsearch/prequery/NonTriplestoreSpecificGravsearchToPrequeryTransformer.scala @@ -341,7 +341,7 @@ class NonTriplestoreSpecificGravsearchToPrequeryTransformer(constructClause: Con case None => // No. throw GravsearchException( - s"Not value literal variable was automatically generated for ${criterion.queryVariable}") + s"Variable ${criterion.queryVariable.toSparql} is used in ORDER by, but is not bound at the top level of the WHERE clause") } } diff --git a/webapi/src/main/scala/org/knora/webapi/messages/util/search/gravsearch/types/GravsearchTypeInspectionUtil.scala b/webapi/src/main/scala/org/knora/webapi/messages/util/search/gravsearch/types/GravsearchTypeInspectionUtil.scala index 99032a333d..bb8d9c57ee 100644 --- a/webapi/src/main/scala/org/knora/webapi/messages/util/search/gravsearch/types/GravsearchTypeInspectionUtil.scala +++ b/webapi/src/main/scala/org/knora/webapi/messages/util/search/gravsearch/types/GravsearchTypeInspectionUtil.scala @@ -186,6 +186,10 @@ object GravsearchTypeInspectionUtil { override def transformLuceneQueryPattern(luceneQueryPattern: LuceneQueryPattern): Seq[QueryPattern] = Seq(luceneQueryPattern) + + override def enteringUnionBlock(): Unit = {} + + override def leavingUnionBlock(): Unit = {} } /** diff --git a/webapi/src/main/scala/org/knora/webapi/responders/v2/SearchResponderV2.scala b/webapi/src/main/scala/org/knora/webapi/responders/v2/SearchResponderV2.scala index d95d5f7bda..2eacb892f4 100644 --- a/webapi/src/main/scala/org/knora/webapi/responders/v2/SearchResponderV2.scala +++ b/webapi/src/main/scala/org/knora/webapi/responders/v2/SearchResponderV2.scala @@ -392,6 +392,7 @@ class SearchResponderV2(responderData: ResponderData) extends ResponderWithStand typeInspectionResult: GravsearchTypeInspectionResult <- gravsearchTypeInspectionRunner.inspectTypes( inputQuery.whereClause, requestingUser) + whereClauseWithoutAnnotations: WhereClause = GravsearchTypeInspectionUtil.removeTypeAnnotations( inputQuery.whereClause) @@ -409,7 +410,7 @@ class SearchResponderV2(responderData: ResponderData) extends ResponderWithStand querySchema = inputQuery.querySchema.getOrElse(throw AssertionException(s"WhereClause has no querySchema")) ) - nonTriplestoreSpecficPrequery: SelectQuery = QueryTraverser.transformConstructToSelect( + nonTriplestoreSpecificPrequery: SelectQuery = QueryTraverser.transformConstructToSelect( inputQuery = inputQuery.copy( whereClause = whereClauseWithoutAnnotations, orderBy = Seq.empty[OrderCriterion] // count queries do not need any sorting criteria @@ -434,7 +435,7 @@ class SearchResponderV2(responderData: ResponderData) extends ResponderWithStand } triplestoreSpecificCountQuery = QueryTraverser.transformSelectToSelect( - inputQuery = nonTriplestoreSpecficPrequery, + inputQuery = nonTriplestoreSpecificPrequery, transformer = triplestoreSpecificQueryPatternTransformerSelect ) @@ -446,7 +447,7 @@ class SearchResponderV2(responderData: ResponderData) extends ResponderWithStand // query response should contain one result with one row with the name "count" _ = if (countResponse.results.bindings.length != 1) { throw GravsearchException( - s"Fulltext count query is expected to return exactly one row, but ${countResponse.results.bindings.size} given") + s"Count query is expected to return exactly one row, but ${countResponse.results.bindings.size} given") } count: String = countResponse.results.bindings.head.rowMap("count") diff --git a/webapi/src/test/scala/org/knora/webapi/e2e/v2/SearchRouteV2R2RSpec.scala b/webapi/src/test/scala/org/knora/webapi/e2e/v2/SearchRouteV2R2RSpec.scala index e36dbc0cea..09484d3b2d 100644 --- a/webapi/src/test/scala/org/knora/webapi/e2e/v2/SearchRouteV2R2RSpec.scala +++ b/webapi/src/test/scala/org/knora/webapi/e2e/v2/SearchRouteV2R2RSpec.scala @@ -8565,36 +8565,33 @@ class SearchRouteV2R2RSpec extends R2RSpec { } } - "not return duplicate results when there are unbound variables in one or more UNION branches" in { + "not return duplicate results when there are UNION branches with different variables" in { val gravsearchQuery = s"""PREFIX knora-api: - |PREFIX anything: - |CONSTRUCT { - | ?thing knora-api:isMainResource true . - | ?thing anything:hasInteger ?int . - | ?thing anything:hasRichtext ?richtext . - | ?thing anything:hasText ?text . - |} WHERE { - | ?thing a knora-api:Resource . - | ?thing a anything:Thing . - | - | { - | ?thing anything:hasRichtext ?richtext . - | FILTER knora-api:matchText(?richtext, "test") - | - | ?thing anything:hasInteger ?int . - | ?int knora-api:intValueAsInt 1 - | } - | UNION - | { - | ?thing anything:hasText ?text . - | FILTER knora-api:matchText(?text, "test") - | - | ?thing anything:hasInteger ?int . - | ?int knora-api:intValueAsInt 1 - | } - |} - |ORDER BY (?int)""".stripMargin + |PREFIX anything: + | + |CONSTRUCT { + | ?thing knora-api:isMainResource true . + | ?thing anything:hasInteger ?int . + | ?thing anything:hasRichtext ?richtext . + | ?thing anything:hasText ?text . + |} WHERE { + | ?thing a knora-api:Resource . + | ?thing a anything:Thing . + | ?thing anything:hasInteger ?int . + | ?int knora-api:intValueAsInt 1 . + | + | { + | ?thing anything:hasRichtext ?richtext . + | FILTER knora-api:matchText(?richtext, "test") + | } + | UNION + | { + | ?thing anything:hasText ?text . + | FILTER knora-api:matchText(?text, "test") + | } + |} + |ORDER BY (?int)""".stripMargin val expectedCount = 1 @@ -8616,6 +8613,82 @@ class SearchRouteV2R2RSpec extends R2RSpec { } } + "reject an ORDER by containing a variable that's not bound at the top level of the WHERE clause" in { + val gravsearchQuery = + s"""PREFIX knora-api: + |PREFIX anything: + |CONSTRUCT { + | ?thing knora-api:isMainResource true . + | ?thing anything:hasInteger ?int . + | ?thing anything:hasRichtext ?richtext . + | ?thing anything:hasText ?text . + |} WHERE { + | ?thing a knora-api:Resource . + | ?thing a anything:Thing . + | + | { + | ?thing anything:hasRichtext ?richtext . + | FILTER knora-api:matchText(?richtext, "test") + | + | ?thing anything:hasInteger ?int . + | ?int knora-api:intValueAsInt 1 . + | } + | UNION + | { + | ?thing anything:hasText ?text . + | FILTER knora-api:matchText(?text, "test") + | + | ?thing anything:hasInteger ?int . + | ?int knora-api:intValueAsInt 1 . + | } + |} + |ORDER BY (?int)""".stripMargin + + Post("/v2/searchextended", HttpEntity(SparqlQueryConstants.`application/sparql-query`, gravsearchQuery)) ~> searchPath ~> check { + val responseStr = responseAs[String] + assert(status == StatusCodes.BAD_REQUEST, responseStr) + assert( + responseStr.contains( + "Variable ?int is used in ORDER by, but is not bound at the top level of the WHERE clause")) + } + } + + "reject a FILTER in a UNION that uses a variable that's out of scope" in { + val gravsearchQuery = + s"""PREFIX knora-api: + |PREFIX anything: + |CONSTRUCT { + | ?thing knora-api:isMainResource true . + | ?thing anything:hasInteger ?int . + | ?thing anything:hasRichtext ?richtext . + | ?thing anything:hasText ?text . + |} WHERE { + | ?thing a knora-api:Resource . + | ?thing a anything:Thing . + | ?thing anything:hasRichtext ?richtext . + | ?thing anything:hasInteger ?int . + | ?int knora-api:intValueAsInt 1 . + | + | { + | FILTER knora-api:matchText(?richtext, "test") + | } + | UNION + | { + | ?thing anything:hasText ?text . + | FILTER knora-api:matchText(?text, "test") + | } + |} + |ORDER BY (?int)""".stripMargin + + Post("/v2/searchextended", HttpEntity(SparqlQueryConstants.`application/sparql-query`, gravsearchQuery)) ~> searchPath ~> check { + val responseStr = responseAs[String] + assert(status == StatusCodes.BAD_REQUEST, responseStr) + assert( + responseStr.contains( + "One or more variables used in a filter have not been bound in the same UNION block: ?richtext")) + } + } + "search for a resource containing a time value tag" in { // Create a resource containing a time value. diff --git a/webapi/src/test/scala/org/knora/webapi/messages/util/search/gravsearch/prequery/NonTriplestoreSpecificGravsearchToPrequeryTransformerSpec.scala b/webapi/src/test/scala/org/knora/webapi/messages/util/search/gravsearch/prequery/NonTriplestoreSpecificGravsearchToPrequeryTransformerSpec.scala index 61e5931fbe..8805cbdce2 100644 --- a/webapi/src/test/scala/org/knora/webapi/messages/util/search/gravsearch/prequery/NonTriplestoreSpecificGravsearchToPrequeryTransformerSpec.scala +++ b/webapi/src/test/scala/org/knora/webapi/messages/util/search/gravsearch/prequery/NonTriplestoreSpecificGravsearchToPrequeryTransformerSpec.scala @@ -1558,6 +1558,214 @@ class NonTriplestoreSpecificGravsearchToPrequeryTransformerSpec extends CoreSpec useDistinct = true ) + val InputQueryWithUnionScopes: String = + """PREFIX knora-api: + |PREFIX onto: + | + |CONSTRUCT { + | ?thing knora-api:isMainResource true . + | ?thing onto:hasText ?text . + |} WHERE { + | ?thing a onto:Thing . + | ?thing onto:hasText ?text . + | + | { + | ?thing onto:hasInteger ?int . + | FILTER(?int = 1) + | } UNION { + | ?thing onto:hasText ?text . + | FILTER regex(?text, "Abel", "i") . + | } + |} + |ORDER BY ASC(?text) + |OFFSET 0""".stripMargin + + val TransformedQueryWithUnionScopes: SelectQuery = SelectQuery( + fromClause = None, + variables = Vector( + QueryVariable(variableName = "thing"), + GroupConcat( + inputVariable = QueryVariable(variableName = "text"), + separator = StringFormatter.INFORMATION_SEPARATOR_ONE, + outputVariableName = "text__Concat" + ) + ), + offset = 0, + groupBy = Vector( + QueryVariable(variableName = "thing"), + QueryVariable(variableName = "text__valueHasString") + ), + orderBy = Vector( + OrderCriterion( + queryVariable = QueryVariable(variableName = "text__valueHasString"), + isAscending = true + ), + OrderCriterion( + queryVariable = QueryVariable(variableName = "thing"), + isAscending = true + ) + ), + whereClause = WhereClause( + patterns = Vector( + StatementPattern( + subj = QueryVariable(variableName = "thing"), + pred = IriRef( + iri = "http://www.knora.org/ontology/knora-base#isDeleted".toSmartIri, + propertyPathOperator = None + ), + obj = XsdLiteral( + value = "false", + datatype = "http://www.w3.org/2001/XMLSchema#boolean".toSmartIri + ), + namedGraph = Some( + IriRef( + iri = "http://www.knora.org/explicit".toSmartIri, + propertyPathOperator = None + )) + ), + StatementPattern( + subj = QueryVariable(variableName = "thing"), + pred = IriRef( + iri = "http://www.knora.org/ontology/0001/anything#hasText".toSmartIri, + propertyPathOperator = None + ), + obj = QueryVariable(variableName = "text"), + namedGraph = None + ), + StatementPattern( + subj = QueryVariable(variableName = "text"), + pred = IriRef( + iri = "http://www.knora.org/ontology/knora-base#isDeleted".toSmartIri, + propertyPathOperator = None + ), + obj = XsdLiteral( + value = "false", + datatype = "http://www.w3.org/2001/XMLSchema#boolean".toSmartIri + ), + namedGraph = Some( + IriRef( + iri = "http://www.knora.org/explicit".toSmartIri, + propertyPathOperator = None + )) + ), + StatementPattern( + subj = QueryVariable(variableName = "text"), + pred = IriRef( + iri = "http://www.knora.org/ontology/knora-base#valueHasString".toSmartIri, + propertyPathOperator = None + ), + obj = QueryVariable(variableName = "text__valueHasString"), + namedGraph = Some( + IriRef( + iri = "http://www.knora.org/explicit".toSmartIri, + propertyPathOperator = None + )) + ), + UnionPattern( + blocks = Vector( + Vector( + StatementPattern( + subj = QueryVariable(variableName = "thing"), + pred = IriRef( + iri = "http://www.knora.org/ontology/0001/anything#hasInteger".toSmartIri, + propertyPathOperator = None + ), + obj = QueryVariable(variableName = "int"), + namedGraph = None + ), + StatementPattern( + subj = QueryVariable(variableName = "int"), + pred = IriRef( + iri = "http://www.knora.org/ontology/knora-base#isDeleted".toSmartIri, + propertyPathOperator = None + ), + obj = XsdLiteral( + value = "false", + datatype = "http://www.w3.org/2001/XMLSchema#boolean".toSmartIri + ), + namedGraph = Some( + IriRef( + iri = "http://www.knora.org/explicit".toSmartIri, + propertyPathOperator = None + )) + ), + StatementPattern( + subj = QueryVariable(variableName = "int"), + pred = IriRef( + iri = "http://www.knora.org/ontology/knora-base#valueHasInteger".toSmartIri, + propertyPathOperator = None + ), + obj = QueryVariable(variableName = "int__valueHasInteger"), + namedGraph = Some( + IriRef( + iri = "http://www.knora.org/explicit".toSmartIri, + propertyPathOperator = None + )) + ), + FilterPattern(expression = CompareExpression( + leftArg = QueryVariable(variableName = "int__valueHasInteger"), + operator = CompareExpressionOperator.EQUALS, + rightArg = XsdLiteral( + value = "1", + datatype = "http://www.w3.org/2001/XMLSchema#integer".toSmartIri + ) + )) + ), + Vector( + StatementPattern( + subj = QueryVariable(variableName = "thing"), + pred = IriRef( + iri = "http://www.knora.org/ontology/0001/anything#hasText".toSmartIri, + propertyPathOperator = None + ), + obj = QueryVariable(variableName = "text"), + namedGraph = None + ), + StatementPattern( + subj = QueryVariable(variableName = "text"), + pred = IriRef( + iri = "http://www.knora.org/ontology/knora-base#isDeleted".toSmartIri, + propertyPathOperator = None + ), + obj = XsdLiteral( + value = "false", + datatype = "http://www.w3.org/2001/XMLSchema#boolean".toSmartIri + ), + namedGraph = Some( + IriRef( + iri = "http://www.knora.org/explicit".toSmartIri, + propertyPathOperator = None + )) + ), + StatementPattern( + subj = QueryVariable(variableName = "text"), + pred = IriRef( + iri = "http://www.knora.org/ontology/knora-base#valueHasString".toSmartIri, + propertyPathOperator = None + ), + obj = QueryVariable(variableName = "text__valueHasString"), + namedGraph = Some( + IriRef( + iri = "http://www.knora.org/explicit".toSmartIri, + propertyPathOperator = None + )) + ), + FilterPattern( + expression = RegexFunction( + textExpr = QueryVariable(variableName = "text__valueHasString"), + pattern = "Abel", + modifier = Some("i") + )) + ) + )) + ), + positiveEntities = Set(), + querySchema = None + ), + limit = Some(25), + useDistinct = true + ) + "The NonTriplestoreSpecificGravsearchToPrequeryGenerator object" should { "transform an input query with an optional property criterion without removing the rdf:type statement" in { @@ -1712,5 +1920,12 @@ class NonTriplestoreSpecificGravsearchToPrequeryTransformerSpec extends CoreSpec assert(transformedQuery === TransformedQueryWithRdfsLabelAndRegex) } + + "transform an input query with UNION scopes in the simple schema" in { + val transformedQuery = + QueryHandler.transformQuery(InputQueryWithUnionScopes, responderData, settings) + + assert(transformedQuery === TransformedQueryWithUnionScopes) + } } }