Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(sipi): Improve performance of file value query #1697

Merged
merged 5 commits into from Sep 2, 2020
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
7 changes: 1 addition & 6 deletions docs/02-knora-ontologies/knora-base.md
Expand Up @@ -72,7 +72,7 @@ property name):

: The `kb:Institution` that the project belongs to.

Ontologies, resources, and valuesare associated with a project by means of the
Ontologies and resources are associated with a project by means of the
`kb:attachedToProject` property, as described in [Ontologies](#ontologies)
and [Properties of Resource](#properties-of-resource)). Users are associated with a project by
means of the `kb:isInProject` property, as described in
Expand Down Expand Up @@ -318,11 +318,6 @@ created by copying data from a deleted value.

: The user who owns the value.

`attachedToProject` (0-1)

: The project that the value is part of. If not specified, defaults to
the project of the containing resource.

`valueHasString` (1)

: A human-readable string representation of the value's contents,
Expand Down
Expand Up @@ -22,11 +22,11 @@ package org.knora.webapi.responders.admin
import akka.actor.Status
import akka.http.scaladsl.util.FastFuture
import akka.pattern._
import org.knora.webapi._
import org.knora.webapi.exceptions.{InconsistentTriplestoreDataException, NotFoundException}
import org.knora.webapi.messages.SmartIri
import org.knora.webapi.messages.admin.responder.projectsmessages.{ProjectIdentifierADM, ProjectRestrictedViewSettingsADM, ProjectRestrictedViewSettingsGetADM}
import org.knora.webapi.messages.admin.responder.sipimessages.{SipiFileInfoGetRequestADM, SipiFileInfoGetResponseADM, SipiResponderRequestADM}
import org.knora.webapi.messages.store.triplestoremessages.{SparqlSelectRequest, SparqlSelectResponse, VariableResultsRow}
import org.knora.webapi.messages.store.triplestoremessages.{IriSubjectV2, LiteralV2, SparqlExtendedConstructRequest, SparqlExtendedConstructResponse}
import org.knora.webapi.messages.util.PermissionUtilADM.EntityPermission
import org.knora.webapi.messages.util.{KnoraSystemInstances, PermissionUtilADM, ResponderData}
import org.knora.webapi.responders.Responder
Expand Down Expand Up @@ -69,30 +69,25 @@ class SipiResponderADM(responderData: ResponderData) extends Responder(responder
filename = request.filename
).toString())

// _ = println(sparqlQuery)
queryResponse: SparqlExtendedConstructResponse <- (storeManager ? SparqlExtendedConstructRequest(sparqlQuery)).mapTo[SparqlExtendedConstructResponse]

queryResponse <- (storeManager ? SparqlSelectRequest(sparqlQuery)).mapTo[SparqlSelectResponse]
_ = if (queryResponse.statements.isEmpty) throw NotFoundException(s"No file value was found for filename ${request.filename}")
_ = if (queryResponse.statements.size > 1) throw InconsistentTriplestoreDataException(s"Filename ${request.filename} is used in more than one file value")

rows: Seq[VariableResultsRow] = queryResponse.results.bindings

// check if rows were found for the given filename
_ = if (rows.isEmpty) throw NotFoundException(s"No file value was found for filename ${request.filename}")

// check that only one file value was found (by grouping by file value IRI)
groupedByFileValue: Map[String, Seq[VariableResultsRow]] = rows.groupBy {
row: VariableResultsRow => row.rowMap("fileValue")
fileValueIriSubject: IriSubjectV2 = queryResponse.statements.keys.head match {
case iriSubject: IriSubjectV2 => iriSubject
case _ => throw InconsistentTriplestoreDataException(s"The subject of the file value with filename ${request.filename} is not an IRI")
}

_ = if (groupedByFileValue.size > 1) throw InconsistentTriplestoreDataException(s"Filename ${request.filename} is used in more than one file value")

fileValueIri: IRI = groupedByFileValue.keys.head

assertions: Seq[(String, String)] = rows.map {
row => (row.rowMap("objPred"), row.rowMap("objObj"))
assertions: Seq[(String, String)] = queryResponse.statements(fileValueIriSubject).toSeq.flatMap {
case (predicate: SmartIri, values: Seq[LiteralV2]) =>
values.map {
value => predicate.toString -> value.toString
}
}

maybeEntityPermission: Option[EntityPermission] = PermissionUtilADM.getUserPermissionFromAssertionsADM(
entityIri = fileValueIri,
entityIri = fileValueIriSubject.toString,
assertions = assertions,
requestingUser = request.requestingUser
)
Expand All @@ -102,15 +97,17 @@ class SipiResponderADM(responderData: ResponderData) extends Responder(responder
permissionCode: Int = maybeEntityPermission.map(_.toInt).getOrElse(0) // Sipi expects a permission code from 0 to 8

response <- permissionCode match {

case 1 =>
for {
maybeRVSettings <- (responderManager ? ProjectRestrictedViewSettingsGetADM(ProjectIdentifierADM(maybeShortcode = Some(request.projectID)), KnoraSystemInstances.Users.SystemUser)).mapTo[Option[ProjectRestrictedViewSettingsADM]]

maybeRVSettings <- (
responderManager ? ProjectRestrictedViewSettingsGetADM(
ProjectIdentifierADM(maybeShortcode = Some(request.projectID)),
requestingUser = KnoraSystemInstances.Users.SystemUser)
).mapTo[Option[ProjectRestrictedViewSettingsADM]]
} yield SipiFileInfoGetResponseADM(permissionCode = permissionCode, maybeRVSettings)

case _ => FastFuture.successful(SipiFileInfoGetResponseADM(permissionCode = permissionCode, restrictedViewSettings = None))
}

} yield response
}
}
Expand Up @@ -24,10 +24,7 @@
*
* This template is used only by getFileValue.scala.txt.
*
* Since the triplestore type is GraphDB, we assume that inference is enabled, and we use it to optimise the generated
* SPARQL. Specifically, we use inference to return search results matching subproperties of Knora base properties
* such as knora-base:hasFileValue. This requires us to use GraphDB's GRAPH <http://www.ontotext.com/explicit>
* whenever we need to get explicit (non-inferred) statements.
* Since the triplestore type is GraphDB, we turn off inference in this query because it is not needed.
*
* @param fileValueIri the IRI of the file value.
*@
Expand All @@ -38,40 +35,22 @@ PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX knora-base: <http://www.knora.org/ontology/knora-base#>

SELECT ?fileValue ?objPred ?objObj
CONSTRUCT {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@benjamingeer Could you test that with GraphDB locally?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't seem possible with the current Bazel setup.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ideally, there would be a config option.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think our policy at the moment is that we're not officially maintaining support for GraphDB. But I'm trying to do it anyway if it's not too much effort.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe at some point we might have more than one free triplestore available

?fileValue ?objPred ?objObj .
?fileValue knora-base:attachedToProject ?resourceProject .
?fileValue knora-base:hasPermissions ?currentFileValuePermissions .
}
FROM <http://www.ontotext.com/explicit>
WHERE {

?fileValue knora-base:internalFilename "@filename" .

?currentFileValue knora-base:previousValue* ?fileValue ;
knora-base:isDeleted false .

?resource knora-base:hasFileValue ?currentFileValue ;
knora-base:isDeleted false .

{
GRAPH <http://www.ontotext.com/explicit> {
?fileValue ?objPred ?objObj .
}
knora-base:hasPermissions ?currentFileValuePermissions .

FILTER(?objPred != knora-base:attachedToProject && ?objPred != knora-base:hasPermissions)
}
UNION
{
@* Return the permissions of the current version of the value. *@
?resource ?prop ?currentFileValue ;
knora-base:attachedToProject ?resourceProject .

?currentFileValue knora-base:hasPermissions ?currentFileValuePermissions .

BIND(knora-base:hasPermissions AS ?objPred)
BIND(?currentFileValuePermissions AS ?objObj)
}
UNION
{
@* Return the project of the resource that contains the value. *@

?resource knora-base:attachedToProject ?resourceProject .

BIND(knora-base:attachedToProject AS ?objPred)
BIND(?resourceProject AS ?objObj)
}
}
?fileValue ?objPred ?objObj .
?currentFileValue knora-base:isDeleted false .
?resource knora-base:isDeleted false .
}
Expand Up @@ -36,39 +36,20 @@ PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX knora-base: <http://www.knora.org/ontology/knora-base#>

SELECT ?fileValue ?objPred ?objObj
WHERE {

CONSTRUCT {
?fileValue ?objPred ?objObj .
?fileValue knora-base:attachedToProject ?resourceProject .
?fileValue knora-base:hasPermissions ?currentFileValuePermissions .
} WHERE {
?fileValue knora-base:internalFilename "@filename" .

?currentFileValue knora-base:previousValue* ?fileValue ;
knora-base:isDeleted false .

?prop rdfs:subPropertyOf* knora-base:hasFileValue .
knora-base:hasPermissions ?currentFileValuePermissions .
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure I remember why this is necessary (although I seem to have worked on that query): I suppose it's necessary because permissions are only stored for the current version of a value, and if someone asks for a filename that was changed with a later version of the file value, it's the only way to get the permissions, right?

So this means permissions are not versioned.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The permissions attached to the current version of a value also apply to previous versions of the value. Value versions other than the current one do not have this predicate.

https://docs.knora.org/02-knora-ontologies/knora-base/#permissions

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just realised that this means that the FILTER is redundant: 05029e9

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(Values don't have attachedToProject anymore, either.)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just realised that this means that the FILTER is redundant: 05029e9

true, good catch!


?resource ?prop ?currentFileValue ;
knora-base:isDeleted false .

{
?fileValue ?objPred ?objObj .
FILTER(?objPred != knora-base:attachedToProject && ?objPred != knora-base:hasPermissions)
}
UNION
{
@* Return the permissions of the current version of the value. *@

?currentFileValue knora-base:hasPermissions ?currentFileValuePermissions .

BIND(knora-base:hasPermissions AS ?objPred)
BIND(?currentFileValuePermissions AS ?objObj)
}
UNION
{
@* Return the project of the resource that contains the value. *@

?resource knora-base:attachedToProject ?resourceProject .
knora-base:attachedToProject ?resourceProject .

BIND(knora-base:attachedToProject AS ?objPred)
BIND(?resourceProject AS ?objObj)
}
?fileValue ?objPred ?objObj .
?currentFileValue knora-base:isDeleted false .
?resource knora-base:isDeleted false .
}