From 213a5f0fd6cd48ffba8128e33b6a964059ed3309 Mon Sep 17 00:00:00 2001 From: Marcin Procyk Date: Fri, 2 Dec 2022 08:46:08 +0100 Subject: [PATCH] fix: fix RepositoryUpdater that is not timing out during repository upgrade (DEV-1534) (#2313) --- .../TriplestoreMessages.scala | 10 ++++ .../triplestore/api/TriplestoreService.scala | 23 +++------ .../TriplestoreServiceHttpConnectorImpl.scala | 49 ++++++++++++++++--- .../upgrade/RepositoryUpdater.scala | 16 ++++-- 4 files changed, 70 insertions(+), 28 deletions(-) diff --git a/webapi/src/main/scala/org/knora/webapi/messages/store/triplestoremessages/TriplestoreMessages.scala b/webapi/src/main/scala/org/knora/webapi/messages/store/triplestoremessages/TriplestoreMessages.scala index acef8c9471..5bbb68e843 100644 --- a/webapi/src/main/scala/org/knora/webapi/messages/store/triplestoremessages/TriplestoreMessages.scala +++ b/webapi/src/main/scala/org/knora/webapi/messages/store/triplestoremessages/TriplestoreMessages.scala @@ -288,6 +288,16 @@ case class DropAllTRepositoryContent() extends TriplestoreRequest */ case class DropAllRepositoryContentACK() +/** + * Message for removing all content from the repository. + */ +case class DropDataGraphByGraph() extends TriplestoreRequest + +/** + * Sent as a response to [[DropDataGraphByGraph]] if the request was processed successfully. + */ +case class DropDataGraphByGraphACK() + /** * Inserts data into the repository. 
* diff --git a/webapi/src/main/scala/org/knora/webapi/store/triplestore/api/TriplestoreService.scala b/webapi/src/main/scala/org/knora/webapi/store/triplestore/api/TriplestoreService.scala index b1100356c0..f6a92da946 100644 --- a/webapi/src/main/scala/org/knora/webapi/store/triplestore/api/TriplestoreService.scala +++ b/webapi/src/main/scala/org/knora/webapi/store/triplestore/api/TriplestoreService.scala @@ -6,21 +6,7 @@ import zio.macros.accessible import java.nio.file.Path import org.knora.webapi._ -import org.knora.webapi.messages.store.triplestoremessages.CheckTriplestoreResponse -import org.knora.webapi.messages.store.triplestoremessages.DropAllRepositoryContentACK -import org.knora.webapi.messages.store.triplestoremessages.FileWrittenResponse -import org.knora.webapi.messages.store.triplestoremessages.InsertGraphDataContentResponse -import org.knora.webapi.messages.store.triplestoremessages.InsertTriplestoreContentACK -import org.knora.webapi.messages.store.triplestoremessages.NamedGraphDataResponse -import org.knora.webapi.messages.store.triplestoremessages.RdfDataObject -import org.knora.webapi.messages.store.triplestoremessages.RepositoryUploadedResponse -import org.knora.webapi.messages.store.triplestoremessages.ResetRepositoryContentACK -import org.knora.webapi.messages.store.triplestoremessages.SparqlAskResponse -import org.knora.webapi.messages.store.triplestoremessages.SparqlConstructRequest -import org.knora.webapi.messages.store.triplestoremessages.SparqlConstructResponse -import org.knora.webapi.messages.store.triplestoremessages.SparqlExtendedConstructRequest -import org.knora.webapi.messages.store.triplestoremessages.SparqlExtendedConstructResponse -import org.knora.webapi.messages.store.triplestoremessages.SparqlUpdateResponse +import org.knora.webapi.messages.store.triplestoremessages._ import org.knora.webapi.messages.util.rdf.QuadFormat import org.knora.webapi.messages.util.rdf.SparqlSelectResult @@ -132,10 +118,15 @@ trait 
TriplestoreService { ): UIO[ResetRepositoryContentACK] /** - * Drops (deletes) all data from the triplestore. + * Drops (deletes) all data from the triplestore using "DROP ALL" SPARQL query. */ def dropAllTriplestoreContent(): UIO[DropAllRepositoryContentACK] + /** + * Drops (deletes) all data from the triplestore graph by graph, using one "DROP GRAPH" SPARQL query per graph. + */ + def dropDataGraphByGraph(): UIO[DropDataGraphByGraphACK] + /** * Inserts the data referenced inside the `rdfDataObjects` by appending it to a default set of `rdfDataObjects` * based on the list defined in `application.conf` under the `app.triplestore.default-rdf-data` key. diff --git a/webapi/src/main/scala/org/knora/webapi/store/triplestore/impl/TriplestoreServiceHttpConnectorImpl.scala b/webapi/src/main/scala/org/knora/webapi/store/triplestore/impl/TriplestoreServiceHttpConnectorImpl.scala index 10e0810a2d..86079fe8ee 100644 --- a/webapi/src/main/scala/org/knora/webapi/store/triplestore/impl/TriplestoreServiceHttpConnectorImpl.scala +++ b/webapi/src/main/scala/org/knora/webapi/store/triplestore/impl/TriplestoreServiceHttpConnectorImpl.scala @@ -346,22 +346,57 @@ case class TriplestoreServiceHttpConnectorImpl( } yield ResetRepositoryContentACK() /** - * Drops (deletes) all data from the triplestore. + * Drops (deletes) all data from the triplestore using "DROP ALL" SPARQL query. */ def dropAllTriplestoreContent(): UIO[DropAllRepositoryContentACK] = { - - val DropAllSparqlString = - """ - DROP ALL - """ + val sparqlQuery = "DROP ALL" for { _ <- ZIO.logDebug("==>> Drop All Data Start") - result <- getSparqlHttpResponse(DropAllSparqlString, isUpdate = true) + result <- getSparqlHttpResponse(sparqlQuery, isUpdate = true) _ <- ZIO.logDebug(s"==>> Drop All Data End, Result: $result") } yield DropAllRepositoryContentACK() } + /** + * Gets all graphs stored in the triplestore. 
+ * + * @return All graphs stored in the triplestore as a `Seq[String]` + */ + def getAllGraphs(): UIO[Seq[String]] = { + val sparqlQuery = + """| + | SELECT DISTINCT ?graph + | WHERE { + | GRAPH ?graph { ?s ?p ?o } + | }""".stripMargin + + for { + res <- sparqlHttpSelect(sparqlQuery) + bindings <- ZIO.succeed(res.results.bindings) + graphs = bindings.map(_.rowMap("graph")) + } yield graphs + } + + /** + * Drops all triplestore data graph by graph using a "DROP GRAPH" SPARQL query for each graph. + * This method is useful in cases with a large amount of data (over 10 million statements), + * where the method [[dropAllTriplestoreContent()]] could create timeout issues. + */ + def dropDataGraphByGraph(): UIO[DropDataGraphByGraphACK] = { + val sparqlQuery = (graph: String) => s"DROP GRAPH <$graph>" + + for { + _ <- ZIO.logInfo("==>> Drop All Data Start") + graphs <- getAllGraphs() + _ <- ZIO.foreach(graphs)(graph => + getSparqlHttpResponse(sparqlQuery(graph), isUpdate = true) + .tap(result => ZIO.logDebug(s"==>> Dropped graph: $graph")) + ) + _ <- ZIO.logInfo("==>> Drop All Data End") + } yield DropDataGraphByGraphACK() + } + /** * Inserts the data referenced inside the `rdfDataObjects` by appending it to a default set of `rdfDataObjects` * based on the list defined in `application.conf` under the `app.triplestore.default-rdf-data` key. diff --git a/webapi/src/main/scala/org/knora/webapi/store/triplestore/upgrade/RepositoryUpdater.scala b/webapi/src/main/scala/org/knora/webapi/store/triplestore/upgrade/RepositoryUpdater.scala index d974ee6653..afb85f5182 100644 --- a/webapi/src/main/scala/org/knora/webapi/store/triplestore/upgrade/RepositoryUpdater.scala +++ b/webapi/src/main/scala/org/knora/webapi/store/triplestore/upgrade/RepositoryUpdater.scala @@ -82,9 +82,16 @@ object RepositoryUpdater { for { // No. Construct the list of updates that it needs. _ <- - ZIO.logInfo( - s"Repository not up to date. 
Found: ${foundRepositoryVersion.getOrElse("None")}, Required: $requiredRepositoryVersion" - ) + foundRepositoryVersion match { + case Some(foundRepositoryVersion) => + ZIO.logInfo( + s"Repository not up to date. Found: $foundRepositoryVersion, Required: $requiredRepositoryVersion" + ) + case None => + ZIO.logWarning( + s"Repository not up to date. Found: None, Required: $requiredRepositoryVersion" + ) + } _ <- deleteTmpDirectories() selectedPlugins <- selectPluginsForNeededUpdates(foundRepositoryVersion) _ <- @@ -202,8 +209,7 @@ object RepositoryUpdater { ) // Empty the repository. - _ <- ZIO.logInfo("Emptying the repository...") - _ <- triplestoreService.dropAllTriplestoreContent() + _ <- triplestoreService.dropDataGraphByGraph() // Upload the transformed repository. _ <- ZIO.logInfo("Uploading transformed repository data...")