Skip to content

Commit

Permalink
fix: fix RepositoryUpdater that is not timing out during repository u…
Browse files Browse the repository at this point in the history
…pgrade (DEV-1534) (#2313)
  • Loading branch information
mpro7 committed Dec 2, 2022
1 parent 6f5020e commit 213a5f0
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 28 deletions.
Expand Up @@ -288,6 +288,16 @@ case class DropAllTRepositoryContent() extends TriplestoreRequest
*/
case class DropAllRepositoryContentACK()

/**
 * Message for removing all content from the repository, one graph at a time.
 * Answered with [[DropDataGraphByGraphACK]] when the request was processed successfully.
 */
case class DropDataGraphByGraph() extends TriplestoreRequest

/**
 * Acknowledgement sent as a response to [[DropDataGraphByGraph]] if the request was
 * processed successfully. Carries no payload.
 */
case class DropDataGraphByGraphACK()

/**
* Inserts data into the repository.
*
Expand Down
Expand Up @@ -6,21 +6,7 @@ import zio.macros.accessible
import java.nio.file.Path

import org.knora.webapi._
import org.knora.webapi.messages.store.triplestoremessages.CheckTriplestoreResponse
import org.knora.webapi.messages.store.triplestoremessages.DropAllRepositoryContentACK
import org.knora.webapi.messages.store.triplestoremessages.FileWrittenResponse
import org.knora.webapi.messages.store.triplestoremessages.InsertGraphDataContentResponse
import org.knora.webapi.messages.store.triplestoremessages.InsertTriplestoreContentACK
import org.knora.webapi.messages.store.triplestoremessages.NamedGraphDataResponse
import org.knora.webapi.messages.store.triplestoremessages.RdfDataObject
import org.knora.webapi.messages.store.triplestoremessages.RepositoryUploadedResponse
import org.knora.webapi.messages.store.triplestoremessages.ResetRepositoryContentACK
import org.knora.webapi.messages.store.triplestoremessages.SparqlAskResponse
import org.knora.webapi.messages.store.triplestoremessages.SparqlConstructRequest
import org.knora.webapi.messages.store.triplestoremessages.SparqlConstructResponse
import org.knora.webapi.messages.store.triplestoremessages.SparqlExtendedConstructRequest
import org.knora.webapi.messages.store.triplestoremessages.SparqlExtendedConstructResponse
import org.knora.webapi.messages.store.triplestoremessages.SparqlUpdateResponse
import org.knora.webapi.messages.store.triplestoremessages._
import org.knora.webapi.messages.util.rdf.QuadFormat
import org.knora.webapi.messages.util.rdf.SparqlSelectResult

Expand Down Expand Up @@ -132,10 +118,15 @@ trait TriplestoreService {
): UIO[ResetRepositoryContentACK]

/**
 * Drops (deletes) all data from the triplestore using a "DROP ALL" SPARQL query.
 *
 * @return a [[DropAllRepositoryContentACK]].
 */
def dropAllTriplestoreContent(): UIO[DropAllRepositoryContentACK]

/**
 * Wipes all triplestore data out graph by graph, as an alternative to the single
 * "DROP ALL" query issued by [[dropAllTriplestoreContent]].
 *
 * NOTE(review): the HTTP connector implementation sends one "DROP GRAPH" update
 * request per graph returned by the triplestore.
 *
 * @return a [[DropDataGraphByGraphACK]].
 */
def dropDataGraphByGraph(): UIO[DropDataGraphByGraphACK]

/**
* Inserts the data referenced inside the `rdfDataObjects` by appending it to a default set of `rdfDataObjects`
* based on the list defined in `application.conf` under the `app.triplestore.default-rdf-data` key.
Expand Down
Expand Up @@ -346,22 +346,57 @@ case class TriplestoreServiceHttpConnectorImpl(
} yield ResetRepositoryContentACK()

/**
 * Drops (deletes) all data from the triplestore using a single "DROP ALL" SPARQL query.
 *
 * @return a [[DropAllRepositoryContentACK]] once the update request has returned.
 */
def dropAllTriplestoreContent(): UIO[DropAllRepositoryContentACK] = {
  val sparqlQuery = "DROP ALL"

  for {
    _ <- ZIO.logDebug("==>> Drop All Data Start")
    // Issue the update exactly once; the response is only used for the debug log below.
    result <- getSparqlHttpResponse(sparqlQuery, isUpdate = true)
    _      <- ZIO.logDebug(s"==>> Drop All Data End, Result: $result")
  } yield DropAllRepositoryContentACK()
}

/**
 * Gets all graphs stored in the triplestore.
 *
 * Only graphs that contain at least one statement are returned, because the query
 * matches on `GRAPH ?graph { ?s ?p ?o }`.
 *
 * @return the distinct `?graph` values found in the triplestore, as a `Seq[String]`.
 */
def getAllGraphs(): UIO[Seq[String]] = {
  val sparqlQuery =
    """|
       | SELECT DISTINCT ?graph
       | WHERE {
       | GRAPH ?graph { ?s ?p ?o }
       | }""".stripMargin

  // Extracting the bound variable is pure, so a plain `map` on the effect is enough.
  sparqlHttpSelect(sparqlQuery).map { selectResult =>
    selectResult.results.bindings.map(_.rowMap("graph"))
  }
}

/**
 * Drops all triplestore data graph by graph using "DROP GRAPH" SPARQL query.
 * This method is useful in cases with large amount of data (over 10 million statements),
 * where the method [[dropAllTriplestoreContent()]] could create timeout issues.
 *
 * The graphs to drop are obtained from [[getAllGraphs()]] and are dropped sequentially,
 * one update request per graph.
 *
 * @return a [[DropDataGraphByGraphACK]] once every graph has been processed.
 */
def dropDataGraphByGraph(): UIO[DropDataGraphByGraphACK] = {
  def dropGraphQuery(graph: String): String = s"DROP GRAPH <$graph>"

  for {
    _      <- ZIO.logInfo("==>> Drop All Data Start")
    graphs <- getAllGraphs()
    // The individual responses are not needed, so do not collect them into a result collection.
    _ <- ZIO.foreachDiscard(graphs) { graph =>
           getSparqlHttpResponse(dropGraphQuery(graph), isUpdate = true) *>
             ZIO.logDebug(s"==>> Dropped graph: $graph")
         }
    _ <- ZIO.logInfo("==>> Drop All Data End")
  } yield DropDataGraphByGraphACK()
}

/**
* Inserts the data referenced inside the `rdfDataObjects` by appending it to a default set of `rdfDataObjects`
* based on the list defined in `application.conf` under the `app.triplestore.default-rdf-data` key.
Expand Down
Expand Up @@ -82,9 +82,16 @@ object RepositoryUpdater {
for {
// No. Construct the list of updates that it needs.
_ <-
ZIO.logInfo(
s"Repository not up to date. Found: ${foundRepositoryVersion.getOrElse("None")}, Required: $requiredRepositoryVersion"
)
foundRepositoryVersion match {
case Some(foundRepositoryVersion) =>
ZIO.logInfo(
s"Repository not up to date. Found: $foundRepositoryVersion, Required: $requiredRepositoryVersion"
)
case None =>
ZIO.logWarning(
s"Repository not up to date. Found: None, Required: $requiredRepositoryVersion"
)
}
_ <- deleteTmpDirectories()
selectedPlugins <- selectPluginsForNeededUpdates(foundRepositoryVersion)
_ <-
Expand Down Expand Up @@ -202,8 +209,7 @@ object RepositoryUpdater {
)

// Empty the repository.
_ <- ZIO.logInfo("Emptying the repository...")
_ <- triplestoreService.dropAllTriplestoreContent()
_ <- triplestoreService.dropDataGraphByGraph()

// Upload the transformed repository.
_ <- ZIO.logInfo("Uploading transformed repository data...")
Expand Down

0 comments on commit 213a5f0

Please sign in to comment.