Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: fix RepositoryUpdater that is not timing out during repository upgrade (DEV-1534) #2313

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Expand Up @@ -288,6 +288,16 @@ case class DropAllTRepositoryContent() extends TriplestoreRequest
*/
case class DropAllRepositoryContentACK()

/**
 * Message for removing all content from the repository one graph at a time,
 * as opposed to dropping everything in a single request.
 * Answered with [[DropDataGraphByGraphACK]] on success.
 */
case class DropDataGraphByGraph() extends TriplestoreRequest

/**
 * Sent as a response to [[DropDataGraphByGraph]] if the request was processed successfully.
 * Carries no payload; its arrival is the acknowledgement.
 */
case class DropDataGraphByGraphACK()

/**
* Inserts data into the repository.
*
Expand Down
Expand Up @@ -6,21 +6,7 @@ import zio.macros.accessible
import java.nio.file.Path

import org.knora.webapi._
import org.knora.webapi.messages.store.triplestoremessages.CheckTriplestoreResponse
import org.knora.webapi.messages.store.triplestoremessages.DropAllRepositoryContentACK
import org.knora.webapi.messages.store.triplestoremessages.FileWrittenResponse
import org.knora.webapi.messages.store.triplestoremessages.InsertGraphDataContentResponse
import org.knora.webapi.messages.store.triplestoremessages.InsertTriplestoreContentACK
import org.knora.webapi.messages.store.triplestoremessages.NamedGraphDataResponse
import org.knora.webapi.messages.store.triplestoremessages.RdfDataObject
import org.knora.webapi.messages.store.triplestoremessages.RepositoryUploadedResponse
import org.knora.webapi.messages.store.triplestoremessages.ResetRepositoryContentACK
import org.knora.webapi.messages.store.triplestoremessages.SparqlAskResponse
import org.knora.webapi.messages.store.triplestoremessages.SparqlConstructRequest
import org.knora.webapi.messages.store.triplestoremessages.SparqlConstructResponse
import org.knora.webapi.messages.store.triplestoremessages.SparqlExtendedConstructRequest
import org.knora.webapi.messages.store.triplestoremessages.SparqlExtendedConstructResponse
import org.knora.webapi.messages.store.triplestoremessages.SparqlUpdateResponse
import org.knora.webapi.messages.store.triplestoremessages._
import org.knora.webapi.messages.util.rdf.QuadFormat
import org.knora.webapi.messages.util.rdf.SparqlSelectResult

Expand Down Expand Up @@ -132,10 +118,15 @@ trait TriplestoreService {
): UIO[ResetRepositoryContentACK]

/**
 * Drops (deletes) all data from the triplestore using "DROP ALL" SPARQL query.
 *
 * @return a [[DropAllRepositoryContentACK]] acknowledging the operation.
 */
def dropAllTriplestoreContent(): UIO[DropAllRepositoryContentACK]

/**
 * Wipes all triplestore data out one graph at a time instead of in a single
 * "DROP ALL" request. Intended for repositories with a large amount of data,
 * where [[dropAllTriplestoreContent]] could run into timeouts.
 *
 * @return a [[DropDataGraphByGraphACK]] acknowledging the operation.
 */
def dropDataGraphByGraph(): UIO[DropDataGraphByGraphACK]

/**
* Inserts the data referenced inside the `rdfDataObjects` by appending it to a default set of `rdfDataObjects`
* based on the list defined in `application.conf` under the `app.triplestore.default-rdf-data` key.
Expand Down
Expand Up @@ -346,22 +346,57 @@ case class TriplestoreServiceHttpConnectorImpl(
} yield ResetRepositoryContentACK()

/**
 * Drops (deletes) all data from the triplestore using "DROP ALL" SPARQL query.
 *
 * The update is sent exactly once; the response is only used for debug logging.
 *
 * @return a [[DropAllRepositoryContentACK]] once the update request has completed.
 */
def dropAllTriplestoreContent(): UIO[DropAllRepositoryContentACK] = {
  val sparqlQuery = "DROP ALL"

  for {
    _      <- ZIO.logDebug("==>> Drop All Data Start")
    result <- getSparqlHttpResponse(sparqlQuery, isUpdate = true)
    _      <- ZIO.logDebug(s"==>> Drop All Data End, Result: $result")
  } yield DropAllRepositoryContentACK()
}

/**
 * Gets all graphs stored in the triplestore.
 *
 * Runs a SPARQL SELECT that returns the distinct graph names containing at least
 * one statement and extracts the `graph` binding from every result row.
 *
 * @return All graphs stored in the triplestore as a `Seq[String]`.
 */
def getAllGraphs(): UIO[Seq[String]] = {
  val sparqlQuery =
    """|
       | SELECT DISTINCT ?graph
       | WHERE {
       | GRAPH ?graph { ?s ?p ?o }
       | }""".stripMargin

  // `?graph` is the only projected variable and is bound by the GRAPH pattern,
  // so each row is expected to carry a "graph" entry.
  sparqlHttpSelect(sparqlQuery).map(_.results.bindings.map(_.rowMap("graph")))
}

/**
 * Drops all triplestore data graph by graph using "DROP GRAPH" SPARQL query.
 * This method is useful in cases with large amount of data (over 10 million statements),
 * where the method [[dropAllTriplestoreContent]] could create timeout issues.
 *
 * The graphs are looked up with [[getAllGraphs]] and dropped sequentially, one
 * update request per graph.
 *
 * @return a [[DropDataGraphByGraphACK]] once every graph has been dropped.
 */
def dropDataGraphByGraph(): UIO[DropDataGraphByGraphACK] = {
  // Builds the SPARQL update that removes a single named graph.
  def dropGraphUpdate(graph: String): String = s"DROP GRAPH <$graph>"

  for {
    _      <- ZIO.logInfo("==>> Drop All Data Start")
    graphs <- getAllGraphs()
    // The individual HTTP responses are not needed, so they are discarded
    // instead of being collected into a result sequence.
    _ <- ZIO.foreachDiscard(graphs) { graph =>
           getSparqlHttpResponse(dropGraphUpdate(graph), isUpdate = true) *>
             ZIO.logDebug(s"==>> Dropped graph: $graph")
         }
    _ <- ZIO.logInfo("==>> Drop All Data End")
  } yield DropDataGraphByGraphACK()
}

/**
* Inserts the data referenced inside the `rdfDataObjects` by appending it to a default set of `rdfDataObjects`
* based on the list defined in `application.conf` under the `app.triplestore.default-rdf-data` key.
Expand Down
Expand Up @@ -82,9 +82,16 @@ object RepositoryUpdater {
for {
// No. Construct the list of updates that it needs.
_ <-
ZIO.logInfo(
s"Repository not up to date. Found: ${foundRepositoryVersion.getOrElse("None")}, Required: $requiredRepositoryVersion"
)
foundRepositoryVersion match {
case Some(foundRepositoryVersion) =>
ZIO.logInfo(
s"Repository not up to date. Found: $foundRepositoryVersion, Required: $requiredRepositoryVersion"
)
case None =>
ZIO.logWarning(
s"Repository not up to date. Found: None, Required: $requiredRepositoryVersion"
)
}
_ <- deleteTmpDirectories()
selectedPlugins <- selectPluginsForNeededUpdates(foundRepositoryVersion)
_ <-
Expand Down Expand Up @@ -202,8 +209,7 @@ object RepositoryUpdater {
)

// Empty the repository.
_ <- ZIO.logInfo("Emptying the repository...")
_ <- triplestoreService.dropAllTriplestoreContent()
_ <- triplestoreService.dropDataGraphByGraph()

// Upload the transformed repository.
_ <- ZIO.logInfo("Uploading transformed repository data...")
Expand Down