Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: fix RepositoryUpdater that is not timing out during repository upgrade (DEV-1534) #2313

Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
53 changes: 53 additions & 0 deletions webapi/src/main/resources/fuseki-repository-config.ttl.template
@@ -0,0 +1,53 @@
@prefix : <http://base/#> .
@prefix fuseki: <http://jena.apache.org/fuseki#> .
@prefix ja: <http://jena.hpl.hp.com/2005/11/Assembler#> .
@prefix tdb2: <http://jena.apache.org/2016/tdb#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix text: <http://jena.apache.org/text#> .
@prefix knora-base: <http://www.knora.org/ontology/knora-base#> .

[] rdf:type fuseki:Server ;
fuseki:services :service_tdb_all ;
ja:loadClass "org.apache.jena.query.text.TextQuery" .


:service_tdb_all a fuseki:Service ;
rdfs:label "TDB2 @REPOSITORY@" ;
fuseki:dataset :text_dataset ;
fuseki:name "@REPOSITORY@" ;
fuseki:serviceQuery "query" , "sparql" ;
fuseki:serviceReadGraphStore "get" ;
fuseki:serviceReadWriteGraphStore "data" ;
fuseki:serviceUpdate "update" ;
fuseki:serviceUpload "upload" .

## ---------------------------------------------------------------
## The URI :text_dataset must stay constant - it is referenced by the service above and is used to assemble the text dataset.

:text_dataset rdf:type text:TextDataset ;
text:dataset :tdb_dataset_readwrite ;
text:index :indexLucene .

# A TDB2 dataset used for RDF storage
:tdb_dataset_readwrite a tdb2:DatasetTDB2 ;
tdb2:unionDefaultGraph true ;
tdb2:location "/fuseki/databases/@REPOSITORY@" .

# Text index description
:indexLucene a text:TextIndexLucene ;
text:directory "/fuseki/lucene/@REPOSITORY@" ;
text:entityMap :entMap .

# Mapping in the index
# URI stored in field "uri"
# rdfs:label, knora-base:valueHasString and knora-base:valueHasComment are mapped to field "text"
:entMap a text:EntityMap ;
text:entityField "uri" ;
text:defaultField "text" ;
text:uidField "uid" ;
text:map (
[ text:field "text" ; text:predicate rdfs:label ]
[ text:field "text" ; text:predicate knora-base:valueHasString ]
[ text:field "text" ; text:predicate knora-base:valueHasComment ]
) .
Expand Up @@ -288,6 +288,16 @@ case class DropAllTRepositoryContent() extends TriplestoreRequest
*/
case class DropAllRepositoryContentACK()

/**
 * Message for removing all content from the repository, one graph at a time.
 * Takes no parameters; acknowledged with [[DropDataGraphByGraphACK]].
 */
case class DropDataGraphByGraph() extends TriplestoreRequest

/**
 * Sent as a response to [[DropDataGraphByGraph]] if the request was processed successfully.
 * Carries no payload.
 */
case class DropDataGraphByGraphACK()

/**
* Inserts data into the repository.
*
Expand Down
Expand Up @@ -6,21 +6,7 @@ import zio.macros.accessible
import java.nio.file.Path

import org.knora.webapi._
import org.knora.webapi.messages.store.triplestoremessages.CheckTriplestoreResponse
import org.knora.webapi.messages.store.triplestoremessages.DropAllRepositoryContentACK
import org.knora.webapi.messages.store.triplestoremessages.FileWrittenResponse
import org.knora.webapi.messages.store.triplestoremessages.InsertGraphDataContentResponse
import org.knora.webapi.messages.store.triplestoremessages.InsertTriplestoreContentACK
import org.knora.webapi.messages.store.triplestoremessages.NamedGraphDataResponse
import org.knora.webapi.messages.store.triplestoremessages.RdfDataObject
import org.knora.webapi.messages.store.triplestoremessages.RepositoryUploadedResponse
import org.knora.webapi.messages.store.triplestoremessages.ResetRepositoryContentACK
import org.knora.webapi.messages.store.triplestoremessages.SparqlAskResponse
import org.knora.webapi.messages.store.triplestoremessages.SparqlConstructRequest
import org.knora.webapi.messages.store.triplestoremessages.SparqlConstructResponse
import org.knora.webapi.messages.store.triplestoremessages.SparqlExtendedConstructRequest
import org.knora.webapi.messages.store.triplestoremessages.SparqlExtendedConstructResponse
import org.knora.webapi.messages.store.triplestoremessages.SparqlUpdateResponse
import org.knora.webapi.messages.store.triplestoremessages._
import org.knora.webapi.messages.util.rdf.QuadFormat
import org.knora.webapi.messages.util.rdf.SparqlSelectResult

Expand Down Expand Up @@ -132,10 +118,15 @@ trait TriplestoreService {
): UIO[ResetRepositoryContentACK]

/**
 * Drops (deletes) all data from the triplestore using a "DROP ALL" SPARQL query.
 *
 * @return an effect yielding a [[DropAllRepositoryContentACK]].
 */
def dropAllTriplestoreContent(): UIO[DropAllRepositoryContentACK]

/**
 * Wipes all triplestore data out graph by graph, using HTTP requests.
 * Alternative to [[dropAllTriplestoreContent]], which drops everything in a single "DROP ALL" query.
 *
 * @return an effect yielding a [[DropDataGraphByGraphACK]].
 */
def dropDataGraphByGraph(): UIO[DropDataGraphByGraphACK]

/**
* Inserts the data referenced inside the `rdfDataObjects` by appending it to a default set of `rdfDataObjects`
* based on the list defined in `application.conf` under the `app.triplestore.default-rdf-data` key.
Expand Down
Expand Up @@ -346,14 +346,10 @@ case class TriplestoreServiceHttpConnectorImpl(
} yield ResetRepositoryContentACK()

/**
* Drops (deletes) all data from the triplestore.
* Drops (deletes) all data from the triplestore using "DROP ALL" SPARQL query.
*/
def dropAllTriplestoreContent(): UIO[DropAllRepositoryContentACK] = {

val DropAllSparqlString =
"""
DROP ALL
"""
val DropAllSparqlString = "DROP ALL"

for {
_ <- ZIO.logDebug("==>> Drop All Data Start")
Expand All @@ -362,6 +358,45 @@ case class TriplestoreServiceHttpConnectorImpl(
} yield DropAllRepositoryContentACK()
}

/**
 * Lists every graph in the triplestore that contains at least one statement.
 *
 * @return the graph names bound to `?graph` by the SELECT query, as a `Seq[String]`.
 */
def getAllGraphs(): UIO[Seq[String]] = {
  val selectGraphsQuery =
    """|
       | SELECT DISTINCT ?graph
       | WHERE {
       | GRAPH ?graph { ?s ?p ?o }
       | }""".stripMargin

  // Each result row has exactly the one projected variable, "graph".
  sparqlHttpSelect(selectGraphsQuery).map { selectResult =>
    selectResult.results.bindings.map(row => row.rowMap("graph"))
  }
}

/**
 * Drops all triplestore data graph by graph, issuing one "DROP GRAPH" SPARQL update per graph
 * returned by [[getAllGraphs]].
 * This method is useful in cases with large amount of data (over 10 million statements),
 * where the method [[dropAllTriplestoreContent]] could create timeout issues.
 *
 * @return a [[DropDataGraphByGraphACK]] after every graph has been dropped.
 */
def dropDataGraphByGraph(): UIO[DropDataGraphByGraphACK] = {
  def dropGraphUpdate(graph: String): String = s"DROP GRAPH <$graph>"

  for {
    _      <- ZIO.logInfo("==>> Drop All Data Start")
    graphs <- getAllGraphs()
    // Graphs are dropped one after another. The HTTP responses are not needed,
    // so they are discarded rather than collected into a result sequence.
    _ <- ZIO.foreachDiscard(graphs) { graph =>
           getSparqlHttpResponse(dropGraphUpdate(graph), isUpdate = true) *>
             ZIO.logDebug(s"==>> Dropped graph: $graph")
         }
    _ <- ZIO.logInfo("==>> Drop All Data End")
  } yield DropDataGraphByGraphACK()
}

/**
* Inserts the data referenced inside the `rdfDataObjects` by appending it to a default set of `rdfDataObjects`
* based on the list defined in `application.conf` under the `app.triplestore.default-rdf-data` key.
Expand Down
Expand Up @@ -82,9 +82,16 @@ object RepositoryUpdater {
for {
// No. Construct the list of updates that it needs.
_ <-
ZIO.logInfo(
s"Repository not up to date. Found: ${foundRepositoryVersion.getOrElse("None")}, Required: $requiredRepositoryVersion"
)
foundRepositoryVersion match {
case Some(foundRepositoryVersion) =>
ZIO.logInfo(
s"Repository not up to date. Found: $foundRepositoryVersion, Required: $requiredRepositoryVersion"
)
case None =>
ZIO.logWarning(
s"Repository not up to date. Found: None, Required: $requiredRepositoryVersion"
)
}
_ <- deleteTmpDirectories()
selectedPlugins <- selectPluginsForNeededUpdates(foundRepositoryVersion)
_ <-
Expand Down Expand Up @@ -202,8 +209,8 @@ object RepositoryUpdater {
)

// Empty the repository.
_ <- ZIO.logInfo("Emptying the repository...")
_ <- triplestoreService.dropAllTriplestoreContent()
_ <- ZIO.logInfo("Wiping the repository...")
_ <- triplestoreService.dropDataGraphByGraph()

// Upload the transformed repository.
_ <- ZIO.logInfo("Uploading transformed repository data...")
Expand Down