/
StringFormatter.scala
3020 lines (2618 loc) · 107 KB
/
StringFormatter.scala
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
* Copyright © 2021 - 2022 Swiss National Data and Service Center for the Humanities and/or DaSCH Service Platform contributors.
* SPDX-License-Identifier: Apache-2.0
*/
package org.knora.webapi.messages
import akka.actor.ActorRef
import akka.http.scaladsl.util.FastFuture
import akka.pattern._
import akka.util.Timeout
import com.google.gwt.safehtml.shared.UriUtils._
import com.typesafe.scalalogging.Logger
import org.apache.commons.lang3.StringUtils
import spray.json._
import java.nio.ByteBuffer
import java.time._
import java.time.format.DateTimeFormatter
import java.time.temporal.ChronoField
import java.time.temporal.TemporalAccessor
import java.util.Base64
import java.util.UUID
import java.util.concurrent.ConcurrentHashMap
import scala.concurrent.ExecutionContext
import scala.concurrent.Future
import scala.util.Failure
import scala.util.Success
import scala.util.Try
import scala.util.matching.Regex
import dsp.errors._
import dsp.valueobjects.Iri
import dsp.valueobjects.IriErrorMessages
import org.knora.webapi._
import org.knora.webapi.config.AppConfig
import org.knora.webapi.messages.IriConversions._
import org.knora.webapi.messages.admin.responder.projectsmessages.ProjectADM
import org.knora.webapi.messages.store.triplestoremessages.SparqlAskRequest
import org.knora.webapi.messages.store.triplestoremessages.SparqlAskResponse
import org.knora.webapi.messages.store.triplestoremessages.StringLiteralSequenceV2
import org.knora.webapi.messages.store.triplestoremessages.StringLiteralV2
import org.knora.webapi.messages.v1.responder.projectmessages.ProjectInfoV1
import org.knora.webapi.messages.v2.responder.KnoraContentV2
import org.knora.webapi.messages.v2.responder.standoffmessages._
import org.knora.webapi.util.Base64UrlCheckDigit
import org.knora.webapi.util.JavaUtil
/**
* Provides instances of [[StringFormatter]], as well as string formatting constants.
*/
object StringFormatter {
// Non-printing delimiter characters. The four "information separators" are control characters
// that should never occur in data, which is why they are used as delimiters.
// Unicode INFORMATION SEPARATOR ONE (U+001F, the ASCII unit separator).
val INFORMATION_SEPARATOR_ONE = '\u001F'
// Unicode INFORMATION SEPARATOR TWO (U+001E, the ASCII record separator).
val INFORMATION_SEPARATOR_TWO = '\u001E'
// Unicode INFORMATION SEPARATOR THREE (U+001D, the ASCII group separator).
val INFORMATION_SEPARATOR_THREE = '\u001D'
// Unicode INFORMATION SEPARATOR FOUR (U+001C, the ASCII file separator).
val INFORMATION_SEPARATOR_FOUR = '\u001C'
// Unicode PARAGRAPH SEPARATOR (U+2029). It is inserted in XML to separate nodes from one another,
// and is only used temporarily while the XML is being processed.
val PARAGRAPH_SEPARATOR = '\u2029'
// ANSI escape sequences (ESC, i.e. U+001B, followed by an SGR code) for changing the text colour
// in terminals: 31 = red, 32 = green, 33 = yellow, 0 = reset all attributes.
val ANSI_RED = "\u001B[31m"
val ANSI_GREEN = "\u001B[32m"
val ANSI_YELLOW = "\u001B[33m"
val ANSI_RESET = "\u001B[0m"
/**
 * Separates the calendar name from the rest of a Knora date, e.g. the `:` in `GREGORIAN:1492`.
 * The Knora date regex in the [[StringFormatter]] class also uses it between the two dates of a period.
 */
val CalendarSeparator: String = ":"
/**
 * Separates year, month, and day in a Knora date, e.g. the `-` characters in `1492-10-12`.
 */
val PrecisionSeparator: String = "-"
/**
 * Separates a date (year, month, day) from the era in a Knora date, e.g. the space in `44 BC`.
 */
val EraSeparator: String = " "
/**
 * The era name "Before Christ" (equivalent to BCE).
 */
val Era_BC: String = "BC"
/**
 * The era name "Before Common Era" (equivalent to BC).
 */
val Era_BCE: String = "BCE"
/**
 * The era name "Anno Domini" (equivalent to CE).
 */
val Era_AD: String = "AD"
/**
 * The era name "Common Era" (equivalent to AD).
 */
val Era_CE: String = "CE"
/**
 * String representation of the name of the Gregorian calendar.
 */
val CalendarGregorian: String = "GREGORIAN"
/**
 * String representation of the name of the Julian calendar.
 */
val CalendarJulian: String = "JULIAN"
/**
 * String representation of the name of the Islamic calendar.
 */
val CalendarIslamic: String = "ISLAMIC"
/**
 * String representation of day precision in a date.
 */
val PrecisionDay: String = "DAY"
/**
 * String representation of month precision in a date.
 */
val PrecisionMonth: String = "MONTH"
/**
 * String representation of year precision in a date.
 */
val PrecisionYear: String = "YEAR"
/**
 * The version number of the current version of Knora's ARK URL format.
 */
val ArkVersion: String = "1"
/**
 * The length of the canonical representation of a UUID: 32 hexadecimal digits plus 4 hyphens.
 */
val CanonicalUuidLength = 36
/**
 * The length of a Base64-encoded UUID: the 128 bits of a UUID need 22 Base64 characters when no
 * padding is added.
 */
val Base64UuidLength = 22
/**
 * The maximum number of times that `makeUnusedIri` will try to make a new, unused IRI.
 */
val MAX_IRI_ATTEMPTS: Int = 5
/**
 * The domain name used to construct Knora IRIs.
 */
val IriDomain: String = "rdfh.ch"
/**
 * A keyword used in IRI entity names to introduce a collection type annotation for client code generation.
 */
val ClientCollectionTypeKeyword: String = "collection:"
/**
 * A string found in IRIs representing collection type annotations for client code generation,
 * i.e. `#` followed by [[ClientCollectionTypeKeyword]] (`#collection:`).
 */
val ClientCollectionEntityNameStart: String = "#" + ClientCollectionTypeKeyword
/**
 * A container for an XML import namespace and its prefix label.
 *
 * @param namespace   the namespace, as an IRI string.
 * @param prefixLabel the prefix label associated with the namespace.
 */
case class XmlImportNamespaceInfoV1(namespace: IRI, prefixLabel: String)
/*
In order to parse project-specific API v2 ontology IRIs, the StringFormatter
class needs the Knora API server's hostname, which is set in application.conf,
which is not read until the Akka ActorSystem starts. Therefore, IRI parsing is
done in the StringFormatter class, rather than in the StringFormatter object.
There are two instances of StringFormatter, defined below.
*/
/**
 * The instance of [[StringFormatter]] that is initialised after the ActorSystem starts,
 * and can parse project-specific API v2 ontology IRIs. This instance is used almost
 * everywhere in the API server.
 *
 * The field is assigned inside `this.synchronized` (in `init` and `initForTest`) but is read
 * without a lock by `getGeneralInstance`. It is therefore marked `@volatile`, so that a thread
 * calling `getGeneralInstance` is guaranteed to see the instance once it has been assigned.
 */
@volatile private var generalInstance: Option[StringFormatter] = None
/**
 * The instance of [[StringFormatter]] that can be used as soon as the JVM starts, but
 * can't parse project-specific API v2 ontology IRIs. This instance is used
 * only to initialise the hard-coded API v2 ontologies [[org.knora.webapi.messages.v2.responder.ontologymessages.KnoraBaseToApiV2SimpleTransformationRules]]
 * and [[org.knora.webapi.messages.v2.responder.ontologymessages.KnoraBaseToApiV2ComplexTransformationRules]].
 */
private val instanceForConstantOntologies = new StringFormatter(None)
/**
 * Returns the general-purpose singleton [[StringFormatter]], i.e. the one that handles IRIs from data.
 *
 * @return the instance that was created by `init` or `initForTest`.
 * @throws AssertionException if the general instance has not been initialised yet.
 */
def getGeneralInstance: StringFormatter =
  generalInstance.getOrElse(throw AssertionException("StringFormatter not yet initialised"))
/**
 * Gets the singleton instance of [[StringFormatter]] that can only handle the IRIs in built-in
 * ontologies. Unlike the general instance, it is constructed without a configuration, so it is
 * available without calling `init` first.
 *
 * @return the instance for constant ontologies.
 */
def getInstanceForConstantOntologies: StringFormatter = instanceForConstantOntologies
/**
 * Creates the general instance of [[StringFormatter]] from the application's configuration.
 * If the general instance already exists, this call does nothing.
 *
 * @param config the application's configuration.
 */
def init(config: AppConfig): Unit =
  this.synchronized {
    // Only the first initialisation wins; later calls leave the existing instance in place.
    if (generalInstance.isEmpty) {
      generalInstance = Some(new StringFormatter(Some(config)))
    }
  }
/**
 * Creates the general instance of [[StringFormatter]] for a test, without a configuration.
 * If the general instance already exists, this call does nothing.
 */
def initForTest(): Unit =
  this.synchronized {
    // Only the first initialisation wins; later calls leave the existing instance in place.
    if (generalInstance.isEmpty) {
      generalInstance = Some(new StringFormatter(maybeConfig = None, initForTest = true))
    }
  }
/**
 * Indicates whether the IRI is a data IRI, a definition IRI, or an IRI of an unknown type.
 * The trait is sealed, so the three case objects below are its only possible values.
 */
private sealed trait IriType
/**
 * Indicates that the IRI is a data IRI.
 */
private case object KnoraDataIri extends IriType
/**
 * Indicates that the IRI is an ontology or ontology entity IRI.
 */
private case object KnoraDefinitionIri extends IriType
/**
 * Indicates that the type of the IRI is unknown.
 */
private case object UnknownIriType extends IriType
/**
 * Holds information extracted from the IRI.
 *
 * @param iriType            the type of the IRI.
 * @param projectCode        the IRI's project code, if any.
 * @param ontologyName       the IRI's ontology name, if any.
 * @param entityName         the IRI's entity name, if any.
 * @param resourceID         if this is a resource IRI or value IRI, its resource ID.
 * @param valueID            if this is a value IRI, its value ID.
 * @param standoffStartIndex if this is a standoff IRI, its start index.
 * @param ontologySchema     the IRI's ontology schema, or `None` if it is not a Knora definition IRI.
 *                           This parameter has no default and must always be given.
 * @param isBuiltInDef       `true` if the IRI refers to a built-in Knora ontology or ontology entity.
 * @param sharedOntology     presumably `true` if the IRI belongs to a shared ontology
 *                           (TODO confirm where this field is populated).
 */
private case class SmartIriInfo(
iriType: IriType,
projectCode: Option[String] = None,
ontologyName: Option[String] = None,
entityName: Option[String] = None,
resourceID: Option[String] = None,
valueID: Option[String] = None,
standoffStartIndex: Option[Int] = None,
ontologySchema: Option[OntologySchema],
isBuiltInDef: Boolean = false,
sharedOntology: Boolean = false
)
/**
 * Maps IRI strings to their parsed [[SmartIri]] instances. To keep the cache from getting too large,
 * only IRIs from known ontologies are cached. The map is created lazily, with an initial capacity
 * of 2048 entries.
 */
private lazy val smartIriCache = new ConcurrentHashMap[IRI, SmartIri](2048)
/**
 * Looks up the smart IRI cached for `iriStr`; if there is none, creates one with `creationFun`,
 * caches it, and returns it.
 *
 * NOTE(review): the JDK documentation of `ConcurrentHashMap.computeIfAbsent` requires that the
 * mapping function must not update other mappings of the same map. Confirm that `creationFun`
 * never triggers another cached IRI construction.
 *
 * @param iriStr      the IRI in string form, used as the cache key.
 * @param creationFun a function that creates the smart IRI to be cached.
 * @return the cached or newly created smart IRI.
 */
private def getOrCacheSmartIri(iriStr: IRI, creationFun: () => SmartIri): SmartIri =
  smartIriCache.computeIfAbsent(iriStr, JavaUtil.function(_ => creationFun()))
}
/**
* Represents a parsed IRI with Knora-specific functionality. To construct a `SmartIri`,
* `import org.knora.webapi.messages.IriConversions.ConvertibleIri`, then call one of the methods that
* it implicitly defines on `String`, e.g.:
*
* - "http://knora.example.org/ontology/0000/example#Something".toSmartIri
* - "http://knora.example.org/ontology/0000/example#Something".toSmartIriWithErr(throw BadRequestException("Invalid IRI"))
*/
sealed trait SmartIri extends Ordered[SmartIri] with KnoraContentV2[SmartIri] {
/*
The smart IRI implementation, SmartIriImpl, is nested in the StringFormatter
class because it uses the Knora API server's hostname, which isn't available
until the Akka ActorSystem has started. However, this means that the type of a
SmartIriImpl instance is dependent on the instance of StringFormatter that
constructed it. Therefore, you can't compare two instances of SmartIriImpl
created by two different instances of StringFormatter.
To make it possible to compare smart IRI objects, the publicly visible smart IRI
type is the SmartIri trait. Since SmartIri is a top-level definition, two instances
of SmartIri can be compared, even if they were made by different instances of
StringFormatter. To make this work, SmartIri provides its own equals and hashCode
methods, which delegate to the string representation of the IRI.
*/
/**
* Returns this IRI as a string in angle brackets.
*/
def toSparql: String
/**
* Returns `true` if this is a Knora data or definition IRI.
*/
def isKnoraIri: Boolean
/**
* Returns `true` if this is a Knora data IRI.
*/
def isKnoraDataIri: Boolean
/**
* Returns `true` if this is a Knora resource IRI.
*/
def isKnoraResourceIri: Boolean
/**
* Returns `true` if this is a Knora value IRI.
*/
def isKnoraValueIri: Boolean
/**
* Returns `true` if this is a Knora standoff IRI.
*/
def isKnoraStandoffIri: Boolean
/**
* Returns `true` if this is a Knora ontology or entity IRI.
*/
def isKnoraDefinitionIri: Boolean
/**
 * Returns `true` if this is a built-in Knora ontology or entity IRI.
 *
 * @return `true` for a built-in Knora ontology or entity IRI, `false` otherwise.
 */
def isKnoraBuiltInDefinitionIri: Boolean
/**
* Returns `true` if this IRI belongs to a shared ontology.
*/
def isKnoraSharedDefinitionIri: Boolean
/**
 * Returns `true` if this is an internal Knora ontology or entity IRI.
 *
 * @return `true` for an internal Knora ontology or entity IRI, `false` otherwise.
 */
def isKnoraInternalDefinitionIri: Boolean
/**
* Returns `true` if this is an internal Knora ontology entity IRI.
*/
def isKnoraInternalEntityIri: Boolean
/**
* Returns `true` if this is a Knora ontology IRI.
*/
def isKnoraOntologyIri: Boolean
/**
* Returns `true` if this is a Knora entity IRI.
*/
def isKnoraEntityIri: Boolean
/**
* Returns `true` if this is a Knora API v2 ontology or entity IRI.
*/
def isKnoraApiV2DefinitionIri: Boolean
/**
* Returns `true` if this is a Knora API v2 ontology entity IRI.
*/
def isKnoraApiV2EntityIri: Boolean
/**
* Returns the IRI's project code, if any.
*/
def getProjectCode: Option[String]
/**
* Returns the IRI's resource ID, if any.
*/
def getResourceID: Option[String]
/**
* Returns the IRI's value ID, if any.
*/
def getValueID: Option[String]
/**
* Returns the IRI's standoff start index, if any.
*/
def getStandoffStartIndex: Option[Int]
/**
* If this is an ontology entity IRI, returns its ontology IRI.
*/
def getOntologyFromEntity: SmartIri
/**
* If this is a Knora ontology or entity IRI, returns the name of the ontology. Otherwise, throws [[DataConversionException]].
*/
def getOntologyName: String
/**
* If this is a Knora entity IRI, returns the name of the entity. Otherwise, throws [[DataConversionException]].
*/
def getEntityName: String
/**
* If this is a Knora ontology IRI, constructs a Knora entity IRI based on it. Otherwise, throws [[DataConversionException]].
*
* @param entityName the name of the entity.
*/
def makeEntityIri(entityName: String): SmartIri
/**
* Returns the IRI's [[OntologySchema]], or `None` if this is not a Knora definition IRI.
*/
def getOntologySchema: Option[OntologySchema]
/**
* Checks that the IRI's ontology schema, if present, corresponds to the specified schema. If the IRI
* has no schema, does nothing. If the IRI has a schema that's different to the specified schema, calls
* `errorFun`.
*
* @param allowedSchema the schema to be allowed.
* @param errorFun a function that throws an exception. It will be called if the IRI has a different schema
* to the one specified.
* @return the same IRI
*/
def checkApiV2Schema(allowedSchema: ApiV2Schema, errorFun: => Nothing): SmartIri
/**
* Converts this IRI to another ontology schema.
*
* @param targetSchema the target schema.
*/
override def toOntologySchema(targetSchema: OntologySchema): SmartIri
/**
* Constructs a short prefix label for the ontology that the IRI belongs to.
*/
def getShortPrefixLabel: String
/**
* Constructs a longer prefix label than the one returned by `getShortPrefixLabel`, which may be needed
* if there are ontology name collisions.
*/
def getLongPrefixLabel: String
/**
* If this is the IRI of a link value property, returns the IRI of the corresponding link property. Throws
* [[DataConversionException]] if this IRI is not a Knora entity IRI.
*/
def fromLinkValuePropToLinkProp: SmartIri
/**
* If this is the IRI of a link property, returns the IRI of the corresponding link value property. Throws
* [[DataConversionException]] if this IRI is not a Knora entity IRI.
*/
def fromLinkPropToLinkValueProp: SmartIri
/**
* If this is a Knora data IRI representing a resource, returns an ARK URL for the resource. Throws
* [[DataConversionException]] if this IRI is not a Knora resource IRI.
*
* @param maybeTimestamp an optional timestamp indicating the point in the resource's version history that the ARK URL should
* cite.
*/
def fromResourceIriToArkUrl(maybeTimestamp: Option[Instant] = None): String
/**
 * If this is a Knora data IRI representing a value, returns an ARK URL for the value. Throws
 * [[DataConversionException]] if this IRI is not a Knora value IRI.
 *
 * @param valueUUID      the UUID of the value (presumably used to identify the value in the ARK URL;
 *                       TODO confirm against the implementation).
 * @param maybeTimestamp an optional timestamp indicating the point in the value's version history that the ARK URL should
 *                       cite.
 * @return the ARK URL, as a string.
 */
def fromValueIriToArkUrl(valueUUID: UUID, maybeTimestamp: Option[Instant] = None): String
// Equality, hashing and ordering are all defined on the string form of the IRI, so that smart IRIs
// created by different StringFormatter instances can be compared with one another (see the comment
// at the top of the SmartIri trait).

/** Two smart IRIs are equal if their string representations are equal. */
override def equals(obj: scala.Any): Boolean = obj match {
  case other: SmartIri => other.toString == toString
  case _               => false
}

/** The hash code of the IRI's string representation, consistent with `equals`. */
override def hashCode: Int = toString.hashCode

/** Orders smart IRIs by comparing their string representations. */
def compare(that: SmartIri): Int = toString.compareTo(that.toString)
}
/**
 * Provides `apply` and `unapply` methods for `SmartIri`.
 */
object SmartIri {
// Parses an IRI string using the StringFormatter that is implicitly in scope.
def apply(iriStr: IRI)(implicit stringFormatter: StringFormatter): SmartIri = stringFormatter.toSmartIri(iriStr)
// Extracts the string form of a smart IRI; this always succeeds.
def unapply(iri: SmartIri): Option[String] = Some(iri.toString)
}
/**
 * Provides automatic conversion of IRI strings to [[SmartIri]] objects. The conversion methods are added
 * to IRI strings by the implicit value class `ConvertibleIri`; see [[https://www.scala-lang.org/api/current/scala/AnyVal.html]]
 * for details on value classes. Both methods delegate to the [[StringFormatter]] that is implicitly in scope.
 */
object IriConversions {
implicit class ConvertibleIri(val self: IRI) extends AnyVal {
/**
 * Converts an IRI string to a [[SmartIri]].
 *
 * @param stringFormatter the [[StringFormatter]] that does the conversion.
 * @return the corresponding [[SmartIri]].
 */
def toSmartIri(implicit stringFormatter: StringFormatter): SmartIri = stringFormatter.toSmartIri(self)
/**
 * Converts an IRI string to a [[SmartIri]]. If the string cannot be converted, a function is called to report
 * the error. Use this function to parse IRIs from client input.
 *
 * @param errorFun        A function that throws an exception. It will be called if the string cannot be converted.
 * @param stringFormatter the [[StringFormatter]] that does the conversion.
 * @return the corresponding [[SmartIri]].
 */
def toSmartIriWithErr(errorFun: => Nothing)(implicit stringFormatter: StringFormatter): SmartIri =
stringFormatter.toSmartIriWithErr(self, errorFun)
}
}
/**
* Handles string parsing, formatting, conversion, and validation.
*/
class StringFormatter private (
val maybeConfig: Option[AppConfig] = None,
maybeKnoraHostAndPort: Option[String] = None,
initForTest: Boolean = false
) {
import StringFormatter._
private val base64Encoder = Base64.getUrlEncoder.withoutPadding
private val base64Decoder = Base64.getUrlDecoder
// The host and port number that this Knora server is running on, and that should be used
// when constructing IRIs for project-specific ontologies.
private val knoraApiHostAndPort: Option[String] =
  if (initForTest) {
    // Automated tests always use the default host and port.
    Some("0.0.0.0:3333")
  } else {
    // Prefer the value from the configuration; without a configuration, fall back to the
    // host and port passed to the constructor (which may itself be absent).
    maybeConfig.map(_.knoraApi.externalOntologyIriHostAndPort).orElse(maybeKnoraHostAndPort)
  }

// The protocol and host that the ARK resolver is running on (a fixed value in tests).
private val arkResolver: Option[String] =
  if (initForTest) Some("http://0.0.0.0:3336") else maybeConfig.map(_.ark.resolver)

// The DaSCH's ARK assigned number (a fixed value in tests).
private val arkAssignedNumber: Option[Int] =
  if (initForTest) Some(72163) else maybeConfig.map(_.ark.assignedNumber)
// The hostname used in internal Knora IRIs.
private val InternalIriHostname = "www.knora.org"

// The hostname used in built-in and shared Knora API v2 IRIs.
private val CentralKnoraApiHostname = "api.knora.org"

// The strings that Knora data IRIs can start with.
private val DataIriStarts: Set[String] = Set(s"http://${IriDomain}/")

// The project code of the default shared ontologies project.
private val DefaultSharedOntologiesProjectCode = "0000"

// The beginnings of Knora definition IRIs that we know we can cache: the two fixed hostnames plus,
// if it is known, this server's own host and port.
private val KnoraDefinitionIriStarts =
  (Set(InternalIriHostname, CentralKnoraApiHostname) ++ knoraApiHostAndPort).map(host => s"http://${host}")

// The beginnings of all definition IRIs that we know we can cache: Knora's own, plus the
// RDF, RDFS, XSD and OWL namespaces.
private val CacheableIriStarts = KnoraDefinitionIriStarts ++ Set(
  OntologyConstants.Rdf.RdfPrefixExpansion,
  OntologyConstants.Rdfs.RdfsPrefixExpansion,
  OntologyConstants.Xsd.XsdPrefixExpansion,
  OntologyConstants.Owl.OwlPrefixExpansion
)

// Reserved words used in Knora API v2 IRI version segments.
private val versionSegmentWords = Set("simple", "v2")

// Reserved words that cannot be used in project-specific ontology names
// (these include the version segment words).
private val reservedIriWords =
  Set("knora", "ontology", "rdf", "rdfs", "owl", "xsd", "schema", "shared") ++ versionSegmentWords
// The expected format of a Knora date.
// Calendar:YYYY[-MM[-DD]][ EE][:YYYY[-MM[-DD]][ EE]]
// EE being the era: one of BC, AD, BCE or CE.
// A year has 1 to 4 digits and must not start with 0. A month or day has 1 or 2 digits.
// NOTE(review): the regex only rejects "00" as a month or day; it does not check ranges
// (e.g. month 13), so range validation is presumably done elsewhere. Confirm.
private val KnoraDateRegex: Regex = ("""^(GREGORIAN|JULIAN|ISLAMIC)""" +
CalendarSeparator + // separator after the calendar name
"""(?:[1-9][0-9]{0,3})(""" + // year
PrecisionSeparator +
"""(?!00)[0-9]{1,2}(""" + // month
PrecisionSeparator +
"""(?!00)[0-9]{1,2})?)?( BC| AD| BCE| CE)?(""" + // day, then the optional era
CalendarSeparator + // separator if a period is given
"""(?:[1-9][0-9]{0,3})(""" + // year 2
PrecisionSeparator +
"""(?!00)[0-9]{1,2}(""" + // month 2
PrecisionSeparator +
"""(?!00)[0-9]{1,2})?)?( BC| AD| BCE| CE)?)?$""").r // day 2, then the optional era 2
// Characters that are escaped in strings that will be used in SPARQL. This array and
// SparqlEscapeOutput are parallel: the element at each index here corresponds to the
// element at the same index there, so the two must be kept in the same order.
private val SparqlEscapeInput = Array(
"\\", // backslash
"\"", // double quote
"'", // single quote
"\t", // tab
"\n" // newline
)
// Escaped characters as they are used in SPARQL: each input character preceded by a backslash
// (tab and newline are written as a backslash followed by the letter t or n).
private val SparqlEscapeOutput = Array(
"\\\\",
"\\\"",
"\\'",
"\\t",
"\\n"
)
// A regex for matching hexadecimal color codes.
// http://stackoverflow.com/questions/1636350/how-to-identify-a-given-string-is-hex-color-format
private val ColorRegex: Regex = "^#(?:[0-9a-fA-F]{3}){1,2}$".r
// A regex sub-pattern for ontology prefix labels and local entity names. According to
// <https://www.w3.org/TR/turtle/#prefixed-name>, a prefix label in Turtle must be a valid XML NCName
// <https://www.w3.org/TR/1999/REC-xml-names-19990114/#NT-NCName>. Knora also requires a local entity name to
// be an XML NCName. The pattern is a letter or underscore, followed by any number of letters,
// ASCII digits, underscores, dots and hyphens.
private val NCNamePattern: String = """[\p{L}_][\p{L}0-9_.-]*"""

// A regex for matching a string containing only an ontology prefix label or a local entity name
// (NCNamePattern anchored at both ends).
private val NCNameRegex: Regex = s"^${NCNamePattern}$$".r

// A regex sub-pattern for project IDs, which must consist of 4 hexadecimal digits.
private val ProjectIDPattern: String = """\p{XDigit}{4,4}"""

// A regex for matching a string containing the project ID (ProjectIDPattern anchored at both ends).
private val ProjectIDRegex: Regex = s"^${ProjectIDPattern}$$".r
// A regex for the URL path of an API v2 ontology (built-in or project-specific).
private val ApiV2OntologyUrlPathRegex: Regex = (
"^" + "/ontology/((" +
ProjectIDPattern + ")/)?(" + NCNamePattern + ")(" +
OntologyConstants.KnoraApiV2Complex.VersionSegment + "|" + OntologyConstants.KnoraApiV2Simple.VersionSegment + ")$"
).r
// The start of the IRI of a project-specific API v2 ontology that is served by this API server,
// or None if this server's host and port are not known.
private val MaybeProjectSpecificApiV2OntologyStart: Option[String] =
  knoraApiHostAndPort.map(hostAndPort => s"http://${hostAndPort}/ontology/")
// A regex for a project-specific XML import namespace.
private val ProjectSpecificXmlImportNamespaceRegex: Regex = (
"^" + OntologyConstants.KnoraXmlImportV1.ProjectSpecificXmlImportNamespace.XmlImportNamespaceStart +
"(shared/)?((" + ProjectIDPattern + ")/)?(" + NCNamePattern + ")" +
OntologyConstants.KnoraXmlImportV1.ProjectSpecificXmlImportNamespace.XmlImportNamespaceEnd + "$"
).r
// In XML import data, a property from another ontology is referred to as prefixLabel__localName. The prefix label
// may start with a project ID (prefixed with 'p') and a hyphen. This regex parses that pattern.
private val PropertyFromOtherOntologyInXmlImportRegex: Regex = (
"^(p(" + ProjectIDPattern + ")-)?(" + NCNamePattern + ")__(" + NCNamePattern + ")$"
).r
// In XML import data, a standoff link tag that refers to a resource described in the import must have the
// form defined by this regex.
private val StandoffLinkReferenceToClientIDForResourceRegex: Regex = (
"^ref:(" + NCNamePattern + ")$"
).r
private val ApiVersionNumberRegex: Regex = "^v[0-9]+.*$".r
// A regex for matching a string containing an email address.
private val EmailAddressRegex: Regex =
"""^.+@.+$""".r
// A regex sub-pattern matching the random IDs generated by KnoraIdUtil, which are Base64-encoded
// using the "URL and Filename safe" Base 64 alphabet, without padding, as specified in Table 2 of
// RFC 4648.
private val Base64UrlPattern = "[A-Za-z0-9_-]+"
private val Base64UrlPatternRegex: Regex = ("^" + Base64UrlPattern + "$").r
// Calculates check digits for resource IDs in ARK URLs.
private val base64UrlCheckDigit = new Base64UrlCheckDigit
// A regex that matches a Knora resource IRI: http://<IriDomain>/<project ID>/<resource ID>.
// Its capture groups are the project ID and the resource ID.
private val ResourceIriRegex: Regex =
  s"^http://${IriDomain}/(${ProjectIDPattern})/(${Base64UrlPattern})$$".r

// A regex that matches a Knora value IRI: a resource IRI followed by /values/<value ID>.
// Its capture groups are the project ID, the resource ID and the value ID.
private val ValueIriRegex: Regex =
  s"^http://${IriDomain}/(${ProjectIDPattern})/(${Base64UrlPattern})/values/(${Base64UrlPattern})$$".r

// A regex that matches a Knora standoff IRI: a value IRI followed by /standoff/<start index>.
// Its capture groups are the project ID, the resource ID, the value ID and the start index.
// The `raw` interpolator is used so that the backslash in \d reaches the regex unchanged.
private val StandoffIriRegex: Regex =
  raw"^http://${IriDomain}/(${ProjectIDPattern})/(${Base64UrlPattern})/values/(${Base64UrlPattern})/standoff/(\d+)$$".r
// A regex that parses a Knora ARK timestamp.
private val ArkTimestampRegex: Regex =
"""^(\d{4})(\d{2})(\d{2})T(\d{2})(\d{2})(\d{2})(\d{1,9})?Z$""".r
// A regex that finds trailing zeroes.
private val TrailingZerosRegex: Regex =
"""0+$""".r
/**
* A regex that matches a valid username
* - 4 - 50 characters long
* - Only contains alphanumeric characters, underscore and dot.
* - Underscore and dot can't be at the end or start of a username
* - Underscore or dot can't be used multiple times in a row
*/
private val UsernameRegex: Regex =
"""^(?=.{4,50}$)(?![_.])(?!.*[_.]{2})[a-zA-Z0-9._]+(?<![_.])$""".r
/**
 * The information that is stored about non-Knora IRIs: the IRI type is unknown and there is no
 * ontology schema. The project code, ontology name and entity name are left at their defaults
 * in [[SmartIriInfo]], which are all `None`.
 */
private val UnknownIriInfo = SmartIriInfo(iriType = UnknownIriType, ontologySchema = None)
/**
* The implementation of [[SmartIri]]. An instance of this class can only be constructed by [[StringFormatter]].
* The constructor validates and parses the IRI.
*
* @param iriStr the IRI string to be parsed.
* @param parsedIriInfo if this smart IRI is the result of a conversion from another smart IRI, information
* about the IRI being constructed.
* @param errorFun a function that throws an exception. It will be called if the IRI is invalid.
*/
class SmartIriImpl(iriStr: IRI, parsedIriInfo: Option[SmartIriInfo], errorFun: => Nothing) extends SmartIri {
def this(iriStr: IRI) = this(iriStr, None, throw DataConversionException(s"Couldn't parse IRI: $iriStr"))
def this(iriStr: IRI, parsedIriInfo: Option[SmartIriInfo]) =
this(iriStr, parsedIriInfo, throw DataConversionException(s"Couldn't parse IRI: $iriStr"))
private val iri: IRI = validateAndEscapeIri(iriStr, errorFun)
/**
 * Determines the API v2 schema of an external IRI from the trailing segments of its namespace.
 * A namespace ending in `simple/v2` uses the simple schema; any other namespace of at least two
 * segments ending in `v2` uses the complex schema. In every other case (including fewer than
 * two segments), `errorFun` is called.
 *
 * @param segments the segments of the namespace.
 * @return the IRI's API schema.
 */
private def parseApiV2VersionSegments(segments: Vector[String]): ApiV2Schema =
  // takeRight(2) yields fewer than two elements for a too-short input, which matches neither
  // of the two-element patterns and therefore falls through to errorFun.
  segments.takeRight(2) match {
    case Vector("simple", "v2") => ApiV2Simple
    case Vector(_, "v2")        => ApiV2Complex
    case _                      => errorFun
  }
// Extract Knora-specific information from the IRI.
private val iriInfo: SmartIriInfo = parsedIriInfo match {
case Some(info) =>
// This smart IRI is the result of a conversion from another smart IRI. Use the SmartIriInfo
// we were given.
info
case None =>
// Parse the IRI from scratch.
if (DataIriStarts.exists(startStr => iri.startsWith(startStr))) {
// This is a Knora data IRI. What sort of data IRI is it?
iri match {
case ResourceIriRegex(projectCode: String, resourceID: String) =>
// It's a resource IRI.
SmartIriInfo(
iriType = KnoraDataIri,
ontologySchema = None,
projectCode = Some(projectCode),
resourceID = Some(resourceID)
)
case ValueIriRegex(projectCode: String, resourceID: String, valueID: String) =>
// It's a value IRI.
SmartIriInfo(
iriType = KnoraDataIri,
ontologySchema = None,
projectCode = Some(projectCode),
resourceID = Some(resourceID),
valueID = Some(valueID)
)
case StandoffIriRegex(
projectCode: String,
resourceID: String,
valueID: String,
standoffStartIndex: String
) =>
// It's a standoff IRI.
SmartIriInfo(
iriType = KnoraDataIri,
ontologySchema = None,
projectCode = Some(projectCode),
resourceID = Some(resourceID),
valueID = Some(valueID),
standoffStartIndex = Some(standoffStartIndex.toInt)
)
case _ =>
// It's some other kind of data IRI; nothing else to do.
SmartIriInfo(
iriType = KnoraDataIri,
ontologySchema = None
)
}
} else if (
iri.startsWith(OntologyConstants.NamedGraphs.DataNamedGraphStart) ||
iri == OntologyConstants.NamedGraphs.KnoraExplicitNamedGraph
) {
// Nothing else to do.
SmartIriInfo(
iriType = KnoraDataIri,
ontologySchema = None
)
} else {
// If this is an entity IRI in a hash namespace, separate the entity name from the namespace.
val hashPos = iri.lastIndexOf('#')
val (namespace: String, entityName: Option[String]) = if (hashPos >= 0 && hashPos < iri.length) {
val namespace = iri.substring(0, hashPos)
val entityName = iri.substring(hashPos + 1)
// Validate the entity name as an NCName.
(namespace, Some(validateNCName(entityName, errorFun)))
} else {
(iri, None)
}
// Remove the URL scheme (http://), and split the remainder of the namespace into slash-delimited segments.
val body = namespace.substring(namespace.indexOf("//") + 2)
val segments = body.split('/').toVector
// The segments must contain at least a hostname.
if (segments.isEmpty) {
errorFun
}
// Determine the ontology schema by looking at the hostname and the version segment.
val hostname = segments.head
val (ontologySchema: Option[OntologySchema], hasProjectSpecificHostname: Boolean) = hostname match {
case InternalIriHostname => (Some(InternalSchema), false)
case CentralKnoraApiHostname => (Some(parseApiV2VersionSegments(segments)), false)
case _ =>
// If our StringFormatter instance was initialised with the Knora API server's hostname,
// use that to identify project-specific Knora API v2 IRIs.
knoraApiHostAndPort match {
case Some(hostAndPort) =>
if (hostname == hostAndPort) {
(Some(parseApiV2VersionSegments(segments)), true)
} else {
// If we don't recognise the hostname, this isn't a Knora IRI.
(None, false)
}
case None =>
// If we don't have the Knora API server's hostname (because we're using the
// StringFormatter instance for constant ontologies), we can't recognise
// project-specific Knora API v2 IRIs.
(None, false)
}
}
// If this is a Knora definition IRI, get its name and optional project code.
if (ontologySchema.nonEmpty) {
// A Knora definition IRI must start with "http://" and have "ontology" as its second segment.
if (!(iri.startsWith("http://") && segments.length >= 3 && segments(1) == "ontology")) {
errorFun
}
// Determine the length of the version segment, if any.
val versionSegmentsLength = ontologySchema match {
case Some(InternalSchema) => 0
case Some(ApiV2Complex) => 1
case Some(ApiV2Simple) => 2
case None => throw AssertionException("Unreachable code")
}
// Make a Vector containing just the optional 'shared' specification, the optional project code, and the ontology name.
val ontologyPath: Vector[String] = segments.slice(2, segments.length - versionSegmentsLength)
if (ontologyPath.isEmpty || ontologyPath.length > 3) {
errorFun
}
if (ontologyPath.exists(segment => versionSegmentWords.contains(segment))) {
errorFun
}
// Determine whether the ontology is shared, and get its project code, if any.
val (sharedOntology: Boolean, projectCode: Option[String]) = if (ontologyPath.head == "shared") {
if (ontologyPath.length == 2) {
(true, Some(DefaultSharedOntologiesProjectCode)) // default shared ontologies project
} else if (ontologyPath.length == 3) {
(true, Some(validateProjectShortcode(ontologyPath(1), errorFun))) // other shared ontologies project
} else {
errorFun
}
} else if (ontologyPath.length == 2) {
(
false,
Some(validateProjectShortcode(ontologyPath.head, errorFun))
) // non-shared ontology with project code
} else {
(false, None) // built-in ontology
}
// Extract the ontology name.
val ontologyName = ontologyPath.last
val hasBuiltInOntologyName = isBuiltInOntologyName(ontologyName)
if (!hasBuiltInOntologyName) {
validateProjectSpecificOntologyName(ontologyName, errorFun)
}