forked from DSpace/DSpace
-
Notifications
You must be signed in to change notification settings - Fork 4
/
AbstractMETSIngester.java
1532 lines (1366 loc) · 69.9 KB
/
AbstractMETSIngester.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.content.packager;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;
import java.sql.SQLException;
import java.util.Iterator;
import java.util.List;
import java.util.UUID;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.logging.log4j.Logger;
import org.dspace.authorize.AuthorizeException;
import org.dspace.content.Bitstream;
import org.dspace.content.BitstreamFormat;
import org.dspace.content.Bundle;
import org.dspace.content.Collection;
import org.dspace.content.Community;
import org.dspace.content.DSpaceObject;
import org.dspace.content.InProgressSubmission;
import org.dspace.content.Item;
import org.dspace.content.WorkspaceItem;
import org.dspace.content.crosswalk.CrosswalkException;
import org.dspace.content.crosswalk.MetadataValidationException;
import org.dspace.content.factory.ContentServiceFactory;
import org.dspace.content.service.BitstreamFormatService;
import org.dspace.content.service.BitstreamService;
import org.dspace.content.service.BundleService;
import org.dspace.content.service.CollectionService;
import org.dspace.content.service.CommunityService;
import org.dspace.content.service.ItemService;
import org.dspace.content.service.WorkspaceItemService;
import org.dspace.core.Constants;
import org.dspace.core.Context;
import org.dspace.core.LogHelper;
import org.dspace.handle.factory.HandleServiceFactory;
import org.dspace.handle.service.HandleService;
import org.dspace.services.ConfigurationService;
import org.dspace.services.factory.DSpaceServicesFactory;
import org.dspace.workflow.WorkflowException;
import org.dspace.workflow.factory.WorkflowServiceFactory;
import org.jdom2.Element;
/**
* Base class for package ingester of METS (Metadata Encoding and Transmission
* Standard) Packages.<br>
* See <a href="http://www.loc.gov/standards/mets/">
* http://www.loc.gov/standards/mets/</a>.
* <p>
* This is a generic packager framework intended to be subclassed to create
* ingesters for more specific METS "profiles". METS is an abstract and flexible
* framework that can encompass many different kinds of metadata and inner
* package structures.
*
* <p>
* <b>Package Parameters:</b>
* <ul>
* <li><code>validate</code> -- true/false attempt to schema-validate the METS
* manifest.</li>
* <li><code>manifestOnly</code> -- package consists only of a manifest
* document.</li>
* <li><code>ignoreHandle</code> -- true/false, ignore AIP's idea of handle
* when ingesting.</li>
* <li><code>ignoreParent</code> -- true/false, ignore AIP's idea of parent
* when ingesting.</li>
* </ul>
* <p>
* <b>Configuration Properties:</b>
* <ul>
* <li><code>mets.CONFIGNAME.ingest.preserveManifest</code> - if <em>true</em>,
* the METS manifest itself is preserved in a bitstream named
* <code>mets.xml</code> in the <code>METADATA</code> bundle. If it is
* <em>false</em> (the default), the manifest is discarded after ingestion.</li>
*
* <li><code>mets.CONFIGNAME.ingest.manifestBitstreamFormat</code> - short name
* of the bitstream format to apply to the manifest; MUST be specified when
* preserveManifest is true.</li>
*
* <li><code>mets.default.ingest.crosswalk.MD_SEC_NAME</code> = PLUGIN_NAME
* Establishes a default crosswalk plugin for the given type of metadata in a
* METS mdSec (e.g. "DC", "MODS"). The plugin may be either a stream or
* XML-oriented ingestion crosswalk. Subclasses can override the default mapping
* with their own, substituting their configurationName for "default" in the
* configuration property key above.</li>
*
* <li><code>mets.CONFIGNAME.ingest.useCollectionTemplate</code> - if
* <em>true</em>, when an item is created, use the collection template. If it is
* <em>false</em> (the default), any existing collection template is ignored.</li>
* </ul>
*
* @author Larry Stone
* @author Tim Donohue
* @see org.dspace.content.packager.METSManifest
* @see AbstractPackageIngester
* @see PackageIngester
*/
public abstract class AbstractMETSIngester extends AbstractPackageIngester {
/**
* Logger shared by all instances of this ingester (log4j category named after this class).
*/
private static final Logger log = org.apache.logging.log4j.LogManager.getLogger(AbstractMETSIngester.class);
// Content-layer services, looked up once per ingester instance from ContentServiceFactory.
// They are protected so that subclasses (profile-specific ingesters) can reuse them.
protected final BitstreamService bitstreamService = ContentServiceFactory.getInstance().getBitstreamService();
protected final BitstreamFormatService bitstreamFormatService = ContentServiceFactory.getInstance()
.getBitstreamFormatService();
protected final BundleService bundleService = ContentServiceFactory.getInstance().getBundleService();
protected final CommunityService communityService = ContentServiceFactory.getInstance().getCommunityService();
protected final CollectionService collectionService = ContentServiceFactory.getInstance().getCollectionService();
protected final ItemService itemService = ContentServiceFactory.getInstance().getItemService();
// Handle service, obtained from HandleServiceFactory.
protected final HandleService handleService = HandleServiceFactory.getInstance().getHandleService();
// Used to look up the WorkspaceItem wrapping an Item that has not been installed yet.
protected final WorkspaceItemService workspaceItemService = ContentServiceFactory.getInstance()
.getWorkspaceItemService();
// Configuration service, obtained from DSpaceServicesFactory.
protected final ConfigurationService configurationService
= DSpaceServicesFactory.getInstance().getConfigurationService();
/**
 * Callback handed to {@link METSManifest} so that it can read the contents of
 * external metadata files referenced by <code>mdRef</code> elements.
 * <p>
 * An instance remembers the package file and the packager parameters it was
 * created with; each lookup is delegated to
 * <code>AbstractMETSIngester.getFileInputStream()</code> using the file name
 * extracted from the <code>mdRef</code> element.
 */
protected static final class MdrefManager implements METSManifest.Mdref {
    /** Packager parameters forwarded to every file lookup. */
    private final PackageParameters params;
    /** Package the referenced files are read from; may be null. */
    private final File packageFile;

    /**
     * Only the enclosing ingester creates instances.
     *
     * @param packageFile package file to resolve references against
     * @param params      packager parameters in effect for this ingest
     */
    private MdrefManager(File packageFile, PackageParameters params) {
        this.packageFile = packageFile;
        this.params = params;
    }

    /**
     * Open the external resource named by an <code>mdRef</code> element.
     * See the <code>METSManifest.Mdref</code> interface for the contract.
     *
     * @param mdref the METS mdRef element to locate the input for.
     * @return the input stream of its content.
     * @throws MetadataValidationException if no package file was supplied
     *                                     (or the file name cannot be determined)
     * @throws IOException                 if IO error
     * @see METSManifest
     */
    @Override
    public InputStream getInputStream(Element mdref)
        throws MetadataValidationException, IOException {
        // Resolve the referenced file name first, exactly as callers expect.
        String referencedPath = METSManifest.getFileName(mdref);
        if (packageFile != null) {
            // Reuse the ingester's own lookup so mdRef targets are located
            // the same way as any other file of the package.
            return AbstractMETSIngester.getFileInputStream(packageFile, params, referencedPath);
        }
        throw new MetadataValidationException(
            "Failed referencing mdRef element, because there is no package specified.");
    }
} // end MdrefManager class
/**
 * Build a new DSpace object from a METS content package. Everything that is
 * created is dictated by the METS manifest. The package is a ZIP archive
 * (or, optionally, a bare manifest XML document); inside a Zip all files are
 * relative to the top level and the manifest is named mets.xml.
 *
 * @param context DSpace context.
 * @param parent  parent under which to create the new object (may be null --
 *                in which case the parent is determined from the package, or
 *                an error is thrown).
 * @param pkgFile the package file to ingest
 * @param params  Properties-style list of options (interpreted by each
 *                packager). Defaults for <code>ignoreHandle</code> and
 *                <code>ignoreParent</code> may be added to it by this method.
 * @param license may be null, which takes default license.
 * @return DSpaceObject created by ingest, or null if nothing was ingested.
 * @throws PackageValidationException if the package is unacceptable (e.g. it
 *                                    has no manifest) or there is a fatal
 *                                    error turning it into a DSpaceObject.
 * @throws CrosswalkException         if crosswalk error
 * @throws AuthorizeException         if authorization error
 * @throws SQLException               if database error
 * @throws IOException                if IO error
 * @throws WorkflowException          if workflow error
 */
@Override
public DSpaceObject ingest(Context context, DSpaceObject parent,
                           File pkgFile, PackageParameters params, String license)
    throws PackageValidationException, CrosswalkException,
    AuthorizeException, SQLException, IOException, WorkflowException {
    log.info(LogHelper.getHeader(context, "package_parse",
                                 "Parsing package for ingest, file=" + pkgFile.getName()));

    // Pull the METS manifest out of the package; nothing can be ingested without it.
    METSManifest manifest = parsePackage(context, pkgFile, params);
    if (manifest == null) {
        throw new PackageValidationException(
            "No METS Manifest found (filename="
                + METSManifest.MANIFEST_FILE
                + "). Package is unacceptable!");
    }

    // Validate the manifest before acting on it.
    checkManifest(manifest);

    // A brand new submission (i.e. not a restore) gets a new handle unless
    // the caller explicitly said otherwise.
    if (!(params.restoreModeEnabled() || params.containsKey("ignoreHandle"))) {
        params.addProperty("ignoreHandle", "true");
    }

    // An explicitly supplied parent wins over the parent named in the
    // manifest unless the caller explicitly said otherwise.
    if (parent != null && !params.containsKey("ignoreParent")) {
        params.addProperty("ignoreParent", "true");
    }

    // Actually ingest the object described by the manifest.
    DSpaceObject created = ingestObject(context, parent, manifest, pkgFile, params, license);
    if (created == null) {
        // Nothing was ingested, so there is nothing to log or to recurse into.
        return null;
    }

    // Distinguish a fresh ingest from a restore of a previously existing object.
    String action = params.restoreModeEnabled() ? "package_restore" : "package_ingest";
    log.info(LogHelper.getHeader(context, action,
                                 "Created new Object, type="
                                     + Constants.typeText[created.getType()] + ", handle="
                                     + created.getHandle() + ", dbID="
                                     + created.getID()));

    // In recursive mode, remember every child package referenced by this
    // manifest together with the object it belongs to. The recursion itself
    // is driven by AbstractPackageIngester, based on these references.
    if (params.recursiveModeEnabled()) {
        for (String childPath : manifest.getChildMetsFilePaths()) {
            addPackageReference(created, childPath);
        }
    }
    return created;
}
/**
 * Parse a given input package, ultimately returning the METS manifest out
 * of the package. Inside a Zip package the METS manifest is assumed to be
 * the entry named by <code>METSManifest.MANIFEST_FILE</code>; when the
 * <code>manifestOnly</code> parameter is true the package file itself is
 * the manifest.
 * <p>
 * Every stream opened here is closed before this method returns.
 *
 * @param context DSpace Context
 * @param pkgFile package to parse
 * @param params  Ingestion parameters (<code>validate</code> and
 *                <code>manifestOnly</code> are consulted, both default to false)
 * @return parsed out METSManifest, or null if a Zip package contains no
 * manifest entry
 * @throws IOException                 if IO error
 * @throws SQLException                if database error
 * @throws AuthorizeException          if authorization error
 * @throws MetadataValidationException if metadata validation error
 */
protected METSManifest parsePackage(Context context, File pkgFile,
                                    PackageParameters params) throws IOException, SQLException,
    AuthorizeException, MetadataValidationException {
    // whether or not to validate the METSManifest before processing
    // (default=false)
    // (Even though it's preferable to validate -- it's costly and takes a
    // lot of time, unless you cache schemas locally)
    boolean validate = params.getBooleanProperty("validate", false);

    if (params.getBooleanProperty("manifestOnly", false)) {
        // The package is a bare METS manifest: parse it directly, and make
        // sure the file handle is released whether or not parsing succeeds.
        try (InputStream manifestStream = new FileInputStream(pkgFile)) {
            return METSManifest.create(manifestStream, validate, getConfigurationName());
        }
    }

    // Otherwise the package is a Zip archive holding the manifest entry.
    try (ZipFile zip = new ZipFile(pkgFile)) {
        ZipEntry manifestEntry = zip.getEntry(METSManifest.MANIFEST_FILE);
        if (manifestEntry == null) {
            // No manifest in the archive; the caller decides how to react.
            return null;
        }
        // parse the manifest and sanity-check it.
        try (InputStream manifestStream = zip.getInputStream(manifestEntry)) {
            return METSManifest.create(manifestStream, validate, getConfigurationName());
        }
    }
}
/**
 * Ingest/import a single DSpace Object, based on the associated METS
 * Manifest and the parameters passed to the METSIngester.
 * <p>
 * The work is done in six steps: (1) read type, parent, handle and UUID from
 * the manifest, (2) create the object, (3) crosswalk administrative
 * metadata, (4) crosswalk descriptive metadata, (5) add type-specific
 * content (bitstreams and license for Items, logo/template for containers),
 * (6) update the object.
 *
 * @param context  DSpace Context
 * @param parent   Parent DSpace Object (may be null; it is then looked up
 *                 from the manifest, except for Site objects)
 * @param manifest the parsed METS Manifest
 * @param pkgFile  the full package file (which may include content files if a
 *                 zip)
 * @param params   Parameters passed to METSIngester
 * @param license  DSpace license agreement
 * @return completed result as a DSpace object, or null when the parent
 * object could not be found and the <code>skipIfParentMissing</code>
 * parameter is true
 * @throws IOException                 if IO error
 * @throws SQLException                if database error
 * @throws AuthorizeException          if authorization error
 * @throws CrosswalkException          if crosswalk error
 * @throws MetadataValidationException if metadata validation error
 * @throws WorkflowException           if workflow error
 * @throws PackageValidationException  if package validation error, including
 *                                     failure to create the object or an
 *                                     unknown object type
 */
protected DSpaceObject ingestObject(Context context, DSpaceObject parent,
                                    METSManifest manifest, File pkgFile, PackageParameters params,
                                    String license) throws IOException, SQLException,
    AuthorizeException, CrosswalkException,
    PackageValidationException, WorkflowException {
    // -- Step 1 --
    // Extract basic information (type, parent, handle) about DSpace object
    // represented by manifest
    // type of DSpace Object (one of the type constants)
    int type = getObjectType(manifest);

    // if no parent passed in (or ignoreParent is false),
    // attempt to determine parent DSpace object from manifest
    if (type != Constants.SITE
        && (parent == null || !params.getBooleanProperty("ignoreParent", false))) {
        try {
            // get parent object from manifest
            parent = getParentObject(context, manifest);
        } catch (UnsupportedOperationException e) {
            // Unless the user asked to skip objects whose parent is
            // missing, pass the exception upward to display to the user.
            if (!params.getBooleanProperty("skipIfParentMissing", false)) {
                throw e;
            }
            // log a warning instead of throwing an error
            log.warn(LogHelper.getHeader(context, "package_ingest",
                                         "SKIPPING ingest of object '" + manifest.getObjID()
                                             + "' as parent DSpace Object could not be found. "
                                             + "If you are running a recursive ingest, it is likely this " +
                                             "object will be created as soon as its parent is created."));
            // return a null object (nothing ingested as parent was missing)
            return null;
        }
    }

    // if we are *not* ignoring the handle in manifest (i.e. ignoreHandle is
    // false), get handle from manifest
    String handle = null;
    if (!params.getBooleanProperty("ignoreHandle", false)) {
        handle = getObjectHandle(manifest);
    }

    UUID uuid = getObjectID(manifest);

    // -- Step 2 --
    // Create our DSpace Object based on info parsed from manifest, and
    // packager params
    DSpaceObject dso;
    try {
        dso = PackageUtils.createDSpaceObject(context, parent,
                                              type, handle, uuid, params);
    } catch (SQLException sqle) {
        throw new PackageValidationException("Exception while ingesting "
                                                 + pkgFile.getPath(), sqle);
    }

    // if we are uninitialized, throw an error -- something's wrong!
    if (dso == null) {
        // The parent may legitimately be null here (e.g. for a Site), so it
        // must not be dereferenced blindly while building the message.
        String parentHandle = (parent == null) ? null : parent.getHandle();
        throw new PackageValidationException(
            "Unable to initialize object specified by package (type='"
                + type + "', handle='" + handle + "' and parent='"
                + parentHandle + "').");
    }

    // -- Step 3 --
    // Run our Administrative metadata crosswalks!

    // initialize callback object which will retrieve external inputstreams
    // for any <mdRef>'s found in METS
    MdrefManager callback = new MdrefManager(pkgFile, params);

    // Crosswalk the sourceMD first, so that we make sure to fill in
    // submitter info (and any other initial applicable info)
    manifest.crosswalkObjectSourceMD(context, params, dso, callback);

    // Next, crosswalk techMD, digiprovMD, rightsMD
    manifest.crosswalkObjectOtherAdminMD(context, params, dso, callback);

    // -- Step 4 --
    // Run our Descriptive metadata (dublin core, etc) crosswalks!
    crosswalkObjectDmd(context, dso, manifest, callback, manifest
        .getItemDmds(), params);

    // For Items, also sanity-check the metadata for minimum requirements.
    if (type == Constants.ITEM) {
        PackageUtils.checkItemMetadata((Item) dso);
    }

    // -- Step 5 --
    // Add all content files as bitstreams on new DSpace Object
    if (type == Constants.ITEM) {
        Item item = (Item) dso;

        // Check if this item is still in a user's workspace.
        // It should be, as we haven't completed its install yet.
        WorkspaceItem wsi = workspaceItemService.findByItem(context, item);

        // Get collection this item is being submitted to; fall back to the
        // collection of the workspace item when there is no owning one yet.
        Collection collection = item.getOwningCollection();
        if (collection == null && wsi != null) {
            collection = wsi.getCollection();
        }

        // save manifest as a bitstream in Item if desired
        if (preserveManifest()) {
            addManifestBitstream(context, item, manifest);
        }

        // save all other bitstreams in Item
        addBitstreams(context, item, manifest, pkgFile, params, callback);

        // have subclass manage license since it may be extra package file.
        addLicense(context, item, license, collection, params);

        // Subclass hook for final checks and rearrangements
        // (this allows subclasses to do some final validation / changes as
        // necessary)
        finishObject(context, dso, params);

        // Finally, if item is still in the workspace, then we actually need
        // to install it into the archive & assign its handle.
        if (wsi != null) {
            // Finish creating the item. This actually assigns the handle,
            // and will either install item immediately or start a workflow, based on params
            PackageUtils.finishCreateItem(context, wsi, handle, params);
        }
    } else if (type == Constants.COLLECTION || type == Constants.COMMUNITY) {
        // Add logo if one is referenced from manifest
        addContainerLogo(context, dso, manifest, pkgFile, params);

        if (type == Constants.COLLECTION) {
            // Add template item if one is referenced from manifest (only for Collections)
            addTemplateItem(context, dso, manifest, pkgFile, params, callback);
        }

        // Subclass hook for final checks and rearrangements
        // (this allows subclasses to do some final validation / changes as
        // necessary)
        finishObject(context, dso, params);
    } else if (type == Constants.SITE) {
        // Do nothing by default -- Crosswalks will handle anything necessary to ingest at Site-level

        // Subclass hook for final checks and rearrangements
        // (this allows subclasses to do some final validation / changes as
        // necessary)
        finishObject(context, dso, params);
    } else {
        throw new PackageValidationException(
            "Unknown DSpace Object type in package, type="
                + String.valueOf(type));
    }

    // -- Step 6 --
    // Finish things up!

    // Update the object to make sure all changes are committed
    PackageUtils.updateDSpaceObject(context, dso);

    return dso;
}
/**
 * Replace the contents of a single DSpace Object, based on the associated
 * METS Manifest and the parameters passed to the METSIngester.
 * <p>
 * All bitstreams and all metadata of the existing object are removed and
 * then re-created from the package. Rights/permissions, collection item
 * templates, content source info and item status are NOT cleared (see the
 * TODO in the body).
 *
 * @param context  DSpace Context
 * @param dso      DSpace Object to replace
 * @param manifest the parsed METS Manifest
 * @param pkgFile  the full package file (which may include content files if a
 *                 zip)
 * @param params   Parameters passed to METSIngester
 * @param license  DSpace license agreement
 * @return completed result as a DSpace object (the same <code>dso</code>)
 * @throws IOException                 if IO error
 * @throws SQLException                if database error
 * @throws AuthorizeException          if authorization error
 * @throws CrosswalkException          if crosswalk error
 * @throws MetadataValidationException if metadata validation error
 * @throws PackageValidationException  if the manifest describes a different
 *                                     object type than <code>dso</code>, or
 *                                     if no collection can be determined for
 *                                     an Item being replaced
 */
protected DSpaceObject replaceObject(Context context, DSpaceObject dso,
                                     METSManifest manifest, File pkgFile, PackageParameters params,
                                     String license) throws IOException, SQLException,
    AuthorizeException, CrosswalkException,
    MetadataValidationException, PackageValidationException {
    // -- Step 1 --
    // Before going forward with the replace, let's verify these objects are
    // of the same TYPE! (We don't want to go around trying to replace a
    // COMMUNITY with an ITEM -- that's dangerous.)
    int manifestType = getObjectType(manifest);
    if (manifestType != dso.getType()) {
        throw new PackageValidationException(
            "The object type of the METS manifest ("
                + Constants.typeText[manifestType]
                + ") does not match up with the object type ("
                + Constants.typeText[dso.getType()]
                + ") of the DSpaceObject to be replaced!");
    }

    if (log.isDebugEnabled()) {
        log.debug("Object to be replaced (handle=" + dso.getHandle()
                      + ") is " + Constants.typeText[dso.getType()] + " id="
                      + dso.getID());
    }

    // -- Step 2 --
    // Clear out current object (as we are replacing all its contents &
    // metadata)

    // remove all files attached to this object
    // (For communities/collections this just removes the logo bitstream)
    PackageUtils.removeAllBitstreams(context, dso);

    // clear out all metadata values associated with this object
    PackageUtils.clearAllMetadata(context, dso);

    // TODO -- We are currently NOT clearing out the following during a
    // replace. So, even after a replace, the following information may be
    // retained in the system:
    // o Rights/Permissions in system or on objects
    // o Collection item templates or Content Source info (e.g. OAI
    // Harvesting collections)
    // o Item status (embargo, withdrawn) or mappings to other collections

    // -- Step 3 --
    // Run our Administrative metadata crosswalks!

    // initialize callback object which will retrieve external inputstreams
    // for any <mdRef>s found in METS
    MdrefManager callback = new MdrefManager(pkgFile, params);

    // Crosswalk the sourceMD first, so that we make sure to fill in
    // submitter info (and any other initial applicable info)
    manifest.crosswalkObjectSourceMD(context, params, dso, callback);

    // Next, crosswalk techMD, digiprovMD, rightsMD
    manifest.crosswalkObjectOtherAdminMD(context, params, dso, callback);

    // -- Step 4 --
    // Add all content files as bitstreams on new DSpace Object
    if (dso.getType() == Constants.ITEM) {
        Item item = (Item) dso;

        // save manifest as a bitstream in Item if desired
        if (preserveManifest()) {
            addManifestBitstream(context, item, manifest);
        }

        // save all other bitstreams in Item
        addBitstreams(context, item, manifest, pkgFile, params, callback);

        // have subclass manage license since it may be extra package file.
        Collection owningCollection = (Collection) ContentServiceFactory.getInstance().getDSpaceObjectService(dso)
                                                                        .getParentObject(context, dso);
        if (owningCollection == null) {
            // We are probably dealing with an item that isn't archived yet:
            // look for it in a workspace first, then in a workflow.
            InProgressSubmission inProgressSubmission = workspaceItemService.findByItem(context, item);
            if (inProgressSubmission == null) {
                inProgressSubmission = WorkflowServiceFactory.getInstance().getWorkflowItemService()
                                                             .findByItem(context, item);
            }
            if (inProgressSubmission == null) {
                // Neither lookup found the item; report that clearly instead
                // of failing with a NullPointerException below.
                throw new PackageValidationException(
                    "Unable to determine the Collection of the Item to be replaced (handle="
                        + item.getHandle() + ", id=" + item.getID()
                        + "): it has no parent Collection and is not in a workspace or workflow.");
            }
            owningCollection = inProgressSubmission.getCollection();
        }

        if (params.useCollectionTemplate()) {
            itemService.populateWithTemplateItemMetadata(context, owningCollection, item);
        }

        addLicense(context, item, license, owningCollection, params);

        // FIXME ?
        // should set lastModifiedTime e.g. when ingesting AIP.
        // maybe only do it in the finishObject() callback for AIP.
    } else if (dso.getType() == Constants.COLLECTION || dso.getType() == Constants.COMMUNITY) {
        // Add logo if one is referenced from manifest
        addContainerLogo(context, dso, manifest, pkgFile, params);
    } else if (dso.getType() == Constants.SITE) {
        // Do nothing -- Crosswalks will handle anything necessary to replace at Site-level
    }

    // -- Step 5 --
    // Run our Descriptive metadata (dublin core, etc) crosswalks!
    crosswalkObjectDmd(context, dso, manifest, callback, manifest
        .getItemDmds(), params);

    // For Items, also sanity-check the metadata for minimum requirements.
    if (dso.getType() == Constants.ITEM) {
        PackageUtils.checkItemMetadata((Item) dso);
    }

    // -- Step 6 --
    // Finish things up!

    // Subclass hook for final checks and rearrangements
    // (this allows subclasses to do some final validation / changes as
    // necessary)
    finishObject(context, dso, params);

    // Update the object to make sure all changes are committed
    PackageUtils.updateDSpaceObject(context, dso);

    return dso;
}
/**
* Add Bitstreams to an Item, based on the files listed in the METS Manifest
*
* @param context DSpace Context
* @param item DSpace Item
* @param manifest METS Manifest
* @param pkgFile the full package file (which may include content files if a
* zip)
* @param params Ingestion Parameters
* @param mdRefCallback MdrefManager storing info about mdRefs in manifest
* @throws SQLException if database error
* @throws IOException if IO error
* @throws AuthorizeException if authorization error
* @throws MetadataValidationException if metadata validation error
* @throws CrosswalkException if crosswalk error
* @throws PackageValidationException if package validation error
*/
protected void addBitstreams(Context context, Item item,
METSManifest manifest, File pkgFile, PackageParameters params,
MdrefManager mdRefCallback) throws SQLException, IOException,
AuthorizeException, MetadataValidationException,
CrosswalkException, PackageValidationException {
// Step 1 -- find the ID of the primary or Logo bitstream in manifest
String primaryID = null;
Element primaryFile = manifest.getPrimaryOrLogoBitstream();
if (primaryFile != null) {
primaryID = primaryFile.getAttributeValue("ID");
if (log.isDebugEnabled()) {
log.debug("Got primary bitstream file ID=\"" + primaryID + "\"");
}
}
// Step 2 -- find list of all content files from manifest
// Loop through these files, and add them one by one to Item
List<Element> manifestContentFiles = manifest
.getContentFiles();
List<Element> manifestBundleFiles = manifest
.getBundleFiles();
boolean setPrimaryBitstream = false;
BitstreamFormat unknownFormat = bitstreamFormatService.findUnknown(context);
for (Iterator<Element> mi = manifestContentFiles.iterator(); mi
.hasNext(); ) {
Element mfile = mi.next();
// basic validation -- check that it has an ID attribute
String mfileID = mfile.getAttributeValue("ID");
if (mfileID == null) {
throw new PackageValidationException(
"Invalid METS Manifest: file element without ID attribute.");
}
// retrieve path/name of file in manifest
String path = METSManifest.getFileName(mfile);
// extract the file input stream from package (or retrieve
// externally, if it is an externally referenced file)
InputStream fileStream = getFileInputStream(pkgFile, params, path);
// retrieve bundle name from manifest
String bundleName = METSManifest.getBundleName(mfile);
// Find or create the bundle where bitstream should be attached
Bundle bundle;
List<Bundle> bns = itemService.getBundles(item, bundleName);
if (CollectionUtils.isNotEmpty(bns)) {
bundle = bns.get(0);
} else {
bundle = bundleService.create(context, item, bundleName);
}
// Create the bitstream in the bundle & initialize its name
Bitstream bitstream = bitstreamService.create(context, bundle, fileStream);
bitstream.setName(context, path);
// Set bitstream sequence id, if known
String seqID = mfile.getAttributeValue("SEQ");
if (seqID != null && !seqID.isEmpty()) {
bitstream.setSequenceID(Integer.parseInt(seqID));
}
// crosswalk this bitstream's administrative metadata located in
// METS manifest (or referenced externally)
manifest.crosswalkBitstream(context, params, bitstream, mfileID,
mdRefCallback);
// is this the primary bitstream?
if (primaryID != null && mfileID.equals(primaryID)) {
bundle.setPrimaryBitstreamID(bitstream);
bundleService.update(context, bundle);
setPrimaryBitstream = true;
}
// Run any finishing activities -- this allows subclasses to
// change default bitstream information
finishBitstream(context, bitstream, mfile, manifest, params);
// Last-ditch attempt to divine the format, if crosswalk failed to
// set it:
// 1. attempt to guess from MIME type
// 2. if that fails, guess from "name" extension.
if (bitstream.getFormat(context).equals(unknownFormat)) {
if (log.isDebugEnabled()) {
log.debug("Guessing format of Bitstream left un-set: "
+ bitstream.toString());
}
String mimeType = mfile.getAttributeValue("MIMETYPE");
BitstreamFormat bf = (mimeType == null) ? null
: bitstreamFormatService.findByMIMEType(context, mimeType);
if (bf == null) {
bf = bitstreamFormatService.guessFormat(context, bitstream);
}
bitstreamService.setFormat(context, bitstream, bf);
}
bitstreamService.update(context, bitstream);
} // end for each manifest file
for (Iterator<Element> mi = manifestBundleFiles.iterator(); mi
.hasNext(); ) {
Element mfile = mi.next();
String bundleName = METSManifest.getBundleName(mfile, false);
Bundle bundle;
List<Bundle> bns = itemService.getBundles(item, bundleName);
if (CollectionUtils.isNotEmpty(bns)) {
bundle = bns.get(0);
} else {
bundle = bundleService.create(context, item, bundleName);
}
String mfileGrp = mfile.getAttributeValue("ADMID");
if (mfileGrp != null) {
manifest.crosswalkBundle(context, params, bundle, mfileGrp, mdRefCallback);
} else {
if (log.isDebugEnabled()) {
log.debug("Ingesting bundle with no ADMID, not crosswalking bundle metadata");
}
}
bundleService.update(context, bundle);
} // end for each manifest file
// Step 3 -- Sanity checks
// sanity check for primary bitstream
if (primaryID != null && !setPrimaryBitstream) {
log.warn("Could not find primary bitstream file ID=\"" + primaryID
+ "\" in manifest file \"" + pkgFile.getAbsolutePath()
+ "\"");
}
}
/**
* Save/Preserve the METS Manifest as a Bitstream attached to the given
* DSpace item.
*
* @param context DSpace Context
* @param item DSpace Item
* @param manifest The METS Manifest
* @throws IOException if IO error
* @throws SQLException if database error
* @throws AuthorizeException if authorization error
* @throws PackageValidationException if package validation error
*/
protected void addManifestBitstream(Context context, Item item,
        METSManifest manifest) throws IOException, SQLException,
        AuthorizeException, PackageValidationException {
    // Resolve the "magic" bitstream format name that identifies the manifest
    // BEFORE touching the item, so that a configuration error is reported
    // without first creating a bundle and bitstream that cannot be completed.
    String fmtName = getManifestBitstreamFormat();
    if (fmtName == null) {
        throw new PackageValidationException(
            "Configuration Error: No Manifest BitstreamFormat configured for METS ingester type="
                + getConfigurationName());
    }

    // We'll save the METS Manifest as part of the METADATA bundle.
    Bundle mdBundle = bundleService.create(context, item, Constants.METADATA_BUNDLE_NAME);

    // Create a Bitstream from the METS Manifest's content. The stream is
    // only needed while the bitstream is being created, so release it as
    // soon as that call returns (try-with-resources tolerates a null stream).
    Bitstream manifestBitstream;
    try (InputStream metsStream = manifest.getMetsAsStream()) {
        manifestBitstream = bitstreamService.create(context, mdBundle, metsStream);
    }
    manifestBitstream.setName(context, METSManifest.MANIFEST_FILE);
    manifestBitstream.setSource(context, METSManifest.MANIFEST_FILE);
    // Save name/source before the format lookup (same ordering as before).
    bitstreamService.update(context, manifestBitstream);

    // Look up (or register) the manifest format and attach it to the bitstream.
    BitstreamFormat manifestFormat = PackageUtils
        .findOrCreateBitstreamFormat(context, fmtName,
                                     "application/xml", fmtName + " package manifest");
    manifestBitstream.setFormat(context, manifestFormat);
    bitstreamService.update(context, manifestBitstream);
}
/**
* Add a Logo to a Community or Collection container object based on a METS
* Manifest.
*
* @param context DSpace Context
* @param dso DSpace Container Object
* @param manifest METS Manifest
* @param pkgFile the full package file (which may include content files if a
* zip)
* @param params Ingestion Parameters
* @throws SQLException if database error
* @throws IOException if IO error
* @throws AuthorizeException if authorization error
* @throws MetadataValidationException if metadata validation error
* @throws PackageValidationException if package validation error
*/
protected void addContainerLogo(Context context, DSpaceObject dso,
        METSManifest manifest, File pkgFile, PackageParameters params)
        throws SQLException, IOException, AuthorizeException,
        MetadataValidationException, PackageValidationException {
    Element logoRef = manifest.getPrimaryOrLogoBitstream();
    // only continue if a logo specified in manifest
    if (logoRef == null) {
        return;
    }

    // Find ID of logo file; without it no content file can be matched.
    String logoID = logoRef.getAttributeValue("ID");
    if (logoID == null) {
        throw new PackageValidationException(
            "Invalid METS Manifest: file element without ID attribute.");
    }

    // Loop through manifest content files to find actual logo file
    List<Element> contentFiles = manifest.getContentFiles();
    for (Element mfile : contentFiles) {
        if (!logoID.equals(mfile.getAttributeValue("ID"))) {
            continue;
        }
        String path = METSManifest.getFileName(mfile);
        // extract the file input stream from package (or retrieve
        // externally, if it is an externally referenced file); the stream
        // is closed as soon as the logo has been set so that the
        // underlying zip entry / connection is not leaked.
        try (InputStream fileStream = getFileInputStream(pkgFile, params, path)) {
            // Add this logo to the Community/Collection. Any object that
            // is not a Collection is treated as a Community.
            if (dso.getType() == Constants.COLLECTION) {
                collectionService.setLogo(context, (Collection) dso, fileStream);
            } else {
                communityService.setLogo(context, (Community) dso, fileStream);
            }
        }
        // Only the first content file with a matching ID is used.
        return;
    }
}
/**
* Add a Template Item to a Collection container object based on a METS
* Manifest.
*
* @param context DSpace Context
* @param dso DSpace Container Object
* @param manifest METS Manifest
* @param pkgFile the full package file (which may include content files if a
* zip)
* @param params Ingestion Parameters
* @param callback the MdrefManager (manages all external metadata files
* referenced by METS <code>mdref</code> elements)
* @throws SQLException if database error
* @throws IOException if IO error
* @throws AuthorizeException if authorization error
* @throws CrosswalkException if crosswalk error (including metadata validation errors)
* @throws PackageValidationException if package validation error
*/
protected void addTemplateItem(Context context, DSpaceObject dso,
        METSManifest manifest, File pkgFile, PackageParameters params,
        MdrefManager callback)
        throws SQLException, IOException, AuthorizeException,
        CrosswalkException, PackageValidationException {
    // Template items only valid for collections
    if (dso.getType() != Constants.COLLECTION) {
        return;
    }
    Collection collection = (Collection) dso;

    // retrieve list of all <div>s representing child objects from manifest
    List<?> childObjList = manifest.getChildObjDivs();
    if (childObjList == null || childObjList.isEmpty()) {
        return;
    }

    // Search for the child with a type of "DSpace ITEM Template": it should
    // be the only child of type "ITEM" with "Template" for a suffix.
    Element templateItemDiv = null;
    for (Object child : childObjList) {
        Element childDiv = (Element) child;
        String childType = childDiv.getAttributeValue("TYPE");
        // A <div> without a TYPE attribute cannot be the template item;
        // skip it instead of failing with a NullPointerException.
        if (childType != null
            && childType.contains(Constants.typeText[Constants.ITEM])
            && childType.endsWith(AbstractMETSDisseminator.TEMPLATE_TYPE_SUFFIX)) {
            templateItemDiv = childDiv;
            break;
        }
    }
    if (templateItemDiv == null) {
        // No template item described in this manifest
        return;
    }

    // make sure this templateItemDiv is associated with one or more dmdSecs;
    // without descriptive metadata no template item is created.
    String templateDmdIds = templateItemDiv.getAttributeValue("DMDID");
    if (templateDmdIds == null) {
        return;
    }

    // create our template item & get a reference to it
    itemService.createTemplateItem(context, collection);
    Item templateItem = collection.getTemplateItem();

    // get a reference to the dmdSecs which describe the metadata for this template item
    Element[] templateDmds = manifest.getDmdElements(templateDmdIds);

    // Run our Descriptive metadata (dublin core, etc) crosswalks to add metadata to template item
    crosswalkObjectDmd(context, templateItem, manifest, callback, templateDmds, params);

    // update the template item to save metadata changes
    PackageUtils.updateDSpaceObject(context, templateItem);
}
/**
* Replace an existing DSpace object with the contents of a METS-based
* package. All contents are dictated by the METS manifest. Package is a ZIP
* archive (or optionally bare manifest XML document). In a Zip, all files
* relative to top level and the manifest (as per spec) in mets.xml.