|
43 | 43 | from invenio_files_rest.errors import MultipartMissingParts |
44 | 44 | from invenio_files_rest.models import Bucket, FileInstance, ObjectVersion |
45 | 45 | from invenio_jsonschemas.errors import JSONSchemaNotFound |
| 46 | +from invenio_pidstore.errors import PIDDoesNotExistError |
46 | 47 | from invenio_records.models import RecordMetadata |
47 | 48 | from invenio_records_files.models import RecordsBuckets |
48 | 49 | from invenio_rest.errors import FieldError |
| 50 | + |
| 51 | +from invenio_sipstore.api import RecordSIP, SIP as SIPApi |
| 52 | +from invenio_sipstore.archivers import BagItArchiver |
| 53 | +from invenio_sipstore.models import SIP as SIPModel, \ |
| 54 | + RecordSIP as RecordSIPModel |
| 55 | + |
49 | 56 | from jsonschema.validators import Draft4Validator, RefResolutionError |
50 | 57 | from sqlalchemy.exc import IntegrityError |
51 | 58 | from sqlalchemy.orm.exc import NoResultFound |
|
59 | 66 | from cap.modules.user.utils import (get_existing_or_register_role, |
60 | 67 | get_existing_or_register_user) |
61 | 68 |
|
62 | | -from .errors import (DepositValidationError, FileUploadError, |
| 69 | +from .errors import (ArchivingError, DepositValidationError, FileUploadError, |
63 | 70 | UpdateDepositPermissionsError) |
64 | 71 | from .fetchers import cap_deposit_fetcher |
65 | 72 | from .minters import cap_deposit_minter |
@@ -190,14 +197,59 @@ def permissions(self, pid=None): |
190 | 197 | return self.edit_permissions(data) |
191 | 198 |
|
192 | 199 | @mark_as_action |
193 | | - def publish(self, *args, **kwargs): |
| 200 | + def publish(self, sip_agent=None, *args, **kwargs): |
194 | 201 | """Simple file check before publishing.""" |
195 | 202 | with AdminDepositPermission(self).require(403): |
196 | 203 | for file_ in self.files: |
197 | 204 | if file_.data['checksum'] is None: |
198 | 205 | raise MultipartMissingParts() |
199 | 206 |
|
200 | | - return super(CAPDeposit, self).publish(*args, **kwargs) |
| 207 | + try: |
| 208 | + _, last_record = self.fetch_published() |
| 209 | + is_first_publishing = False |
| 210 | + fetched_files = last_record.files |
| 211 | + create_sip_files = not compare_files(fetched_files, self.files) |
| 212 | + except (PIDDoesNotExistError, KeyError): |
| 213 | + is_first_publishing = True |
| 214 | + create_sip_files = True if self.files else False |
| 215 | + |
| 216 | + deposit = super(CAPDeposit, self).publish(*args, **kwargs) |
| 217 | + recid, record = deposit.fetch_published() |
| 218 | + sip_patch_of = None |
| 219 | + if not is_first_publishing: |
| 220 | + sip_recid = recid |
| 221 | + |
| 222 | + sip_patch_of = ( |
| 223 | + db.session.query(SIPModel) |
| 224 | + .join(RecordSIPModel, RecordSIPModel.sip_id == SIPModel.id) |
| 225 | + .filter(RecordSIPModel.pid_id == sip_recid.id) |
| 226 | + .order_by(SIPModel.created.desc()) |
| 227 | + .first() |
| 228 | + ) |
| 229 | + |
| 230 | + recordsip = RecordSIP.create( |
| 231 | + recid, record, archivable=True, |
| 232 | + create_sip_files=create_sip_files, |
| 233 | + sip_metadata_type='json', |
| 234 | + user_id=current_user.id, |
| 235 | + agent=sip_agent) |
| 236 | + |
| 237 | + archiver = BagItArchiver( |
| 238 | + recordsip.sip, include_all_previous=(not is_first_publishing), |
| 239 | + patch_of=sip_patch_of) |
| 240 | + |
| 241 | + archiver.save_bagit_metadata() |
| 242 | + |
| 243 | + sip = ( |
| 244 | + RecordSIPModel.query |
| 245 | + .filter_by(pid_id=recid.id) |
| 246 | + .order_by(RecordSIPModel.created.desc()) |
| 247 | + .first().sip |
| 248 | + ) |
| 249 | + |
| 250 | + archive_sip.delay(str(sip.id)) |
| 251 | + |
| 252 | + return deposit |
201 | 253 |
|
202 | 254 | @mark_as_action |
203 | 255 | def upload(self, pid=None, *args, **kwargs): |
@@ -559,6 +611,21 @@ def _validate_data(cls, data): |
559 | 611 | .format(schema_fullpath)) |
560 | 612 |
|
561 | 613 |
|
def compare_files(files1, files2):
    """Compare two file lists by their checksums.

    Returns True when both lists are non-None, have equal length and every
    checksum of ``files1`` also occurs in ``files2``.

    :param files1: iterable of file objects exposing ``f['checksum']``.
    :param files2: iterable of file objects exposing ``f['checksum']``.
    :returns: bool
    """
    if files1 is None or files2 is None:
        return False
    if len(files1) != len(files2):
        return False

    # Materialize the checksums as a set: a generator expression here would
    # be (partially) exhausted by the first ``in`` test, producing wrong
    # results whenever files1 holds more than one file.
    checksums = {f['checksum'] for f in files2}

    # NOTE: duplicate checksums are not counted individually; equal-length
    # lists whose checksum *sets* match are considered equal.
    return all(f['checksum'] in checksums for f in files1)
| 627 | + |
| 628 | + |
562 | 629 | @shared_task(max_retries=5) |
563 | 630 | def download_url(pid, url, fileinfo): |
564 | 631 | """Task for fetching external files/repos.""" |
@@ -601,6 +668,36 @@ def download_repo(pid, url, filename): |
601 | 668 | task_commit(record, response.raw, filename, total) |
602 | 669 |
|
603 | 670 |
|
@shared_task(ignore_result=True, max_retries=6,
             default_retry_delay=4 * 60 * 60)
def archive_sip(sip_uuid):
    """Send the SIP for archiving.

    Retries every 4 hours, six times, which should work for up to 24 hours
    archiving system downtime.
    :param sip_uuid: UUID of the SIP for archiving.
    :type sip_uuid: str
    """
    try:
        sip = SIPApi(SIPModel.query.get(sip_uuid))
        archiver = BagItArchiver(sip)
        # The BagIt metadata must have been written at publish time.
        if archiver.get_bagit_metadata(sip) is None:
            raise ArchivingError(
                'Bagit metadata does not exist for SIP: {0}.'.format(sip.id))
        if sip.archived:
            raise ArchivingError(
                'SIP was already archived {0}.'.format(sip.id))
        archiver.write_all_files()
        sip.archived = True
        db.session.commit()
    except ArchivingError:
        # Permanent failure (see above) - re-raise without retrying.
        raise
    except Exception as exc:
        # Transient failure (e.g. archive system downtime) - schedule a
        # retry; re-raise if retrying is no longer possible.
        archive_sip.retry(exc=exc)
        raise
| 700 | + |
604 | 701 | def task_commit(record, response, filename, total): |
605 | 702 | """Commit file to the record.""" |
606 | 703 | record.files[filename].file.set_contents( |
|
0 commit comments