Skip to content

Commit fe0c62f

Browse files
authored
Merge pull request #12081 from GlobalDataverseCommunityConsortium/DANS-2139-reExportAll
Merging. I'm hacking this part of the code heavily in my never-ending exports PR, but the conflict is minimal. reExportAll Improvements
2 parents 0d165f6 + 4162bae commit fe0c62f

File tree

3 files changed

+51
-23
lines changed

3 files changed

+51
-23
lines changed

doc/sphinx-guides/source/admin/metadataexport.rst

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,15 @@ In addition to the automated exports, a Dataverse installation admin can start a
2222

2323
``curl http://localhost:8080/api/admin/metadata/reExportAll``
2424

25+
``curl http://localhost:8080/api/admin/metadata/reExportAll?olderThan=<YYYY-MM-DD>``
26+
2527
``curl http://localhost:8080/api/admin/metadata/clearExportTimestamps``
2628

2729
``curl http://localhost:8080/api/admin/metadata/:persistentId/reExportDataset?persistentId=doi:10.5072/FK2/AAA000``
2830

2931
The first will attempt to export all the published, local (non-harvested) datasets that haven't been exported yet.
30-
The second will *force* a re-export of every published, local dataset, regardless of whether it has already been exported or not.
32+
The second will *force* a re-export of every published, local dataset, regardless of whether it has already been exported or not.
33+
When the optional ``olderThan`` query parameter is supplied, the second will *force* a re-export of all published, local datasets whose last export occurred before the given ``olderThan`` date.
3134

3235
The first two calls return a status message informing the administrator that the process has been launched (``{"status":"WORKFLOW_IN_PROGRESS"}``). The administrator can check the progress of the process via log files: ``[Payara directory]/glassfish/domains/domain1/logs/export_[time stamp].log``.
3336

src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java

Lines changed: 34 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -698,6 +698,13 @@ public void reExportAllAsync() {
698698
exportAllDatasets(true);
699699
}
700700

701+
// reExportAll with a date *forces* a reexport on all published datasets that were not exported or were exported before the date;
702+
@Asynchronous
703+
public void reExportAllAsync(Date reExportDate) {
704+
exportAllDatasets(true, reExportDate);
705+
706+
}
707+
701708
public void reExportAll() {
702709
exportAllDatasets(true);
703710
}
@@ -715,30 +722,27 @@ public void exportAll() {
715722
exportAllDatasets(false);
716723
}
717724

718-
public void exportAllDatasets(boolean forceReExport) {
725+
private void exportAllDatasets(boolean b) {
726+
exportAllDatasets(b, null);
727+
}
728+
729+
@TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED)
730+
private void exportAllDatasets(boolean forceReExport, Date reExportDate) {
719731
Integer countAll = 0;
720732
Integer countSuccess = 0;
721733
Integer countError = 0;
722734
String logTimestamp = logFormatter.format(new Date());
723735
Logger exportLogger = Logger.getLogger("edu.harvard.iq.dataverse.harvest.client.DatasetServiceBean." + "ExportAll" + logTimestamp);
724736
String logFileName = System.getProperty("com.sun.aas.instanceRoot") + File.separator + "logs" + File.separator + "export_" + logTimestamp + ".log";
725737
FileHandler fileHandler;
726-
boolean fileHandlerSuceeded;
727738
try {
728739
fileHandler = new FileHandler(logFileName);
729740
exportLogger.setUseParentHandlers(false);
730-
fileHandlerSuceeded = true;
741+
exportLogger.addHandler(fileHandler);
731742
} catch (IOException | SecurityException ex) {
732743
Logger.getLogger(DatasetServiceBean.class.getName()).log(Level.SEVERE, null, ex);
733744
return;
734745
}
735-
736-
if (fileHandlerSuceeded) {
737-
exportLogger.addHandler(fileHandler);
738-
} else {
739-
exportLogger = logger;
740-
}
741-
742746
exportLogger.info("Starting an export all job");
743747

744748
for (Long datasetId : findAllLocalDatasetIds()) {
@@ -757,9 +761,17 @@ public void exportAllDatasets(boolean forceReExport) {
757761

758762
// can't trust dataset.getPublicationDate(), no.
759763
Date publicationDate = dataset.getReleasedVersion().getReleaseTime(); // we know this dataset has a non-null released version! Maybe not - SEK 8/19 (We do now! :)
760-
if (forceReExport || (publicationDate != null
761-
&& (dataset.getLastExportTime() == null
762-
|| dataset.getLastExportTime().before(publicationDate)))) {
764+
/**
765+
 * Three cases: force is true and no date given - re-export every dataset; force
766+
 * is true and a reExport date is given - re-export datasets last exported before
767+
 * that date; force is false, reExportDate ignored - re-export datasets last
768+
 * exported before they were last published.
769+
 */
770+
if ((forceReExport && reExportDate == null)
771+
|| (forceReExport && (dataset.getLastExportTime() == null || dataset.getLastExportTime().before(reExportDate)))
772+
|| (forceReExport == false
773+
&& (publicationDate != null && (dataset.getLastExportTime() == null
774+
|| dataset.getLastExportTime().before(publicationDate))))) {
763775
countAll++;
764776
try {
765777
recordService.exportAllFormatsInNewTransaction(dataset);
@@ -768,6 +780,13 @@ public void exportAllDatasets(boolean forceReExport) {
768780
} catch (Exception ex) {
769781
exportLogger.log(Level.INFO, "Error exporting dataset: " + dataset.getDisplayName() + " " + dataset.getGlobalId().asString() + "; " + ex.getMessage(), ex);
770782
countError++;
783+
} catch (Throwable t) {
784+
exportLogger.log(Level.SEVERE, "Fatal error exporting dataset: " + dataset.getDisplayName() + " " + dataset.getGlobalId().asString() + "; " + t.getClass().getName() + ": " + t.getMessage(), t);
785+
exportLogger.info("Datasets processed before fatal error: " + countAll.toString());
786+
exportLogger.info("Datasets exported successfully: " + countSuccess.toString());
787+
exportLogger.info("Datasets failures: " + countError.toString());
788+
fileHandler.close();
789+
throw t;
771790
}
772791
}
773792
}
@@ -778,10 +797,7 @@ public void exportAllDatasets(boolean forceReExport) {
778797
exportLogger.info("Datasets failures: " + countError.toString());
779798
exportLogger.info("Finished export-all job.");
780799

781-
if (fileHandlerSuceeded) {
782-
fileHandler.close();
783-
}
784-
800+
fileHandler.close();
785801
}
786802

787803
@Asynchronous
@@ -1140,4 +1156,5 @@ public void saveStorageQuota(Dataset target, Long allocation) {
11401156
}
11411157
em.flush();
11421158
}
1159+
11431160
}

src/main/java/edu/harvard/iq/dataverse/api/Metadata.java

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,9 @@
88
import edu.harvard.iq.dataverse.Dataset;
99
import edu.harvard.iq.dataverse.DatasetServiceBean;
1010

11+
import java.util.Date;
1112
import java.util.logging.Logger;
1213
import jakarta.ejb.EJB;
13-
import jakarta.json.Json;
14-
import jakarta.json.JsonArrayBuilder;
15-
import jakarta.json.JsonObjectBuilder;
1614
import jakarta.ws.rs.*;
1715

1816
import jakarta.ws.rs.core.Response;
@@ -57,8 +55,18 @@ public Response exportAll() {
5755
@GET
5856
@Path("/reExportAll")
5957
@Produces("application/json")
60-
public Response reExportAll() {
61-
datasetService.reExportAllAsync();
58+
public Response reExportAll(@QueryParam(value = "olderThan") String olderThan) {
59+
Date reExportDate = null;
60+
if (olderThan != null && !olderThan.isEmpty()) {
61+
try {
62+
java.text.SimpleDateFormat dateFormat = new java.text.SimpleDateFormat("yyyy-MM-dd");
63+
dateFormat.setLenient(false);
64+
reExportDate = dateFormat.parse(olderThan);
65+
} catch (java.text.ParseException e) {
66+
return error(Response.Status.BAD_REQUEST, "Invalid date format for olderThan parameter. Expected format: YYYY-MM-DD");
67+
}
68+
}
69+
datasetService.reExportAllAsync(reExportDate);
6270
return this.accepted();
6371
}
6472

0 commit comments

Comments
 (0)