Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ Note that this project **does not** adhere to [Semantic Versioning](https://semv
- We fixed the fallback window height (786 → 768) in JabRefGUI. [#14295](https://github.com/JabRef/jabref/pull/14295)
- We fixed localization of the "New Entries" dialog. [#14455](https://github.com/JabRef/jabref/pull/14455)
- We fixed an issue where keybindings could not be edited and saved. [#14237](https://github.com/JabRef/jabref/issues/14237)
- We fixed an issue with the cleanup of wrongly encoded DOIs. [#14704](https://github.com/JabRef/jabref/pull/14704)
- We re-added the missing GUI commands for importing and exporting preferences. [#14492](https://github.com/JabRef/jabref/pull/14492)
- We fixed the keyboard navigation in the entry editor: pressing <kbd>Shift</kbd> + <kbd>Tab</kbd> on the first field now correctly focuses the last field of the previous tab. [#14513](https://github.com/JabRef/jabref/issues/14513)
- We fixed a crash when importing preferences from older JabRef versions. [#14497](https://github.com/JabRef/jabref/issues/14497)
Expand Down
108 changes: 58 additions & 50 deletions jablib/src/main/java/org/jabref/logic/cleanup/DoiCleanup.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
import java.net.URLDecoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.atomic.AtomicBoolean;

import org.jabref.logic.formatter.bibtexfields.ClearFormatter;
import org.jabref.model.FieldChange;
Expand All @@ -16,70 +16,78 @@
import org.jabref.model.entry.identifier.ArXivIdentifier;
import org.jabref.model.entry.identifier.DOI;

/**
* Formats the DOI (e.g. removes http part) and also infers DOIs from the note, url, eprint or ee fields.
*/
/// Formats the DOI (e.g. removes http part) and also infers DOIs from the note, url, eprint or ee fields.
public class DoiCleanup implements CleanupJob {

/**
* Fields to check for DOIs.
*/
private static final List<Field> FIELDS = Arrays.asList(StandardField.NOTE, StandardField.URL, StandardField.EPRINT,
new UnknownField("ee"));
/// Fields to check for DOIs.
private static final List<Field> FIELDS = List.of(
StandardField.NOTE,
StandardField.URL,
StandardField.EPRINT,
new UnknownField("ee")
);

@Override
public List<FieldChange> cleanup(BibEntry entry) {
List<FieldChange> changes = new ArrayList<>();

// First check if the Doi Field is empty
if (entry.hasField(StandardField.DOI)) {
String doiFieldValue = entry.getField(StandardField.DOI).orElse(null);
AtomicBoolean validDoiExistsInDoiField = new AtomicBoolean(false);

String decodeDoiFieldValue = "";
decodeDoiFieldValue = URLDecoder.decode(doiFieldValue, StandardCharsets.UTF_8);
doiFieldValue = decodeDoiFieldValue;
entry.getField(StandardField.DOI)
.ifPresent(currentlyStoredDoi -> {
String decodedDoiFieldValue;
try {
decodedDoiFieldValue = URLDecoder.decode(currentlyStoredDoi, StandardCharsets.UTF_8);
} catch (IllegalArgumentException e) {
// If decoding fails, we keep the original value
decodedDoiFieldValue = currentlyStoredDoi;
}

Optional<DOI> doi = DOI.parse(doiFieldValue);
String cleanCurrentlyStoredDoi = decodedDoiFieldValue;

if (doi.isPresent()) {
String newValue = doi.get().asString();
if (!doiFieldValue.equals(newValue)) {
entry.setField(StandardField.DOI, newValue);
DOI.parse(cleanCurrentlyStoredDoi)
.map(DOI::asString)
.ifPresent(parsedDoi -> {
validDoiExistsInDoiField.set(true);
if (!parsedDoi.equals(cleanCurrentlyStoredDoi)) {
entry.setField(StandardField.DOI, parsedDoi);

FieldChange change = new FieldChange(entry, StandardField.DOI, doiFieldValue, newValue);
changes.add(change);
}
FieldChange change = new FieldChange(entry, StandardField.DOI, currentlyStoredDoi, parsedDoi);
changes.add(change);
}

// Doi field seems to contain Doi -> cleanup note, url, ee field
for (Field field : FIELDS) {
entry.getField(field).flatMap(DOI::parse)
.ifPresent(unused -> removeFieldValue(entry, field, changes));
}
}
} else {
// As the Doi field is empty we now check if note, url, or ee field contains a Doi
for (Field field : FIELDS) {
Optional<String> fieldContentOpt = entry.getField(field);
// DOI field contains a valid DOI -> clear the note, url, eprint, and ee fields if they hold a DOI
for (Field field : FIELDS) {
entry.getField(field)
.flatMap(DOI::parse) // only returns something if **complete** field is a DOI
.ifPresent(_ -> removeFieldValue(entry, field, changes));
}
});
});

Optional<DOI> doi = fieldContentOpt.flatMap(DOI::parse);

if (doi.isPresent()) {
// Update Doi
Optional<FieldChange> change = entry.setField(StandardField.DOI, doi.get().asString());
change.ifPresent(changes::add);
removeFieldValue(entry, field, changes);
}
for (Field field : FIELDS) {
entry.getField(field)
.flatMap(DOI::parse) // covers a full DOI only
.ifPresent(doi -> {
if (!validDoiExistsInDoiField.get()) {
Optional<FieldChange> change = entry.setField(StandardField.DOI, doi.asString());
change.ifPresent(changes::add);
}
removeFieldValue(entry, field, changes);
});
}

if (StandardField.EPRINT == field) {
fieldContentOpt.flatMap(ArXivIdentifier::parse)
.flatMap(ArXivIdentifier::inferDOI)
.ifPresent(inferredDoi -> {
Optional<FieldChange> change = entry.setField(StandardField.DOI, inferredDoi.asString());
change.ifPresent(changes::add);
});
}
}
if (!validDoiExistsInDoiField.get()) {
// Try to infer DOI from arXiv ID in eprint field and set it if DOI field is empty
entry.getField(StandardField.EPRINT)
.flatMap(ArXivIdentifier::parse)
.flatMap(ArXivIdentifier::inferDOI)
.ifPresent(inferredDoi -> {
Optional<FieldChange> change = entry.setField(StandardField.DOI, inferredDoi.asString());
change.ifPresent(changes::add);
});
}

return changes;
}

Expand Down
24 changes: 21 additions & 3 deletions jablib/src/test/java/org/jabref/logic/cleanup/DoiCleanupTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,15 @@
class DoiCleanupTest {

@ParameterizedTest
@MethodSource("provideDoiForAllLowers")
@MethodSource
void changeDoi(BibEntry expected, BibEntry doiInfoField) {
DoiCleanup cleanUp = new DoiCleanup();
cleanUp.cleanup(doiInfoField);

assertEquals(expected, doiInfoField);
}

private static Stream<Arguments> provideDoiForAllLowers() {
private static Stream<Arguments> changeDoi() {
UnknownField unknownField = new UnknownField("ee");
BibEntry doiResult = new BibEntry().withField(StandardField.DOI, "10.1145/2594455");

Expand Down Expand Up @@ -66,6 +66,24 @@ private static Stream<Arguments> provideDoiForAllLowers() {
Arguments.of(new BibEntry()
.withField(StandardField.DOI, "10.18726/2018_3"),
new BibEntry()
.withField(unknownField, "https://doi.org/10.18726/2018%7B%5Ctextunderscore%7D3")));
.withField(unknownField, "https://doi.org/10.18726/2018%7B%5Ctextunderscore%7D3")),

// text with invalid percent-encoding that is not a DOI is left unchanged
Arguments.of(new BibEntry()
.withField(StandardField.NOTE, "Some strange %% LaTeX %% text"),
new BibEntry()
.withField(StandardField.NOTE, "Some strange %% LaTeX %% text")),

// cleanup of correct percent encoded chars
Arguments.of(new BibEntry()
.withField(StandardField.DOI, "10.18726/2018_3"),
new BibEntry()
.withField(StandardField.NOTE, "10.18726/2018%7B%5Ctextunderscore%7D3")),

// DOI-like value with invalid percent-encoding in the note field is left unchanged
Arguments.of(new BibEntry()
.withField(StandardField.NOTE, "10.18726/2018%7B%%5Ctextunderscore%7D3"),
new BibEntry()
.withField(StandardField.NOTE, "10.18726/2018%7B%%5Ctextunderscore%7D3")));
}
}
Loading