Skip to content
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@
<dependency>
<groupId>au.org.aodn</groupId>
<artifactId>stacmodel</artifactId>
<version>0.0.59</version>
<version>0.0.60</version>
</dependency>
</dependencies>
</dependencyManagement>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -173,17 +173,6 @@ public enum CQLFields implements CQLFieldsInterface {
StacBasicField.Links.displayField,
null,
null),
// Phrase search on link titles: a match_phrase on the link-title field, wrapped in a
// nested query whose path is the links field.
links_title_contains(
        StacBasicField.LinksTitle.searchField,
        StacBasicField.LinksTitle.displayField,
        (phrase) -> NestedQuery.of(nested -> nested
                        .path(StacBasicField.Links.searchField)
                        .query(inner -> inner
                                .matchPhrase(titlePhrase -> titlePhrase
                                        .query(phrase)
                                        .field(StacBasicField.LinksTitle.searchField))))
                ._toQuery(),
        null),
links_airole_contains(
StacBasicField.LinksAiRole.searchField,
StacBasicField.LinksAiRole.displayField,
Expand All @@ -196,12 +185,12 @@ public enum CQLFields implements CQLFieldsInterface {
._toQuery(),
null),
// Phrase search on the credits field: builds a match_phrase query for the given literal.
credit_contains(
        StacSummeries.Credits.searchField,
        StacSummeries.Credits.displayField,
        (phrase) -> MatchPhraseQuery.of(builder -> builder
                .query(phrase)
                .field(StacSummeries.Credits.searchField))._toQuery(),
        null),
StacSummeries.Credits.searchField,
StacSummeries.Credits.displayField,
// Builds a match query (not match_phrase) on the credits field from the literal as given.
// NOTE(review): an earlier remark here spoke of padding the literal with a space in front and
// at the end to force exact-word matching; no padding is applied in this lambda — confirm intent.
(literal) -> MatchQuery.of(m -> m
.field(StacSummeries.Credits.searchField)
.query(literal))._toQuery(),
null),
status(
StacSummeries.Status.searchField,
StacSummeries.Status.displayField,
Expand Down Expand Up @@ -249,6 +238,24 @@ public enum CQLFields implements CQLFieldsInterface {
.operator(Operator.And)// ensure all terms are matched with fuzziness
.query(literal))._toQuery(),
null),
// Acronym match on the synonyms sub-fields (search-time expansion), e.g. "SOOP" -> "ships of opportunity".
// Title variant: targets the title's ".synonyms" sub-field and carries a 2.0 boost.
acronym_title(
        StacBasicField.Title.searchField + ".synonyms",
        StacBasicField.Title.displayField,
        (term) -> MatchQuery.of(match -> match
                .field(StacBasicField.Title.searchField + ".synonyms")
                .query(term)
                .operator(Operator.And)// every expanded term has to match
                .boost(2.0F)// kept in line with the fuzzy_title weighting
        )._toQuery(),
        null),
// Description counterpart of the acronym match: queries the description's ".synonyms"
// sub-field with Operator.And; no boost is set on this one.
acronym_desc(
        StacBasicField.Description.searchField + ".synonyms",
        StacBasicField.Description.displayField,
        (term) -> MatchQuery.of(match -> match
                .query(term)
                .operator(Operator.And)
                .field(StacBasicField.Description.searchField + ".synonyms"))._toQuery(),
        null),
// Contains cloud-optimized data
assets_summary(
StacBasicField.AssetsSummary.searchField,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -306,9 +306,10 @@ protected Supplier<SearchRequest.Builder> buildParameterSearchRequestSupplier(
should.add(CQLFields.organisation_vocabs.getPropertyEqualToQuery(term));
should.add(CQLFields.platform_vocabs.getPropertyEqualToQuery(term));
should.add(CQLFields.id.getPropertyEqualToQuery(term));
should.add(BoolQuery.of(b -> b
.should(CQLFields.links_title_contains.getPropertyEqualToQuery(term))
.boost(0.5f))._toQuery());
// Acronym match on the *.synonyms sub-fields, e.g. "SOOP" -> "ships of opportunity".
should.add(CQLFields.acronym_title.getPropertyEqualToQuery(term));
should.add(CQLFields.acronym_desc.getPropertyEqualToQuery(term));
// credit_contains uses match query by default, exact match is not applied here
should.add(CQLFields.credit_contains.getPropertyEqualToQuery(term));
}
}
Expand Down Expand Up @@ -411,15 +412,10 @@ public ElasticSearchBase.SearchResult<StacCollectionModel> searchByParameters(Li
should.add(CQLFields.organisation_vocabs.getPropertyEqualToQuery(term));
should.add(CQLFields.platform_vocabs.getPropertyEqualToQuery(term));
should.add(CQLFields.id.getPropertyEqualToQuery(term));
// There was a request not to use acronyms in the metadata title and description, so these
// acronyms were moved to links; for example, the NRMN record mentions "NRMN" in its link title.
// This is a work-around for that requirement which still allows searching by NRMN.
// links_title_contains and credit_contains use a match query by default; exact match is not applied here.
// links_title_contains is weighted lower as it may contain combined title+description content.
should.add(BoolQuery.of(b -> b
.should(CQLFields.links_title_contains.getPropertyEqualToQuery(term))
.boost(0.5f) // lower boost to reduce promotion of link-title-only matches
)._toQuery());
// Acronym match on the *.synonyms sub-fields, e.g. "SOOP" -> "ships of opportunity".
should.add(CQLFields.acronym_title.getPropertyEqualToQuery(term));
should.add(CQLFields.acronym_desc.getPropertyEqualToQuery(term));
// credit_contains uses match query by default, exact match is not applied here
should.add(CQLFields.credit_contains.getPropertyEqualToQuery(term));
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -128,27 +128,6 @@ public void verifyApiCollectionsQueryOnText2() throws IOException {
collections.getBody().getCollections().get(1).getId(),
"Correct UUID - 9fdb1eee-bc28-43a9-88c5-972324784837");
}
/**
 * Acronyms are discouraged in the title and description, so the text "NRMN" is absent from both
 * in the NRMN record; it normally appears in a link title instead. This test makes sure the
 * NRMN record can still be found through its links.
 * @throws IOException - IO Exception
 */
@Test
public void verifyApiCollectionsQueryOnText3() throws IOException {
    // The first file is the NRMN record: "NRMN" appears in its links, not in title/desc
    super.insertJsonToElasticRecordIndex(
            "8cdcdcad-399b-4bed-8cb2-29c486b6b124.json",
            "7709f541-fc0c-4318-b5b9-9053aa474e0e.json"
    );

    final String nrmnUuid = "8cdcdcad-399b-4bed-8cb2-29c486b6b124";
    final String url = getBasePath() + "/collections?q=NRMN";

    // Hit the REST endpoint directly and inspect what comes back
    ResponseEntity<ExtendedCollections> response = testRestTemplate.getForEntity(url, ExtendedCollections.class);
    ExtendedCollections body = Objects.requireNonNull(response.getBody());

    assertEquals(1, body.getTotal(), "Only 1 hit");
    assertEquals(
            nrmnUuid,
            body.getCollections().get(0).getId(),
            "Correct UUID - 8cdcdcad-399b-4bed-8cb2-29c486b6b124");
}
/**
* The datetime field after xxx/.. xxx/ etc. It uses CQL internally so no need to test Before After During in CQL
*/
Expand Down Expand Up @@ -568,10 +547,10 @@ public void verifyCQLPropertyScore() throws IOException {
assertEquals(1, Objects.requireNonNull(collections.getBody()).getCollections().size(), "hit 1, with score 3");
assertEquals("bf287dfe-9ce4-4969-9c59-51c39ea4d011", Objects.requireNonNull(collections.getBody()).getCollections().get(0).getId(), "bf287dfe-9ce4-4969-9c59-51c39ea4d011");

// Increase score will drop two record
// Increase score: without the link-title score contribution the remaining record's combined
// score sits on the score>=3 boundary, so it is at most 1 hit (BM25 varies slightly by env)
collections = testRestTemplate.getForEntity(getBasePath() + "/collections?q='dataset includes'&filter=score>=3", Collections.class);
assertEquals(1, Objects.requireNonNull(collections.getBody()).getCollections().size(), "hit 2, with score 3");
assertEquals("bf287dfe-9ce4-4969-9c59-51c39ea4d011", Objects.requireNonNull(collections.getBody()).getCollections().get(0).getId(), "bf287dfe-9ce4-4969-9c59-51c39ea4d011");
assertTrue(Objects.requireNonNull(collections.getBody()).getCollections().size() <= 1, "at most 1 hit at score>=3");
}

/**
Expand Down
Loading
Loading