Skip to content

Commit

Permalink
ALA - ignore processed recordedBy
Browse files — browse the repository at this point in the history
  • Loading branch information
Adam Collins committed Nov 21, 2023
1 parent 1cc2abd commit 39dec79
Show file tree
Hide file tree
Showing 6 changed files with 19 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -137,8 +137,7 @@ public void processElement(
List<String> otherCatalogNumbers =
source.getMultiValues().get(DwcTerm.otherCatalogNumbers.simpleName());

List<String> recordedBy =
source.getMultiValues().get(DwcTerm.recordedBy.simpleName());
String recordedBy = source.getStrings().get(DwcTerm.recordedBy.simpleName());

Long eventDateL = source.getLongs().get(DwcTerm.eventDate.simpleName());
String eventDate = "";
Expand All @@ -162,7 +161,7 @@ public void processElement(
.withDay(day)
.withEventDate(eventDate)
.withTypeStatus(typeStatus)
.withRecordedBy(recordedBy)
.withRecordedBy(Collections.singletonList(recordedBy))
.withFieldNumber(fieldNumber)
.withRecordNumber(recordNumber)
.withCatalogNumber(catalogNumber)
Expand Down Expand Up @@ -229,9 +228,8 @@ public void processElement(

// 3. taxonKey|year|recordedBy hashkeys
if (Strings.isNotEmpty(taxonKey) && year != null && recordedBy != null) {
for (String r : recordedBy) {
out.output(builder.withHashKey(taxonKey + "|" + year + "|" + r).build());
}
out.output(
builder.withHashKey(taxonKey + "|" + year + "|" + recordedBy).build());
}
}
}));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ public class CoreCsvConverter {
.addKeyTermFn(DwcTerm.institutionCode, getString(DwcTerm.institutionCode))
.addKeyTermFn(DwcTerm.recordNumber, getString(DwcTerm.recordNumber))
.addKeyTermFn(DwcTerm.basisOfRecord, getString(DwcTerm.basisOfRecord), "HumanObservation")
.addKeyTermFn(DwcTerm.recordedBy, getMultivalue(DwcTerm.recordedBy))
.addKeyTermFn(DwcTerm.recordedBy, getString(DwcTerm.recordedBy))
.addKeyTermFn(DwcTerm.occurrenceStatus, getString(DwcTerm.occurrenceStatus))
.addKeyTermFn(DwcTerm.individualCount, getInt(DwcTerm.individualCount))
.addKeyTermFn(DwcTerm.scientificName, getString(DwcTerm.scientificName))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ public static IndexRecord createIndexRecord(
"degreeOfEstablishment"); // GBIF treats it as a JSON, but ALA needs a String which is
// defined
skipKeys.add(DwcTerm.typeStatus.simpleName());
skipKeys.add(DwcTerm.recordedBy.simpleName());
skipKeys.add(DwcTerm.recordedBy.simpleName()); // Do not use processed recordedBy
skipKeys.add(DwcTerm.identifiedBy.simpleName());
skipKeys.add(DwcTerm.preparations.simpleName());
skipKeys.add(DwcTerm.datasetID.simpleName());
Expand Down Expand Up @@ -628,7 +628,6 @@ private static void applyBasicRecord(BasicRecord br, IndexRecord.Builder indexRe
addTermWithAgentsSafely(
indexRecord, DwcTerm.identifiedByID.simpleName(), br.getIdentifiedByIds());
addMultiValueTermSafely(indexRecord, DwcTerm.typeStatus.simpleName(), br.getTypeStatus());
addMultiValueTermSafely(indexRecord, DwcTerm.recordedBy.simpleName(), br.getRecordedBy());
addMultiValueTermSafely(indexRecord, DwcTerm.identifiedBy.simpleName(), br.getIdentifiedBy());
addMultiValueTermSafely(indexRecord, DwcTerm.preparations.simpleName(), br.getPreparations());
addMultiValueTermSafely(indexRecord, DwcTerm.datasetID.simpleName(), br.getDatasetID());
Expand Down Expand Up @@ -868,6 +867,11 @@ public static Set<String> getAddedValues() {
.addAll(
BasicRecord.getClassSchema().getFields().stream()
.map(Field::name)
.filter(
name ->
!DwcTerm.recordedBy
.simpleName()
.equals(name)) // Do not use the processed recordedBy
.collect(Collectors.toList()))
.addAll(
TemporalRecord.getClassSchema().getFields().stream()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ public void converterTest() {
"\"raw_er_institutionCode\"", // DwcTerm.institutionCode
"\"raw_er_recordNumber\"", // DwcTerm.recordNumber
"\"br_basisOfRecord\"", // DwcTerm.basisOfRecord
"\"br_recordedBy_1|br_recordedBy_2\"", // DwcTerm.recordedBy
"\"raw_er_recordedBy\"", // DwcTerm.recordedBy
"\"br_occurrenceStatus\"", // DwcTerm.occurrenceStatus
"\"222\"", // DwcTerm.individualCount
"\"atxr_ScientificName\"", // DwcTerm.scientificName
Expand Down Expand Up @@ -917,7 +917,7 @@ public void converterDefaultTest() {
"\"raw_er_institutionCode\"", // DwcTerm.institutionCode
"\"raw_er_recordNumber\"", // DwcTerm.recordNumber
"\"HumanObservation\"", // DwcTerm.basisOfRecord
"\"\"", // DwcTerm.recordedBy
"\"raw_er_recordedBy\"", // DwcTerm.recordedBy
"\"\"", // DwcTerm.occurrenceStatus
"\"\"", // DwcTerm.individualCount
"\"\"", // DwcTerm.scientificName
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import au.org.ala.kvs.ALAPipelinesConfig;
import au.org.ala.utils.ALAFsUtils;
import java.io.IOException;
import java.time.Duration;
import java.util.Collections;
import java.util.concurrent.atomic.AtomicInteger;
import lombok.extern.slf4j.Slf4j;
Expand Down Expand Up @@ -78,12 +79,14 @@ protected void before() throws Throwable {

nameService =
new GenericContainer(DockerImageName.parse(NAME_SERVICE_IMG))
.withExposedPorts(NAME_SERVICE_INTERNAL_PORT);
.withExposedPorts(NAME_SERVICE_INTERNAL_PORT)
.withStartupTimeout(Duration.ofMinutes(3));
nameService.start();

sdsService =
new GenericContainer(DockerImageName.parse(SENSTIVE_SERVICE_IMG))
.withExposedPorts(SENSITIVE_SERVICE_INTERNAL_PORT);
.withExposedPorts(SENSITIVE_SERVICE_INTERNAL_PORT)
.withStartupTimeout(Duration.ofMinutes(3));
sdsService.start();

elasticsearchContainer =
Expand Down
4 changes: 1 addition & 3 deletions livingatlas/solr/conf/managed-schema
Original file line number Diff line number Diff line change
Expand Up @@ -367,7 +367,7 @@
<field name="preparations" type="string" docValues="true" indexed="true" multiValued="true"/>
<field name="previousIdentifications" type="string" docValues="true" indexed="true"/>
<field name="recordNumber" type="string" docValues="true" indexed="true"/>
<field name="recordedBy" type="string" docValues="true" indexed="true" multiValued="true"/>
<field name="recordedBy" type="string" docValues="true" indexed="true"/>
<field name="relativeOrganismQuantity" type="double" docValues="true" indexed="true"/>
<field name="references" type="string" docValues="true" indexed="true"/>
<field name="relatedResourceID" type="string" docValues="true" indexed="true"/>
Expand Down Expand Up @@ -659,7 +659,6 @@
<field name="raw_publishingOrganizationKey" type="string" docValues="true" indexed="true" />
<field name="raw_taxonRank" type="string" docValues="true" indexed="true" />
<field name="raw_taxonRankID" type="string" docValues="true" indexed="true" />
<field name="raw_recordedBy" type="string" docValues="true" indexed="true" />
<field name="raw_recordedByID" type="string" docValues="true" indexed="true" />
<field name="raw_references" type="string" docValues="true" indexed="true" />
<field name="raw_relativeOrganismQuantity" type="string" docValues="true" indexed="true" />
Expand Down Expand Up @@ -748,7 +747,6 @@
<copyField source="recordedBy" dest="text"/>
<copyField source="identifiedBy" dest="text_identifiedBy"/>
<copyField source="recordedBy" dest="text_recordedBy"/>
<copyField source="raw_recordedBy" dest="text_recordedBy"/>
<copyField source="scientificName" dest="matched_name"/>
<copyField source="scientificName" dest="text"/>
<copyField source="species" dest="text"/>
Expand Down

0 comments on commit 39dec79

Please sign in to comment.