diff --git a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/beam/ClusteringPipeline.java b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/beam/ClusteringPipeline.java index 5091f3405..ac59f23a0 100644 --- a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/beam/ClusteringPipeline.java +++ b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/beam/ClusteringPipeline.java @@ -137,8 +137,7 @@ public void processElement( List otherCatalogNumbers = source.getMultiValues().get(DwcTerm.otherCatalogNumbers.simpleName()); - List recordedBy = - source.getMultiValues().get(DwcTerm.recordedBy.simpleName()); + String recordedBy = source.getStrings().get(DwcTerm.recordedBy.simpleName()); Long eventDateL = source.getLongs().get(DwcTerm.eventDate.simpleName()); String eventDate = ""; @@ -162,7 +161,7 @@ public void processElement( .withDay(day) .withEventDate(eventDate) .withTypeStatus(typeStatus) - .withRecordedBy(recordedBy) + .withRecordedBy(Collections.singletonList(recordedBy)) .withFieldNumber(fieldNumber) .withRecordNumber(recordNumber) .withCatalogNumber(catalogNumber) @@ -229,9 +228,8 @@ public void processElement( // 3. taxonKey|year|recordedBy hashkeys if (Strings.isNotEmpty(taxonKey) && year != null && recordedBy != null) { - for (String r : recordedBy) { - out.output(builder.withHashKey(taxonKey + "|" + year + "|" + r).build()); - } + out.output( + builder.withHashKey(taxonKey + "|" + year + "|" + recordedBy).build()); } } })); diff --git a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/converters/CoreCsvConverter.java b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/converters/CoreCsvConverter.java index f3a259806..93f259bbf 100644 --- a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/converters/CoreCsvConverter.java +++ b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/converters/CoreCsvConverter.java @@ -24,7 +24,7 @@ public class CoreCsvConverter { .addKeyTermFn(DwcTerm.institutionCode, getString(DwcTerm.institutionCode)) .addKeyTermFn(DwcTerm.recordNumber, getString(DwcTerm.recordNumber)) .addKeyTermFn(DwcTerm.basisOfRecord, getString(DwcTerm.basisOfRecord), "HumanObservation") - .addKeyTermFn(DwcTerm.recordedBy, getMultivalue(DwcTerm.recordedBy)) + .addKeyTermFn(DwcTerm.recordedBy, getString(DwcTerm.recordedBy)) .addKeyTermFn(DwcTerm.occurrenceStatus, getString(DwcTerm.occurrenceStatus)) .addKeyTermFn(DwcTerm.individualCount, getInt(DwcTerm.individualCount)) .addKeyTermFn(DwcTerm.scientificName, getString(DwcTerm.scientificName)) diff --git a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/IndexRecordTransform.java b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/IndexRecordTransform.java index c708275cd..b23bca785 100644 --- a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/IndexRecordTransform.java +++ b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/IndexRecordTransform.java @@ -198,7 +198,7 @@ public static IndexRecord createIndexRecord( "degreeOfEstablishment"); // GBIF treats it as a JSON, but ALA needs a String which is // defined skipKeys.add(DwcTerm.typeStatus.simpleName()); - skipKeys.add(DwcTerm.recordedBy.simpleName()); + skipKeys.add(DwcTerm.recordedBy.simpleName()); // Do not use processed recordedBy skipKeys.add(DwcTerm.identifiedBy.simpleName()); skipKeys.add(DwcTerm.preparations.simpleName()); skipKeys.add(DwcTerm.datasetID.simpleName()); @@ -610,7 +610,6 @@ private static void applyBasicRecord(BasicRecord br, IndexRecord.Builder indexRe addTermWithAgentsSafely( indexRecord, DwcTerm.identifiedByID.simpleName(), br.getIdentifiedByIds()); addMultiValueTermSafely(indexRecord, DwcTerm.typeStatus.simpleName(), br.getTypeStatus()); - addMultiValueTermSafely(indexRecord, DwcTerm.recordedBy.simpleName(), br.getRecordedBy()); addMultiValueTermSafely(indexRecord, DwcTerm.identifiedBy.simpleName(), br.getIdentifiedBy()); addMultiValueTermSafely(indexRecord, DwcTerm.preparations.simpleName(), br.getPreparations()); addMultiValueTermSafely(indexRecord, DwcTerm.datasetID.simpleName(), br.getDatasetID()); @@ -850,6 +849,11 @@ public static Set getAddedValues() { .addAll( BasicRecord.getClassSchema().getFields().stream() .map(Field::name) + .filter( + name -> + !DwcTerm.recordedBy + .simpleName() + .equals(name)) // Do not use the processed recordedBy .collect(Collectors.toList())) .addAll( TemporalRecord.getClassSchema().getFields().stream() diff --git a/livingatlas/pipelines/src/test/java/au/org/ala/pipelines/converters/CoreTsvConverterTest.java b/livingatlas/pipelines/src/test/java/au/org/ala/pipelines/converters/CoreTsvConverterTest.java index 9f13d51ed..bfbc4ef70 100644 --- a/livingatlas/pipelines/src/test/java/au/org/ala/pipelines/converters/CoreTsvConverterTest.java +++ b/livingatlas/pipelines/src/test/java/au/org/ala/pipelines/converters/CoreTsvConverterTest.java @@ -52,7 +52,7 @@ public void converterTest() { "\"raw_er_institutionCode\"", // DwcTerm.institutionCode "\"raw_er_recordNumber\"", // DwcTerm.recordNumber "\"br_basisOfRecord\"", // DwcTerm.basisOfRecord - "\"br_recordedBy_1|br_recordedBy_2\"", // DwcTerm.recordedBy + "\"raw_er_recordedBy\"", // DwcTerm.recordedBy "\"br_occurrenceStatus\"", // DwcTerm.occurrenceStatus "\"222\"", // DwcTerm.individualCount "\"atxr_ScientificName\"", // DwcTerm.scientificName @@ -940,7 +940,7 @@ public void converterDefaultTest() { "\"raw_er_institutionCode\"", // DwcTerm.institutionCode "\"raw_er_recordNumber\"", // DwcTerm.recordNumber "\"HumanObservation\"", // DwcTerm.basisOfRecord - "\"\"", // DwcTerm.recordedBy + "\"raw_er_recordedBy\"", // DwcTerm.recordedBy "\"\"", // DwcTerm.occurrenceStatus "\"\"", // DwcTerm.individualCount "\"\"", // DwcTerm.scientificName diff --git a/livingatlas/solr/conf/managed-schema b/livingatlas/solr/conf/managed-schema index c1e7610f2..b51638686 100644 --- a/livingatlas/solr/conf/managed-schema +++ b/livingatlas/solr/conf/managed-schema @@ -368,7 +368,7 @@ - + @@ -661,7 +661,6 @@ - @@ -751,7 +750,6 @@ -