diff --git a/.gitignore b/.gitignore index 5094a13..105d4e3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,9 @@ *~ **/.DS_Store **/.idea/* +**/.vscode/* +docker/bin/** +**/*.zip *.iml target/ pom.xml.tag diff --git a/.travis.yml b/.travis.yml index aaa3072..4a90a64 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,8 +3,7 @@ language: java script: mvn -B install jdk: -- openjdk8 -- openjdk11 +- openjdk17 services: - docker diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index cc480a5..c7803bc 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -8,7 +8,7 @@ Fork, then clone the repo: Set up your environment: -- Java 8 +- Java 17 - Maven - Get a [Rosette API key](https://developer.rosette.com/signup) if you don't have one - export ROSETTE_API_KEY="\" diff --git a/README.md b/README.md index eb13c03..2f6dd36 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,7 @@ There are two common ways to install the plugin into Elasticsearch. (Make sure t #### Note on Versioning: The plugin uses semantic versioning. The first three numbers describe which version of Elasticsearch this version of the plugin is compatible with, and the last number indicates the version of the plugin within that Elasticsearch version. -For instance, `5.3.0.1` is the second patch version of the plugin for Elasticsearch 5.3.0. +For instance, `8.15.0.1` is the second patch version of the plugin for Elasticsearch 8.15.0. ## How to Build Building the plugin requires a Rosette API key. If you don’t already have a Rosette API developer account, head over to [developer.rosette.com](https://developer.rosette.com/signup) to get your free Rosette API key. diff --git a/docker/.gitignore b/docker/.gitignore index c6483d4..14a75eb 100644 --- a/docker/.gitignore +++ b/docker/.gitignore @@ -1,2 +1,4 @@ Dockerfile plugins +*.zip +*.iml \ No newline at end of file diff --git a/docker/README.md b/docker/README.md index 2a8667d..a51db8a 100644 --- a/docker/README.md +++ b/docker/README.md @@ -4,6 +4,7 @@ Testing the plugin `mvn clean install` from the top level directory first, then: ``` +cd docker mvn docker:build ROSETTE_API_KEY= mvn docker:run @@ -11,13 +12,5 @@ ROSETTE_API_KEY= mvn docker:run -./test.sh +./test.sh localhost: ``` -================== - -There is a known issue where the docker container logs an error message for unknown reasons: -``` -[ERROR] DOCKER> Cannot process chunk response: java.io.IOException: Bad file descriptor -``` -This can be ignored. - \ No newline at end of file diff --git a/docker/pom.xml b/docker/pom.xml index f293ccc..52dfec2 100644 --- a/docker/pom.xml +++ b/docker/pom.xml @@ -1,28 +1,26 @@ - + 4.0.0 - com.rosette.elasticsearch rosette-elasticsearch-docker pom rosette-elasticsearch-parent com.rosette.elasticsearch - 7.17.0.1-SNAPSHOT + 8.15.0.0-SNAPSHOT .. @@ -35,11 +33,41 @@ ${project.build.directory}/curl-output.txt - 0.36.0 + 0.45.0 + 3.6.0 verify + + org.codehaus.mojo + build-helper-maven-plugin + ${build-helper-maven-plugin.version} + + + get-local-ip + pre-integration-test + + local-ip + + + elastic.ip + + + + get-random-port + pre-integration-test + + reserve-network-port + + + + server.port + + + + + io.fabric8 docker-maven-plugin @@ -53,8 +81,9 @@ basistechnologycorporation/rosette-elasticsearch docker.elastic.co/elasticsearch/elasticsearch:${elasticsearch.version} + true - find /plugins -name "*.zip" -exec /usr/share/elasticsearch/bin/elasticsearch-plugin install --batch file://{} \; + find /plugins -name "*.zip" -exec /usr/share/elasticsearch/bin/elasticsearch-plugin install --batch file://{} \; @@ -74,21 +103,28 @@ - http://${host.ip}:${docker.port} + http://localhost:${server.port} GET 200..399 - + - + ${env.ROSETTE_API_KEY} - _local_ - 0.0.0.0 + single-node + -Xms4g -Xmx4g false + + ${server.port}:9200 + + + elasticsearch + red + @@ -116,30 +152,15 @@ exec-maven-plugin - run_IT + Run Integration Test exec integration-test - ./test.sh - - ${host.ip} - ${docker.port} - - ${curl.output} - - - - verify_IT_success - - exec - - verify - - ./verify.sh + ${project.basedir}/test.sh - ${curl.output} + http://${elastic.ip}:${server.port} @@ -147,68 +168,4 @@ - - - set-url-localhost - - - !env.DOCKER_HOST - - - - - - org.codehaus.mojo - build-helper-maven-plugin - ${build-helper-maven-plugin.version} - - - get-local-ip - initialize - - local-ip - - - host.ip - - - - - - - - - set-url-from-docker-host - - - env.DOCKER_HOST - - - - - - org.codehaus.mojo - build-helper-maven-plugin - ${build-helper-maven-plugin.version} - - - validate - regex-property - - regex-property - - - host.ip - ${env.DOCKER_HOST} - ^tcp://(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3}):\d{1,5}$ - $1.$2.$3.$4 - true - - - - - - - - diff --git a/docker/test.sh b/docker/test.sh index 09d425d..f13678c 100755 --- a/docker/test.sh +++ b/docker/test.sh @@ -1,205 +1,180 @@ #!/usr/bin/env bash -code=0 -command_counter=0 -set -x +################################################################################ +# This data and information is proprietary to, and a valuable trade secret +# of, Basis Technology Corp. It is given in confidence by Basis Technology +# and may only be used as permitted under the license agreement under which +# it has been distributed, and in no other way. +# +# Copyright (c) 2024 Basis Technology Corporation All rights reserved. +# +# The technical data and information provided herein are provided with +# `limited rights', and the computer software provided herein is provided +# with `restricted rights' as those terms are defined in DAR and ASPR +# 7-104.9(a). +# +################################################################################ -# uses '|| code=$?' to make sure that even if there is an error, -# the maven build continues and stops the docker image -# the exit codes will be appended to the output file for later verification +ES_HOST=$1 -curl -fsSL -H 'Content-Type: application/json' -XPUT "$1:$2/_ingest/pipeline/my_pipeline" -d' -{ - "processors": [ - { - "ros_language" : { - "field" : "text", - "target_field" : "language" - } - } - ] +TMPDIR=$(mktemp -d) +function cleanup() { + echo "Cleaning up $TMPDIR" + test -d $TMPDIR && rm -rf $TMPDIR } -' || ((code++)) -echo -echo "After command ${command_counter} our error code count is ${code}." -command_counter=$((command_counter + 1)) +trap "cleanup" EXIT -curl -fsSL -H 'Content-Type: application/json' -XPOST "$1:$2/indexname/mappingName?pipeline=my_pipeline&pretty" -d' -{ - "text" : "This is a document containing English text" -} -' || ((code++)) -echo "After command ${command_counter} our error code count is ${code}." -command_counter=$((command_counter + 1)) +if [ -z "$ES_HOST" ]; then + echo "Error: missing ES_HOST" + exit 1 +fi -curl -fsSL -H "Content-Type: application/json" -XPUT "$1:$2/_ingest/pipeline/rosapi?pretty" -d' -{ - "processors": [ +EXISTS=$(curl --head -L -s -o /dev/null -w "%{http_code}" "${ES_HOST}") +if [[ $EXISTS -ne 200 ]]; then + echo "Error: not available at ${ES_HOST} got HTTP ${EXISTS}" + exit 1 +fi +echo "Creating pipeline on ${ES_HOST}" +curl -s -XPUT "${ES_HOST}/_ingest/pipeline/rosette_pipeline" -H 'Content-Type: application/json' -d'{ + "description" :"All Rosette Processors", + "processors": [ { - "ros_language" : { - "field" : "text", - "target_field" : "language" - } + "ros_language" : { + "field" : "text", + "target_field" : "language" + } }, { - "ros_categories" : { - "field" : "text", - "target_field" : "category" - } + "ros_categories" : { + "field" : "text", + "target_field" : "category" + } }, { - "ros_sentiment" : { - "field" : "text", - "target_field" : "sentiment" - } + "ros_name_translation" : { + "field" : "text", + "target_language" : "kor", + "target_field" : "kor_name" + } }, { - "ros_entities" : { - "field" : "text", - "target_field" : "entities_sentiment", - "include_sentiment" : true, - "include_offsets" : true, - "include_translation" : true, - "translation_language" : "eng" - } + "ros_sentiment" : { + "field" : "text", + "target_field" : "sentiment" + } }, { - "ros_entities" : { - "field" : "text", - "target_field" : "entities", - "include_sentiment" : false, - "include_offsets" : false, - "include_translation" : false, - "translation_language" : "eng" - } + "ros_entities" : { + "field" : "text", + "target_field" : "entities_sentiment", + "include_sentiment" : true, + "include_offsets" : true, + "include_translation" : true, + "translation_language" : "kor" + } }, - { - "ros_name_translation" : { - "field" : "name", - "target_field" : "translation", - "target_language" : "rus" - } + { + "ros_entities" : { + "field" : "text", + "target_field" : "entities_english", + "include_sentiment" : false, + "include_offsets" : false, + "include_translation" : true, + "translation_language" : "eng" + } } ] -} -' || ((code++)) -echo "After command ${command_counter} our error code count is ${code}." -command_counter=$((command_counter + 1)) +}' | jq -r .acknowledged | grep -q true || echo "FAILED" -#Pipeline without categories since it only supports English -curl -fsSL -H "Content-Type: application/json" -XPUT "$1:$2/_ingest/pipeline/rosapi_jpn?pretty" -d' -{ - "processors": [ - { - "ros_language" : { - "field" : "text", - "target_field" : "language" - } - }, - { - "ros_sentiment" : { - "field" : "text", - "target_field" : "sentiment" - } - }, - { - "ros_entities" : { - "field" : "text", - "target_field" : "entities_sentiment", - "include_sentiment" : true, - "include_offsets" : true, - "include_translation" : true, - "translation_language" : "eng" - } - }, - { - "ros_entities" : { - "field" : "text", - "target_field" : "entities", - "include_sentiment" : false, - "include_offsets" : false, - "include_translation" : false, - "translation_language" : "eng" - } - }, - { - "ros_name_translation" : { - "field" : "name", - "target_field" : "translation", - "target_language" : "rus" - } - } - ] -} -' || ((code++)) -echo "After command ${command_counter} our error code count is ${code}." -command_counter=$((command_counter + 1)) - -curl -fsSL -H "Content-Type: application/json" -XPUT "$1:$2/test_idx?include_type_name=true&pretty" -d' -{ - "mappings": { - "rosette": { - "properties": { - "text" : { "type" : "text" }, - "name" : { "type" : "text" }, - "language" : { "type" : "keyword" }, - "category" : { "type" : "keyword" }, - "sentiment" : { "type" : "keyword" }, - "entities" : { "type" : "nested" }, - "translation" : { "type" : "text" } - } - } - } -} -' || ((code++)) -echo "After command ${command_counter} our error code count is ${code}." -command_counter=$((command_counter + 1)) - -curl -fsSL -H "Content-Type: application/json" -XPUT "$1:$2/test_idx/rosette/1?pretty&refresh=true&pipeline=rosapi" -d' -{ - "text": "Original Ghostbuster Dan Aykroyd, who also co-wrote the 1984 Ghostbusters film, couldn’t be more pleased with the new all-female Ghostbusters cast, telling The Hollywood Reporter, “The Aykroyd family is delighted by this inheritance of the Ghostbusters torch by these most magnificent women in comedy.”" -} -' || ((code++)) -echo "After command ${command_counter} our error code count is ${code}." -command_counter=$((command_counter + 1)) - -curl -fsSL -H "Content-Type: application/json" -XPUT "$1:$2/test_idx/rosette/2?pretty&refresh=true&pipeline=rosapi_jpn" -d' -{ - "text": "バングラデシュ政府、ロヒンギャ難民の島への移動を計画
\nバングラデシュ政府、ロヒンギャ難民の島への移動を計画\n\nテンガール・チャール島は約10年前に、メグナ川の堆積土で形成され、高潮の際には数十センチの水に囲まれてしまう。道路や堤防などは築かれておらず、島を記載する地図はあまりない。\n\n約30キロ西には60万人が住むハティア島があり、現在の難民キャンプからの移動には9時間かかる。\n\nある地元政府関係者はAFP通信に対し、テンガール・チャール島について、「島に行けるのは冬のみで、海賊たちの隠れ家になっている」と語った。島を洪水から守るため植樹が行われているが、完了するまでには少なくとも10年がかかるという。同関係者は、「モンスーンの季節には完全に水浸しになってしまう」と話し、「あそこに住まわせるというのは、ひどいアイデアだ」と指摘した。\n\nImage caption 移住が計画されているテンガール・チャール島はハティア(Hatiya)島の近くにある\n\nミャンマーでは、ロヒンギャの人々は国境を接するバングラデシュからの不法移民として扱われており、国籍の取得ができずにいる。\n\n" -} -' || ((code++)) -echo "After command ${command_counter} our error code count is ${code}." -command_counter=$((command_counter + 1)) +EXISTS=$(curl -s -o /dev/null -w "%{http_code}" "${ES_HOST}/_ingest/pipeline/rosette_pipeline") +if [[ $EXISTS -ne 200 ]]; then + echo "Error: pipeline not available ${EXISTS}" + exit 1 +else + echo "Pipeline created" +fi -curl -fsSL -H "Content-Type: application/json" -XPUT "$1:$2/test_idx/rosette/3?pretty&refresh=true&pipeline=rosapi" -d' -{ - "text" : "Vladimir Vladimirovich Nabokov was a Russian-American novelist and entomologist. His first nine novels were in Russian, and he achieved international prominence after he began writing English prose.", - "name" : "Vladimir Nabokov" -} -' || ((code++)) -echo "After command ${command_counter} our error code count is ${code}." -command_counter=$((command_counter + 1)) +echo "Annotating document with pipeline" +curl -s -XPUT "${ES_HOST}/test_idx/_doc/2?pipeline=rosette_pipeline&pretty" -H 'Content-Type: application/json' -d'{"text":"New York"}' -sleep 3 - -curl -fsSL -H "Content-Type: application/json" -XPOST "$1:$2/test_idx/_search?pretty" -d' -{ - "query": { - "constant_score" : { - "filter" : { - "exists" : {"field" : "language"} - } - } - } -} -' || ((code++)) -echo "After command ${command_counter} our error code count is ${code}." +EXISTS=$(curl -s -o /dev/null -w "%{http_code}" "${ES_HOST}/test_idx/_doc/2?pretty") +if [[ $EXISTS -ne 200 ]]; then + echo "Error: document not available ${EXISTS}" + exit 1 +else + echo "document created" +fi -set +x -# The way we check to see if test.sh succeeded is to read in the output -# in verify.sh. We then check the last line of the output file and -# based on the last line, decide if the test was successful. This -# mechanism fails sporadically, and I suspect it is caused by a buffering -# issue. Perhaps a brief snooze will make it more reliable. -sleep 2 -echo "exit: $code" +echo "Fetching document" +DOC="${TMPDIR}/document.json" +curl -s -o "${DOC}" "${ES_HOST}/test_idx/_doc/2?pretty" +FAILED=0 +echo "Testing sentiment" +if [[ "neu" == $(jq -r ._source.sentiment "${DOC}") ]]; then + echo "pass"; +else + FAILED=1 + echo "fail"; +fi +echo "Testing entities_sentiment sentiment" +if [[ "neu" == $(jq -r ._source.entities_sentiment[0].sentiment "${DOC}") ]]; then + echo "pass"; +else + FAILED=1 + echo "fail"; +fi +echo "Testing entities_sentiment QID Q60" +if [[ "Q60" == $(jq -r ._source.entities_sentiment[0].entityId "${DOC}") ]]; then + echo "pass"; +else + FAILED=1 + echo "fail"; +fi +echo "Testing entities_sentiment entity Korean name" +if [[ "노옥 케이티" == $(jq -r ._source.entities_sentiment[0].translation "${DOC}") ]]; then + echo "pass"; +else + FAILED=1 + echo "fail"; +fi +echo "Testing document name translation" +if [[ "노옥" == $(jq -r ._source.kor_name "${DOC}") ]]; then + echo "pass"; +else + FAILED=1 + echo "fail"; +fi +echo "Testing document language" +if [[ "eng" == $(jq -r ._source.language "${DOC}") ]]; then + echo "pass"; +else + FAILED=1 + echo "fail"; +fi +echo "Testing document category" +if [[ "ARTS_AND_ENTERTAINMENT" == $(jq -r ._source.category "${DOC}") ]]; then + echo "pass"; +else + FAILED=1 + echo "fail"; +fi +echo "Testing document entities_english translation" +if [[ "Niyu Yurk Siti" == $(jq -r ._source.entities_english[0].translation "${DOC}") ]]; then + echo "pass"; +else + FAILED=1 + echo "fail"; +fi +echo "Testing document entities_english QID Q60" +if [[ "Q60" == $(jq -r ._source.entities_english[0].entityId "${DOC}") ]]; then + echo "pass"; +else + FAILED=1 + echo "fail"; +fi +if [[ $FAILED -eq 0 ]]; then + echo "All tests passed" +else + echo "Some tests failed" +fi +exit $FAILED diff --git a/docker/verify.sh b/docker/verify.sh deleted file mode 100755 index 9619e26..0000000 --- a/docker/verify.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env bash - -if [[ "$(tail -1 "$1" 2>/dev/null)" != "exit: 0" ]] ; then - echo "There are test failures. Exiting..." - cat $1 - exit 1 -fi - -if grep -E -i 'exception|"failed":[1-9]' "$1" ; then - echo "Test query failed! See $1 for details. Exiting..." - cat $1 - exit 1 -fi diff --git a/docs/Rosette-API-Plugin-for-Elasticsearch-Doc-Enrichment.md b/docs/Rosette-API-Plugin-for-Elasticsearch-Doc-Enrichment.md index ee0a988..3060773 100644 --- a/docs/Rosette-API-Plugin-for-Elasticsearch-Doc-Enrichment.md +++ b/docs/Rosette-API-Plugin-for-Elasticsearch-Doc-Enrichment.md @@ -18,7 +18,7 @@ See [Rosette’s Elasticsearch Plugins](https://www.rosette.com/elastic/) or con Rosette functionality is called through an ingest node of Elasticsearch that pre-processes documents before indexing takes place. You define a pipeline that specifies the series of processors that transforms or enriches the document. See the [Ingest APIs of Elasticsearch](https://www.elastic.co/guide/en/elasticsearch/reference/master/ingest-apis.html) for more about how to create, add, or delete pipelines. ### Version Compatibility The plugin uses semantic versioning. The first three numbers indicate the version of Elasticsearch that the plugin is compatible with, and the last number indicates the version of the plugin within that Elasticsearch version. -For example, 5.3.1.1 is the second patch version of the plugin for Elasticsearch 5.3.1. +For example, 8.15.0.1 is the second patch version of the plugin for Elasticsearch 8.15.0. ### Installation 1. Install Elasticsearch diff --git a/plugin/pom.xml b/plugin/pom.xml index 1d99d67..b0c0433 100644 --- a/plugin/pom.xml +++ b/plugin/pom.xml @@ -1,27 +1,25 @@ - + 4.0.0 - com.rosette.elasticsearch rosette-elasticsearch-plugin rosette-elasticsearch-parent com.rosette.elasticsearch - 7.17.0.1-SNAPSHOT + 8.15.0.0-SNAPSHOT .. 2017 @@ -40,47 +38,88 @@ 5.3.0 - 1.15 - 3.12.0 + 1.17.1 + 3.17.0 + 5.3.1 + 2.18.0 + 2.17.1 + 1.30.0 + 5.4 3.0.2 - 2.10.13 + 2.13.0 5.0.4 2.2.14 - 5.11.2 + 5.15.0 /rest/worker/v1/ - 2.2 - 1.7.32 + 3.0 + 2.0.16 + 5.11.0 + false + + org.slf4j + slf4j-api + ${slf4j-ext.version} + provided + + + com.fasterxml.jackson.core + jackson-databind + ${jackson.version} + + + com.fasterxml.jackson.core + jackson-annotations + ${jackson-annotations.version} + + + com.fasterxml.jackson.core + jackson-core + ${jackson-annotations.version} + com.basistech.rosette - rosette-api - - - commons-codec - commons-codec - - + rosette-api-common + ${rosette.version} - commons-codec - commons-codec - ${commons-codec.version} + org.apache.httpcomponents.client5 + httpclient5 + ${httpclient.version} - org.slf4j - slf4j-api + junit + junit + test + + + org.mockito + mockito-core + ${mockito.version} + test + + + org.mockito + mockito-junit-jupiter + ${mockito.version} + test org.apache.logging.log4j log4j-slf4j-impl ${log4j.version} + test org.elasticsearch elasticsearch provided + + com.carrotsearch.randomizedtesting + randomizedtesting-runner + net.sf.jopt-simple jopt-simple @@ -95,11 +134,6 @@ - - org.apache.logging.log4j - log4j-api - provided - org.elasticsearch.test framework @@ -109,7 +143,6 @@ org.apache.commons commons-lang3 - commons-logging commons-logging @@ -135,7 +168,6 @@ ${mockserver.version} test - io.swagger swagger-core @@ -144,11 +176,11 @@ commons-codec commons-codec - + jakarta.validation jakarta.validation-api - + jakarta.xml.bind jakarta.xml.bind-api @@ -156,7 +188,7 @@ javax.validation validation-api - + javax.xml.bind jaxb-api @@ -180,10 +212,6 @@ ${mockserver.version} test - - ch.qos.logback - logback-classic - com.github.java-json-tools json-schema-validator @@ -208,10 +236,6 @@ org.hamcrest hamcrest - - org.slf4j - slf4j-ext - @@ -324,46 +348,6 @@ - - - org.apache.maven.plugins - maven-shade-plugin - - - package - - shade - - - true - true - - - - - META-INF/maven/dependencies.properties - - - - - - com.basistech:adm-json - com.basistech.rosette:rosette-api - com.basistech.rosette:rosette-api-json - com.basistech:common-api-jackson - com.fasterxml.jackson.core:* - - - - - com.fasterxml - com.basistech.shaded.com.fasterxml - - - - - - org.apache.maven.plugins maven-assembly-plugin @@ -420,6 +404,7 @@ http://localhost:${mockserver.port}${mockserver.baseurl} + ${skipTests} @@ -432,13 +417,6 @@ verify - - - com.basistech:adm-json - com.basistech.rosette:rosette-api - com.basistech.rosette:rosette-api-json - com.basistech:common-api-jackson - false @@ -471,7 +449,7 @@ maven-javadoc-plugin ${maven-javadoc-plugin.version} - ${jdk.version} + ${maven.compiler.source} true diff --git a/plugin/src/main/assemblies/plugin.xml b/plugin/src/main/assemblies/plugin.xml index aa1dfa1..dfa3b51 100644 --- a/plugin/src/main/assemblies/plugin.xml +++ b/plugin/src/main/assemblies/plugin.xml @@ -1,4 +1,19 @@ + + plugin @@ -19,15 +34,6 @@ true false - - - com.fasterxml.jackson.core:jackson-core - com.basistech:adm-json - com.basistech.rosette:rosette-api - com.basistech.rosette:rosette-api-json - com.basistech:common-api-jackson - com.fasterxml.jackson.core:* - diff --git a/plugin/src/main/java/com/rosette/elasticsearch/CategoriesProcessor.java b/plugin/src/main/java/com/rosette/elasticsearch/CategoriesProcessor.java index 9a1f5d7..c5207fc 100644 --- a/plugin/src/main/java/com/rosette/elasticsearch/CategoriesProcessor.java +++ b/plugin/src/main/java/com/rosette/elasticsearch/CategoriesProcessor.java @@ -1,42 +1,34 @@ -/* -* Copyright 2017 Basis Technology Corp. -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ +/******************************************************************************* + * This data and information is proprietary to, and a valuable trade secret + * of, Basis Technology Corp. It is given in confidence by Basis Technology + * and may only be used as permitted under the license agreement under which + * it has been distributed, and in no other way. + * + * Copyright (c) 2024 Basis Technology Corporation All rights reserved. + * + * The technical data and information provided herein are provided with + * `limited rights', and the computer software provided herein is provided + * with `restricted rights' as those terms are defined in DAR and ASPR + * 7-104.9(a). + * + ******************************************************************************/ + package com.rosette.elasticsearch; -import com.basistech.rosette.api.HttpRosetteAPIException; -import com.basistech.rosette.apimodel.CategoriesOptions; -import com.basistech.rosette.apimodel.CategoriesResponse; -import com.basistech.rosette.apimodel.DocumentRequest; +import com.fasterxml.jackson.databind.JsonNode; +import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.elasticsearch.ElasticsearchException; -import org.elasticsearch.common.Strings; -import org.elasticsearch.common.logging.Loggers; import org.elasticsearch.ingest.ConfigurationUtils; import org.elasticsearch.ingest.IngestDocument; import org.elasticsearch.ingest.Processor; -import java.security.AccessController; -import java.security.PrivilegedAction; -import java.util.Map; -import static com.basistech.rosette.api.common.AbstractRosetteAPI.CATEGORIES_SERVICE_PATH; +import java.util.Map; public class CategoriesProcessor extends RosetteAbstractProcessor { - public static final String TYPE = "ros_categories"; - private static final Logger LOGGER = Loggers - .getLogger(CategoriesProcessor.class, CategoriesProcessor.class.getName()); + private static final Logger LOGGER = LogManager.getLogger(CategoriesProcessor.class); + private static final String CATEGORIES_SERVICE_PATH = "categories"; CategoriesProcessor(RosetteApiWrapper rosAPI, String tag, String description, String inputField, String targetField) { @@ -44,33 +36,29 @@ public class CategoriesProcessor extends RosetteAbstractProcessor { } @Override - public void processDocument(String inputText, IngestDocument ingestDocument) throws Exception { + public void processDocument(String inputText, IngestDocument ingestDocument) { // call /categories endpoint and set the top result in the field - DocumentRequest request = DocumentRequest.builder() - .content(inputText).build(); - CategoriesResponse response; try { - // RosApi client binding's Jackson needs elevated privilege - response = AccessController.doPrivileged((PrivilegedAction) () -> - rosAPI.getHttpRosetteAPI().perform(CATEGORIES_SERVICE_PATH, request, CategoriesResponse.class) - ); - } catch (HttpRosetteAPIException ex) { - LOGGER.error(ex.getErrorResponse().getMessage()); - throw new ElasticsearchException(ex.getErrorResponse().getMessage(), ex); - } - - if (response.getCategories() != null - && !response.getCategories().isEmpty() - && response.getCategories().get(0) != null - && !Strings.isNullOrEmpty(response.getCategories().get(0).getLabel())) { - ingestDocument.setFieldValue(targetField, response.getCategories().get(0).getLabel()); - } else { - throw new ElasticsearchException(TYPE + " ingest processor failed to categorize document."); + JsonNode resp = rosAPI.performDocumentRequest(CATEGORIES_SERVICE_PATH, inputText, null); + JsonNode categories = resp.get("categories"); + if (categories != null) { + JsonNode category = categories.get(0); + if (category != null) { + ingestDocument.setFieldValue(targetField, category.get("label").asText()); + } else { + throw new ElasticsearchException(TYPE + " ingest processor failed to categorize document."); + } + } else { + throw new ElasticsearchException(TYPE + " ingest processor failed to categorize document."); + } + } catch (HttpClientException | HttpServerException ex) { + LOGGER.error(ex.getMessage()); + throw new ElasticsearchException(ex.getMessage(), ex); } } public static final class Factory implements Processor.Factory { - private RosetteApiWrapper rosAPI; + private final RosetteApiWrapper rosAPI; Factory(RosetteApiWrapper rosAPI) { this.rosAPI = rosAPI; @@ -78,7 +66,7 @@ public static final class Factory implements Processor.Factory { @Override public Processor create(Map registry, String processorTag, - String processorDescription, Map config) throws Exception { + String processorDescription, Map config) { String inputField = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "field"); String targetField = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, Parameters.TARGET_FIELD.name, Parameters.TARGET_FIELD.defaultValue); @@ -89,8 +77,8 @@ public Processor create(Map registry, String processo enum Parameters { TARGET_FIELD("target_field", "ros_category"); - String name; - String defaultValue; + final String name; + final String defaultValue; Parameters(String name, String defaultValue) { this.name = name; diff --git a/plugin/src/main/java/com/rosette/elasticsearch/EntitiesProcessor.java b/plugin/src/main/java/com/rosette/elasticsearch/EntitiesProcessor.java index 71fdcf0..bd7bec2 100644 --- a/plugin/src/main/java/com/rosette/elasticsearch/EntitiesProcessor.java +++ b/plugin/src/main/java/com/rosette/elasticsearch/EntitiesProcessor.java @@ -1,59 +1,41 @@ -/* -* Copyright 2017 Basis Technology Corp. -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ +/******************************************************************************* + * This data and information is proprietary to, and a valuable trade secret + * of, Basis Technology Corp. It is given in confidence by Basis Technology + * and may only be used as permitted under the license agreement under which + * it has been distributed, and in no other way. + * + * Copyright (c) 2024 Basis Technology Corporation All rights reserved. + * + * The technical data and information provided herein are provided with + * `limited rights', and the computer software provided herein is provided + * with `restricted rights' as those terms are defined in DAR and ASPR + * 7-104.9(a). + * + ******************************************************************************/ package com.rosette.elasticsearch; -import com.basistech.rosette.api.HttpRosetteAPIException; -import com.basistech.rosette.apimodel.DocumentRequest; -import com.basistech.rosette.apimodel.EntitiesOptions; -import com.basistech.rosette.apimodel.NameTranslationRequest; -import com.basistech.rosette.apimodel.NameTranslationResponse; -import com.basistech.rosette.apimodel.SentimentOptions; -import com.basistech.rosette.dm.AnnotatedText; -import com.basistech.rosette.dm.Entity; -import com.basistech.rosette.dm.Mention; import com.basistech.util.LanguageCode; +import com.fasterxml.jackson.databind.JsonNode; +import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.elasticsearch.ElasticsearchException; -import org.elasticsearch.common.logging.Loggers; import org.elasticsearch.ingest.ConfigurationUtils; import org.elasticsearch.ingest.IngestDocument; import org.elasticsearch.ingest.Processor; -import java.security.AccessController; -import java.security.PrivilegedAction; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.stream.Collectors; - -import static com.basistech.rosette.api.common.AbstractRosetteAPI.ENTITIES_SERVICE_PATH; -import static com.basistech.rosette.api.common.AbstractRosetteAPI.NAME_TRANSLATION_SERVICE_PATH; -import static com.basistech.rosette.api.common.AbstractRosetteAPI.SENTIMENT_SERVICE_PATH; public class EntitiesProcessor extends RosetteAbstractProcessor { - public static final String TYPE = "ros_entities"; - - private static final Logger LOGGER = Loggers.getLogger(EntitiesProcessor.class, EntitiesProcessor.class.getName()); - - private boolean includeOffsets; - private boolean doTranslate; - private LanguageCode translateLanguage; - private boolean doSentiment; + private static final String SERVICE_PATH = "entities"; + private static final Logger LOGGER = LogManager.getLogger(TYPE); + private final boolean includeOffsets; + private final boolean doTranslate; + private final LanguageCode translateLanguage; + private final boolean doSentiment; EntitiesProcessor(RosetteApiWrapper rosAPI, String tag, String description, String inputField, String targetField, boolean includeOffsets, boolean doTranslate, LanguageCode translateLanguage, @@ -63,39 +45,32 @@ public class EntitiesProcessor extends RosetteAbstractProcessor { this.doTranslate = doTranslate; this.translateLanguage = translateLanguage; this.doSentiment = doSentiment; + LOGGER.info("Creating EntitiesProcessor {}", TYPE); } @Override - public void processDocument(String inputText, IngestDocument ingestDocument) throws Exception { - //Need to use the ADM for entities so we get offsets - AnnotatedText adm; - + public void processDocument(String inputText, IngestDocument ingestDocument) throws ElasticsearchException { + JsonNode resp; //If entity level sentiment is desired, use the entity information from the ASCENT call try { //SENTIMENT if (doSentiment) { - DocumentRequest sentrequest = DocumentRequest.builder() - .content(inputText).build(); - adm = AccessController.doPrivileged((PrivilegedAction) () -> - rosAPI.getHttpRosetteAPI().perform(SENTIMENT_SERVICE_PATH, sentrequest) - ); + resp = rosAPI.performDocumentRequest(SentimentProcessor.SERVICE_PATH, inputText, null); } else { - //REX - DocumentRequest entityrequest = DocumentRequest.builder() - .content(inputText).build(); - adm = AccessController.doPrivileged((PrivilegedAction) () -> - rosAPI.getHttpRosetteAPI().perform(ENTITIES_SERVICE_PATH, entityrequest) - ); + resp = rosAPI.performDocumentRequest(SERVICE_PATH, inputText, null); } - } catch (HttpRosetteAPIException ex) { - LOGGER.error(ex.getErrorResponse().getMessage()); - throw new ElasticsearchException(ex.getErrorResponse().getMessage(), ex); + } catch (HttpClientException | HttpServerException ex) { + LOGGER.error(ex.getMessage()); + throw new ElasticsearchException(ex.getMessage(), ex); } - - List> entities = adm.getEntities().stream().map(this::processEntity) - .collect(Collectors.toList()); - - ingestDocument.setFieldValue(targetField, entities); + JsonNode elems = resp.get("entities"); + List> entityList = new ArrayList<>(); + if (elems != null) { + for (JsonNode elem : elems) { + entityList.add(processEntity(elem)); + } + } + ingestDocument.setFieldValue(targetField, entityList); } public static final class Factory implements Processor.Factory { @@ -124,67 +99,56 @@ public Processor create(Map registry, String processo return new EntitiesProcessor(rosAPI, processorTag, processorDescription, inputField, targetField, includeOffsets, doTranslate, translateLanguage, doSentiment); + } } - //Transforms the Entity object into a Map that ES can serialize. - // Calls RNT on each head mention for a translation (if requested) - private HashMap processEntity(Entity entity) { - List mentions = entity.getMentions(); - String headMention = mentions.get(entity.getHeadMentionIndex()).getNormalized(); - String type = entity.getType(); - + private HashMap processEntity(JsonNode entity) { HashMap toReturn = new HashMap<>(); - - toReturn.put("mention", headMention); - toReturn.put("entityId", entity.getEntityId()); + String type = entity.get("type").asText(); + String normalized = entity.get("normalized").asText(); + toReturn.put("mention", normalized); + toReturn.put("entityId", entity.get("entityId").asText()); toReturn.put("type", type); - toReturn.put("count", mentions.size()); + toReturn.put("count", entity.get("count").asInt()); if (includeOffsets) { List> offsets = new ArrayList<>(); - for (Mention mention : mentions) { - Map offsetMap = new HashMap<>(); - offsetMap.put("start", mention.getStartOffset()); - offsetMap.put("end", mention.getEndOffset()); - offsets.add(offsetMap); + if (entity.get("mentionOffsets") != null) { + for (JsonNode mention : entity.get("mentionOffsets")) { + Map offsetMap = new HashMap<>(); + offsetMap.put("start", mention.get("startOffset").asInt()); + offsetMap.put("end", mention.get("endOffset").asInt()); + offsets.add(offsetMap); + } } toReturn.put("offsets", offsets); } - - //RNT + if (doSentiment) { + JsonNode sentiment = entity.get("sentiment"); + if (sentiment != null) { + toReturn.put("sentiment", sentiment.get("label").asText()); + } + } if (doTranslate && ("PERSON".equalsIgnoreCase(type) || "LOCATION".equalsIgnoreCase(type) || "ORGANIZATION".equalsIgnoreCase(type))) { - NameTranslationRequest rntrequest = NameTranslationRequest.builder() - .name(headMention) - .targetLanguage(translateLanguage) - .entityType(type) - .build(); - NameTranslationResponse rntresponse; try { - rntresponse = AccessController.doPrivileged((PrivilegedAction) () -> - rosAPI.getHttpRosetteAPI().perform(NAME_TRANSLATION_SERVICE_PATH, rntrequest, - NameTranslationResponse.class) - ); - } catch (HttpRosetteAPIException ex) { - LOGGER.error(ex.getErrorResponse().getMessage()); - throw new ElasticsearchException(ex.getErrorResponse().getMessage(), ex); + JsonNode rntresponse = rosAPI.performNameTranslationRequest(NameTranslationProcessor.SERVICE_PATH, + normalized, translateLanguage, type, null, null, null, null); + toReturn.put("translation", rntresponse.get("translation").asText()); + } catch (HttpClientException | HttpServerException ex) { + LOGGER.error(ex.getMessage()); + throw new ElasticsearchException(ex.getMessage(), ex); } - toReturn.put("translation", rntresponse.getTranslation()); } - - if (entity.getSentiment() != null) { - toReturn.put("sentiment", entity.getSentiment().get(0).getLabel()); - } - return toReturn; } enum Parameters { - TARGET_FIELD("target_field", "ros_entities"), + TARGET_FIELD("target_field", TYPE), OFFSETS("include_offsets", "false"), TRANSLATE("include_translation", "false"), TRANSLATE_LANGUAGE("translation_language", "eng"), @@ -199,3 +163,4 @@ enum Parameters { } } } + diff --git a/plugin/src/main/java/com/rosette/elasticsearch/HttpClientException.java b/plugin/src/main/java/com/rosette/elasticsearch/HttpClientException.java new file mode 100644 index 0000000..99cb48f --- /dev/null +++ b/plugin/src/main/java/com/rosette/elasticsearch/HttpClientException.java @@ -0,0 +1,26 @@ +/******************************************************************************* + * This data and information is proprietary to, and a valuable trade secret + * of, Basis Technology Corp. It is given in confidence by Basis Technology + * and may only be used as permitted under the license agreement under which + * it has been distributed, and in no other way. + * + * Copyright (c) 2024 Basis Technology Corporation All rights reserved. + * + * The technical data and information provided herein are provided with + * `limited rights', and the computer software provided herein is provided + * with `restricted rights' as those terms are defined in DAR and ASPR + * 7-104.9(a). + * + ******************************************************************************/ + +package com.rosette.elasticsearch; + +public class HttpClientException extends Exception { + public HttpClientException(String message) { + super(message); + } + + public HttpClientException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/plugin/src/main/java/com/rosette/elasticsearch/HttpClientRuntimeException.java b/plugin/src/main/java/com/rosette/elasticsearch/HttpClientRuntimeException.java new file mode 100644 index 0000000..9c4717f --- /dev/null +++ b/plugin/src/main/java/com/rosette/elasticsearch/HttpClientRuntimeException.java @@ -0,0 +1,22 @@ +/******************************************************************************* + * This data and information is proprietary to, and a valuable trade secret + * of, Basis Technology Corp. It is given in confidence by Basis Technology + * and may only be used as permitted under the license agreement under which + * it has been distributed, and in no other way. + * + * Copyright (c) 2024 Basis Technology Corporation All rights reserved. + * + * The technical data and information provided herein are provided with + * `limited rights', and the computer software provided herein is provided + * with `restricted rights' as those terms are defined in DAR and ASPR + * 7-104.9(a). + * + ******************************************************************************/ + +package com.rosette.elasticsearch; + +public class HttpClientRuntimeException extends RuntimeException { + public HttpClientRuntimeException(String msg) { + super(msg); + } +} diff --git a/plugin/src/main/java/com/rosette/elasticsearch/HttpServerException.java b/plugin/src/main/java/com/rosette/elasticsearch/HttpServerException.java new file mode 100644 index 0000000..02cf0b9 --- /dev/null +++ b/plugin/src/main/java/com/rosette/elasticsearch/HttpServerException.java @@ -0,0 +1,26 @@ +/******************************************************************************* + * This data and information is proprietary to, and a valuable trade secret + * of, Basis Technology Corp. It is given in confidence by Basis Technology + * and may only be used as permitted under the license agreement under which + * it has been distributed, and in no other way. + * + * Copyright (c) 2024 Basis Technology Corporation All rights reserved. + * + * The technical data and information provided herein are provided with + * `limited rights', and the computer software provided herein is provided + * with `restricted rights' as those terms are defined in DAR and ASPR + * 7-104.9(a). + * + ******************************************************************************/ + +package com.rosette.elasticsearch; + +public class HttpServerException extends Exception { + public HttpServerException(String message) { + super(message); + } + + public HttpServerException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/plugin/src/main/java/com/rosette/elasticsearch/LanguageProcessor.java b/plugin/src/main/java/com/rosette/elasticsearch/LanguageProcessor.java index 82ea517..5e13532 100644 --- a/plugin/src/main/java/com/rosette/elasticsearch/LanguageProcessor.java +++ b/plugin/src/main/java/com/rosette/elasticsearch/LanguageProcessor.java @@ -1,76 +1,70 @@ -/* -* Copyright 2017 Basis Technology Corp. -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ +/******************************************************************************* + * This data and information is proprietary to, and a valuable trade secret + * of, Basis Technology Corp. It is given in confidence by Basis Technology + * and may only be used as permitted under the license agreement under which + * it has been distributed, and in no other way. + * + * Copyright (c) 2024 Basis Technology Corporation All rights reserved. + * + * The technical data and information provided herein are provided with + * `limited rights', and the computer software provided herein is provided + * with `restricted rights' as those terms are defined in DAR and ASPR + * 7-104.9(a). + * + ******************************************************************************/ package com.rosette.elasticsearch; -import com.basistech.rosette.api.HttpRosetteAPIException; -import com.basistech.rosette.apimodel.DocumentRequest; -import com.basistech.rosette.apimodel.LanguageOptions; -import com.basistech.rosette.apimodel.LanguageResponse; +import com.fasterxml.jackson.databind.JsonNode; +import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.elasticsearch.ElasticsearchException; -import org.elasticsearch.common.logging.Loggers; import org.elasticsearch.ingest.ConfigurationUtils; import org.elasticsearch.ingest.IngestDocument; import org.elasticsearch.ingest.Processor; -import java.security.AccessController; -import java.security.PrivilegedAction; import java.util.Map; -import static com.basistech.rosette.api.common.AbstractRosetteAPI.LANGUAGE_SERVICE_PATH; - public class LanguageProcessor extends RosetteAbstractProcessor { - + public static final String LANGUAGE_SERVICE_PATH = "language"; public static final String TYPE = "ros_language"; - - private static final Logger LOGGER = Loggers.getLogger(LanguageProcessor.class, LanguageProcessor.class.getName()); + private static final Logger LOGGER = LogManager.getLogger(LanguageProcessor.class); LanguageProcessor(RosetteApiWrapper rosAPI, String tag, String description, String inputField, String targetField) { super(rosAPI, tag, description, TYPE, inputField, targetField); } @Override - public void processDocument(String inputText, IngestDocument ingestDocument) throws Exception { - // call /language endpoint and set the result in the field - DocumentRequest request = DocumentRequest.builder() - .content(inputText).build(); - LanguageResponse response; + public void processDocument(String inputText, IngestDocument ingestDocument) { + boolean guessedLanguage = true; try { - // RosApi client binding's Jackson needs elevated privilege - response = AccessController.doPrivileged((PrivilegedAction) () -> - rosAPI.getHttpRosetteAPI().perform(LANGUAGE_SERVICE_PATH, request, - LanguageResponse.class) - ); - } catch (HttpRosetteAPIException ex) { - LOGGER.error(ex.getErrorResponse().getMessage()); - throw new ElasticsearchException(ex.getErrorResponse().getMessage(), ex); - } - - if (response.getLanguageDetections() != null - && !response.getLanguageDetections().isEmpty() - && response.getLanguageDetections().get(0) != null - && response.getLanguageDetections().get(0).getLanguage() != null) { - ingestDocument.setFieldValue(targetField, response.getLanguageDetections().get(0).getLanguage().ISO639_3()); - } else { - throw new ElasticsearchException(TYPE + " ingest processor failed to guess language of document."); + JsonNode resp = rosAPI.performDocumentRequest(LANGUAGE_SERVICE_PATH, inputText, null); + JsonNode detections = resp.get("languageDetections"); + if (detections != null) { + JsonNode detection = detections.get(0); + if (detection != null) { + JsonNode language = detection.get("language"); + if (language != null) { + ingestDocument.setFieldValue(targetField, language.asText()); + } else { + guessedLanguage = false; + } + } else { + guessedLanguage = false; + } + } else { + guessedLanguage = false; + } + if (!guessedLanguage) { + throw new ElasticsearchException(TYPE + " ingest processor failed to guess language of document."); + } + } catch (HttpClientException | HttpServerException ex) { + LOGGER.error(ex.getMessage()); + throw new ElasticsearchException(ex.getMessage(), ex); } } public static final class Factory implements Processor.Factory { - private RosetteApiWrapper rosAPI; + private final RosetteApiWrapper rosAPI; Factory(RosetteApiWrapper rosAPI) { this.rosAPI = rosAPI; @@ -78,7 +72,7 @@ public static final class Factory implements Processor.Factory { @Override public Processor create(Map registry, String processorTag, - String processorDescription, Map config) throws Exception { + String processorDescription, Map config) { String inputField = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "field"); String targetField = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, Parameters.TARGET_FIELD.name, Parameters.TARGET_FIELD.defaultValue); @@ -87,10 +81,10 @@ public Processor create(Map registry, String processo } enum Parameters { - TARGET_FIELD("target_field", "ros_language"); + TARGET_FIELD("target_field", TYPE); - String name; - String defaultValue; + final String name; + final String defaultValue; Parameters(String name, String defaultValue) { this.name = name; diff --git a/plugin/src/main/java/com/rosette/elasticsearch/NameTranslationProcessor.java b/plugin/src/main/java/com/rosette/elasticsearch/NameTranslationProcessor.java index 63f0e41..e460f38 100644 --- a/plugin/src/main/java/com/rosette/elasticsearch/NameTranslationProcessor.java +++ b/plugin/src/main/java/com/rosette/elasticsearch/NameTranslationProcessor.java @@ -1,50 +1,42 @@ -/* -* Copyright 2017 Basis Technology Corp. -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ +/******************************************************************************* + * This data and information is proprietary to, and a valuable trade secret + * of, Basis Technology Corp. It is given in confidence by Basis Technology + * and may only be used as permitted under the license agreement under which + * it has been distributed, and in no other way. + * + * Copyright (c) 2024 Basis Technology Corporation All rights reserved. + * + * The technical data and information provided herein are provided with + * `limited rights', and the computer software provided herein is provided + * with `restricted rights' as those terms are defined in DAR and ASPR + * 7-104.9(a). + * + ******************************************************************************/ package com.rosette.elasticsearch; -import com.basistech.rosette.api.HttpRosetteAPIException; -import com.basistech.rosette.api.common.AbstractRosetteAPI; -import com.basistech.rosette.apimodel.NameTranslationRequest; -import com.basistech.rosette.apimodel.NameTranslationResponse; import com.basistech.util.ISO15924; import com.basistech.util.LanguageCode; +import com.fasterxml.jackson.databind.JsonNode; +import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.elasticsearch.ElasticsearchException; -import org.elasticsearch.common.logging.Loggers; import org.elasticsearch.ingest.ConfigurationUtils; import org.elasticsearch.ingest.IngestDocument; import org.elasticsearch.ingest.Processor; -import java.security.AccessController; -import java.security.PrivilegedAction; import java.util.Map; public class NameTranslationProcessor extends RosetteAbstractProcessor { - public static final String TYPE = "ros_name_translation"; + public static final String SERVICE_PATH = "name-translation"; + private static final Logger LOGGER = LogManager.getLogger(TYPE); - private static final Logger LOGGER = Loggers - .getLogger(NameTranslationProcessor.class, NameTranslationProcessor.class.getName()); - - private LanguageCode targetLanguage; - private ISO15924 targetScript; - private String entityType; - private LanguageCode sourceLanguage; - private ISO15924 sourceScript; - private LanguageCode sourceOrigin; + private final LanguageCode targetLanguage; + private final ISO15924 targetScript; + private final String entityType; + private final LanguageCode sourceLanguage; + private final ISO15924 sourceScript; + private final LanguageCode sourceOrigin; NameTranslationProcessor(RosetteApiWrapper rosAPI, String tag, String description, String inputField, String targetField, LanguageCode targetLanguage, ISO15924 targetScript, String entityType, @@ -59,34 +51,20 @@ public class NameTranslationProcessor extends RosetteAbstractProcessor { } @Override - public void processDocument(String inputText, IngestDocument ingestDocument) throws Exception { + public void processDocument(String inputText, IngestDocument ingestDocument) { // call /name-translation endpoint and set the result in the field - NameTranslationRequest request = NameTranslationRequest.builder() - .name(inputText) - .targetLanguage(targetLanguage) - .entityType(entityType) - .targetScript(targetScript) - .sourceLanguageOfUse(sourceLanguage) - .sourceLanguageOfOrigin(sourceOrigin) - .sourceScript(sourceScript).build(); - - NameTranslationResponse response; try { - // RosApi client binding's Jackson needs elevated privilege - response = AccessController.doPrivileged((PrivilegedAction) () -> - rosAPI.getHttpRosetteAPI().perform(AbstractRosetteAPI.NAME_TRANSLATION_SERVICE_PATH, request, - NameTranslationResponse.class) - ); - } catch (HttpRosetteAPIException ex) { - LOGGER.error(ex.getErrorResponse().getMessage()); - throw new ElasticsearchException(ex.getErrorResponse().getMessage(), ex); + JsonNode resp = rosAPI.performNameTranslationRequest(SERVICE_PATH, inputText, + targetLanguage, entityType, targetScript, sourceLanguage, sourceOrigin, sourceScript); + ingestDocument.setFieldValue(targetField, resp.get("translation").asText()); + } catch (HttpClientException | HttpServerException ex) { + LOGGER.error(ex.getMessage()); + throw new ElasticsearchException(ex.getMessage(), ex); } - - ingestDocument.setFieldValue(targetField, response.getTranslation()); } public static final class Factory implements Processor.Factory { - private RosetteApiWrapper rosAPI; + private final RosetteApiWrapper rosAPI; Factory(RosetteApiWrapper rosAPI) { this.rosAPI = rosAPI; @@ -94,7 +72,7 @@ public static final class Factory implements Processor.Factory { @Override public Processor create(Map registry, String processorTag, - String processorDescription, Map config) throws Exception { + String processorDescription, Map config) { String inputField = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "field"); String targetField = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, @@ -132,8 +110,8 @@ enum Parameters { SOURCE_SCRIPT("source_script", "Zyyy"), SOURCE_LANGUAGE_ORIGIN("source_language_of_origin", "xxx"); - String name; - String defaultValue; + final String name; + final String defaultValue; Parameters(String name, String defaultValue) { this.name = name; diff --git a/plugin/src/main/java/com/rosette/elasticsearch/RosetteAbstractProcessor.java b/plugin/src/main/java/com/rosette/elasticsearch/RosetteAbstractProcessor.java index d0879cd..5cf5700 100644 --- a/plugin/src/main/java/com/rosette/elasticsearch/RosetteAbstractProcessor.java +++ b/plugin/src/main/java/com/rosette/elasticsearch/RosetteAbstractProcessor.java @@ -1,30 +1,34 @@ -/* -* Copyright 2020 Basis Technology Corp. -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ +/******************************************************************************* + * This data and information is proprietary to, and a valuable trade secret + * of, Basis Technology Corp. It is given in confidence by Basis Technology + * and may only be used as permitted under the license agreement under which + * it has been distributed, and in no other way. + * + * Copyright (c) 2024 Basis Technology Corporation All rights reserved. + * + * The technical data and information provided herein are provided with + * `limited rights', and the computer software provided herein is provided + * with `restricted rights' as those terms are defined in DAR and ASPR + * 7-104.9(a). + * + ******************************************************************************/ package com.rosette.elasticsearch; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.elasticsearch.ElasticsearchException; -import org.elasticsearch.SpecialPermission; import org.elasticsearch.common.Strings; import org.elasticsearch.ingest.AbstractProcessor; import org.elasticsearch.ingest.IngestDocument; +import java.io.Closeable; +import java.io.IOException; + /** * Class that holds code shared by all Rosette ingest processors */ -public abstract class RosetteAbstractProcessor extends AbstractProcessor { +public abstract class RosetteAbstractProcessor extends AbstractProcessor implements Closeable { + private static final Logger LOGGER = LogManager.getLogger("RosetteAbstractProcessor"); protected String inputField; protected String targetField; @@ -40,6 +44,14 @@ public abstract class RosetteAbstractProcessor extends AbstractProcessor { this.processorType = processorType; } + @Override + public void close() throws IOException { + LOGGER.info("Closing Rosette API client"); + if (rosAPI != null) { + rosAPI.close(); + } + } + @Override public IngestDocument execute(IngestDocument ingestDocument) throws Exception { if (ingestDocument.hasField(targetField)) { @@ -52,17 +64,10 @@ public IngestDocument execute(IngestDocument ingestDocument) throws Exception { } String inputText = ingestDocument.getFieldValue(inputField, String.class); - if (Strings.isNullOrEmpty(inputText)) { //Do nothing return ingestDocument; } - - SecurityManager sm = System.getSecurityManager(); - if (sm != null) { - sm.checkPermission(new SpecialPermission()); - } - processDocument(inputText, ingestDocument); return ingestDocument; } diff --git a/plugin/src/main/java/com/rosette/elasticsearch/RosetteApiWrapper.java b/plugin/src/main/java/com/rosette/elasticsearch/RosetteApiWrapper.java index f2b91fc..2918dc1 100644 --- a/plugin/src/main/java/com/rosette/elasticsearch/RosetteApiWrapper.java +++ b/plugin/src/main/java/com/rosette/elasticsearch/RosetteApiWrapper.java @@ -1,37 +1,67 @@ -/* -* Copyright 2017 Basis Technology Corp. -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ +/******************************************************************************* + * This data and information is proprietary to, and a valuable trade secret + * of, Basis Technology Corp. It is given in confidence by Basis Technology + * and may only be used as permitted under the license agreement under which + * it has been distributed, and in no other way. + * + * Copyright (c) 2024 Basis Technology Corporation All rights reserved. + * + * The technical data and information provided herein are provided with + * `limited rights', and the computer software provided herein is provided + * with `restricted rights' as those terms are defined in DAR and ASPR + * 7-104.9(a). + * + ******************************************************************************/ package com.rosette.elasticsearch; -import java.io.IOException; -import java.io.InputStream; -import java.util.Properties; - +import com.basistech.util.ISO15924; +import com.basistech.util.LanguageCode; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.hc.client5.http.classic.methods.HttpPost; +import org.apache.hc.client5.http.impl.classic.CloseableHttpClient; +import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse; +import org.apache.hc.client5.http.impl.classic.HttpClientBuilder; +import org.apache.hc.client5.http.impl.classic.HttpClients; +import org.apache.hc.client5.http.impl.io.PoolingHttpClientConnectionManager; +import org.apache.hc.core5.http.ContentType; +import org.apache.hc.core5.http.Header; +import org.apache.hc.core5.http.HttpResponse; +import org.apache.hc.core5.http.HttpStatus; +import org.apache.hc.core5.http.ParseException; +import org.apache.hc.core5.http.io.entity.EntityUtils; +import org.apache.hc.core5.http.io.entity.StringEntity; +import org.apache.hc.core5.http.message.BasicHeader; +import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.elasticsearch.ElasticsearchException; import org.elasticsearch.common.Strings; -import com.basistech.rosette.api.HttpRosetteAPI; -import org.elasticsearch.common.logging.Loggers; +import java.io.Closeable; +import java.io.IOException; +import java.io.InputStream; +import java.security.AccessController; +import java.security.PrivilegedAction; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; +import java.util.Properties; //Configures and holds on to the shared Rosette API client -public final class RosetteApiWrapper { - - private static final Logger LOGGER = Loggers.getLogger(RosetteApiWrapper.class, RosetteApiWrapper.class.getName()); +public final class RosetteApiWrapper implements Closeable { + private static final Logger LOGGER = LogManager.getLogger("RosetteApiWrapper"); + private static final int MAX_CONNECTIONS = 10; + private static final int MAX_CONNECTIONS_PER_ROUTE = 10; + private static final String DEFAULT_URL_BASE = "https://api.rosette.com/rest/v1"; private static final String APP_HEADER; + private final String targetURL; + private CloseableHttpClient httpClient; + private boolean closed = true; + private final ObjectMapper objectMapper; + static { Properties props = new Properties(); String appHeader = ""; @@ -50,39 +80,176 @@ public final class RosetteApiWrapper { } } - // TODO: revisit this when we use embedded client - private HttpRosetteAPI httpRosetteAPI; - RosetteApiWrapper() { this(null, null); } RosetteApiWrapper(String apiKey, String altUrl) { + LOGGER.info("Creating Rosette API client"); + objectMapper = new ObjectMapper(); + PoolingHttpClientConnectionManager connectionManager = new PoolingHttpClientConnectionManager(); + connectionManager.setMaxTotal(MAX_CONNECTIONS); + connectionManager.setDefaultMaxPerRoute(MAX_CONNECTIONS_PER_ROUTE); + + HttpClientBuilder builder = HttpClients.custom(); + Collection
defaultHeaders = new ArrayList<>(); + defaultHeaders.add(new BasicHeader("Accept", "application/json")); + defaultHeaders.add(new BasicHeader("Content-Type", "application/json")); + defaultHeaders.add(new BasicHeader("X-RosetteAPI-App", APP_HEADER)); + builder.setDefaultHeaders(defaultHeaders); + builder.setConnectionManager(connectionManager); + if (Strings.isNullOrEmpty(apiKey)) { apiKey = System.getenv("ROSETTE_API_KEY"); } - if (Strings.isNullOrEmpty(altUrl)) { altUrl = System.getenv("ROSETTE_API_URL"); } - - if ((HttpRosetteAPI.DEFAULT_URL_BASE.equalsIgnoreCase(altUrl) || Strings.isNullOrEmpty(altUrl)) + if (!Strings.isNullOrEmpty(altUrl)) { + LOGGER.info("Using URL for Rosette API: {} ", altUrl); + if (altUrl.charAt(altUrl.length() - 1) == '/') { + altUrl = altUrl.substring(0, altUrl.length() - 1); + } + targetURL = altUrl; + } else { + LOGGER.info("Using default URL for Rosette API at : {} ", DEFAULT_URL_BASE); + targetURL = DEFAULT_URL_BASE; + } + if ((DEFAULT_URL_BASE.equalsIgnoreCase(altUrl) || Strings.isNullOrEmpty(altUrl)) && Strings.isNullOrEmpty(apiKey)) { throw new ElasticsearchException("Rosette plugin requires setting an API Key either via the '" + RosetteTextAnalysisPlugin.ROSETTE_API_KEY.getKey() + "' setting, or the 'ROSETTE_API_KEY' environment variable."); } + if (apiKey != null) { + defaultHeaders.add(new BasicHeader("X-RosetteAPI-Key", apiKey)); + } + httpClient = builder.build(); + closed = false; + } - HttpRosetteAPI.Builder clientBuilder = new HttpRosetteAPI.Builder(); - clientBuilder.key(apiKey).additionalHeader("X-RosetteAPI-App", APP_HEADER); - if (!Strings.isNullOrEmpty(altUrl)) { - LOGGER.info("Using alternative URL for Rosette API at : {} ", altUrl); - clientBuilder.url(altUrl); + + @Override + public void close() throws IOException { + if (!closed && httpClient != null) { + LOGGER.info("Closing Rosette API client"); + httpClient.close(); + httpClient = null; + closed = true; + } + } + private JsonNode makePostRequest(HttpPost request) throws HttpClientException, HttpServerException { + LOGGER.info("Calling Rosette API with request: {}", request.getRequestUri()); + CloseableHttpResponse response = null; + try { + response = + AccessController.doPrivileged( + (PrivilegedAction) () -> { + try { + LOGGER.info("Executing request: {}", request.getRequestUri()); + return httpClient.execute(request); + } catch (IOException e) { + throw new HttpClientRuntimeException(e.getMessage()); + } + }); + LOGGER.info("Response code: {}", response.getCode()); + checkResponseCode(response); + // Parse the JSON string into a JsonNode + return objectMapper.readTree(EntityUtils.toString(response.getEntity())); + } catch (HttpClientRuntimeException rte) { + LOGGER.error("Error connecting to Rosette API", rte); + throw new ElasticsearchException("Error connecting to Rosette API"); + } catch (IOException | ParseException e) { + LOGGER.error("Error while performing request to name-translation Rosette API ", e); + throw new ElasticsearchException("Error while performing request to name-translation Rosette API", e); + } finally { + request.clear(); + try { + if (response != null) { + response.close(); + } + } catch (IOException e) { + LOGGER.error("Error while closing response and client", e); + } + } + } + public JsonNode performNameTranslationRequest(String path, String name, LanguageCode targetLanguage, + String entityType, ISO15924 targetScript, + LanguageCode sourceLanguageOfUse, LanguageCode sourceLanguageOfOrigin, + ISO15924 sourceScript) + throws HttpClientException, HttpServerException { + if (!path.startsWith("/")) { + path = "/".concat(path); + } + LOGGER.info("Performing name translation request for name: {} to {}", + name, targetLanguage != null ? targetLanguage.languageName() : "unknown"); + Map bodyMap = new HashMap<>(); + bodyMap.put("name", name); + if (entityType != null) { + bodyMap.put("entityType", entityType); + } + if (sourceScript != null && sourceScript != ISO15924.Zyyy) { + bodyMap.put("sourceScript", sourceScript.code4()); + } + if (sourceLanguageOfUse != null) { + bodyMap.put("sourceLanguageOfUse", sourceLanguageOfUse.ISO639_3()); + } + if (sourceLanguageOfOrigin != null) { + bodyMap.put("sourceLanguageOfOrigin", sourceLanguageOfOrigin.ISO639_3()); + } + if (targetLanguage != null) { + bodyMap.put("targetLanguage", targetLanguage.ISO639_3()); + } + if (targetScript != null && sourceScript != ISO15924.Zyyy) { + bodyMap.put("targetScript", targetScript.code4()); + } + try (StringEntity payload = + new StringEntity(objectMapper.writeValueAsString(bodyMap), ContentType.APPLICATION_JSON)) { + HttpPost request = new HttpPost(targetURL + path); + request.setEntity(payload); + return makePostRequest(request); + } catch (JsonProcessingException e) { + LOGGER.error("Error generating content for name translation request", e); + } catch (IOException e) { + LOGGER.error("Error while closing payload", e); } - httpRosetteAPI = clientBuilder.build(); + return null; } - public HttpRosetteAPI getHttpRosetteAPI() { - return httpRosetteAPI; + public JsonNode performDocumentRequest(String path, String content, String language) + throws HttpClientException, HttpServerException { + if (!path.startsWith("/")) { + path = "/".concat(path); + } + LOGGER.info("Performing document request for path: {}", path); + + Map bodyMap = new HashMap<>(); + bodyMap.put("content", content); + if (language != null) { + bodyMap.put("language", language); + } + try (StringEntity payload = + new StringEntity(objectMapper.writeValueAsString(bodyMap), ContentType.APPLICATION_JSON)) { + HttpPost docRequest = new HttpPost(targetURL + path); + docRequest.setEntity(payload); + return makePostRequest(docRequest); + } catch (IOException e) { + LOGGER.error("Error while closing payload", e); + } + return null; } + + private void checkResponseCode(HttpResponse response) throws HttpClientException, HttpServerException { + if (response.getCode() == HttpStatus.SC_OK) { + return; + } + int code = response.getCode(); + if (code >= 400 && code < 500) { + throw new HttpClientException(response.getReasonPhrase()); + } + if (code >= 500 && code < 600) { + throw new HttpServerException(response.getReasonPhrase()); + } + } + } diff --git a/plugin/src/main/java/com/rosette/elasticsearch/RosetteTextAnalysisPlugin.java b/plugin/src/main/java/com/rosette/elasticsearch/RosetteTextAnalysisPlugin.java index 2d76e7a..b223c30 100644 --- a/plugin/src/main/java/com/rosette/elasticsearch/RosetteTextAnalysisPlugin.java +++ b/plugin/src/main/java/com/rosette/elasticsearch/RosetteTextAnalysisPlugin.java @@ -1,50 +1,61 @@ -/* -* Copyright 2017 Basis Technology Corp. -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ +/******************************************************************************* + * This data and information is proprietary to, and a valuable trade secret + * of, Basis Technology Corp. It is given in confidence by Basis Technology + * and may only be used as permitted under the license agreement under which + * it has been distributed, and in no other way. + * + * Copyright (c) 2024 Basis Technology Corporation All rights reserved. + * + * The technical data and information provided herein are provided with + * `limited rights', and the computer software provided herein is provided + * with `restricted rights' as those terms are defined in DAR and ASPR + * 7-104.9(a). + * + ******************************************************************************/ package com.rosette.elasticsearch; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.elasticsearch.common.settings.Setting; import org.elasticsearch.ingest.Processor; import org.elasticsearch.plugins.IngestPlugin; import org.elasticsearch.plugins.MapperPlugin; import org.elasticsearch.plugins.Plugin; -public class RosetteTextAnalysisPlugin extends Plugin implements MapperPlugin, IngestPlugin { +import java.io.IOException; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +public class RosetteTextAnalysisPlugin extends Plugin implements AutoCloseable, MapperPlugin, IngestPlugin { public static final Setting ROSETTE_API_KEY = Setting.simpleString("ingest.rosette.api_key", Setting.Property.NodeScope, Setting.Property.Filtered); public static final Setting ROSETTE_API_URL = Setting.simpleString("ingest.rosette.api_url", Setting.Property.NodeScope, Setting.Property.Filtered); + private static final Logger LOGGER = LogManager.getLogger("RosetteAnalysisPlugin"); + private RosetteApiWrapper rosAPI; @Override public List> getSettings() { return Arrays.asList(ROSETTE_API_KEY, ROSETTE_API_URL); } - + @Override + public void close() throws IOException { + LOGGER.info("Closing Rosette API client"); + if (rosAPI != null) { + rosAPI.close(); + } + } @Override public Map getProcessors(Processor.Parameters parameters) { String key = ROSETTE_API_KEY.get(parameters.env.settings()); String altURL = ROSETTE_API_URL.get(parameters.env.settings()); //As this method is called at Node startup, this should ensure only one instance of the api client - RosetteApiWrapper rosAPI = new RosetteApiWrapper(key, altURL); - + if (rosAPI == null) { + LOGGER.info("Creating Rosette API client"); + rosAPI = new RosetteApiWrapper(key, altURL); + } Map processors = new HashMap<>(); processors.put(LanguageProcessor.TYPE, new LanguageProcessor.Factory(rosAPI)); processors.put(CategoriesProcessor.TYPE, new CategoriesProcessor.Factory(rosAPI)); diff --git a/plugin/src/main/java/com/rosette/elasticsearch/SentimentProcessor.java b/plugin/src/main/java/com/rosette/elasticsearch/SentimentProcessor.java index e33c0dd..2b0a696 100644 --- a/plugin/src/main/java/com/rosette/elasticsearch/SentimentProcessor.java +++ b/plugin/src/main/java/com/rosette/elasticsearch/SentimentProcessor.java @@ -1,44 +1,33 @@ -/* -* Copyright 2017 Basis Technology Corp. -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ +/******************************************************************************* + * This data and information is proprietary to, and a valuable trade secret + * of, Basis Technology Corp. It is given in confidence by Basis Technology + * and may only be used as permitted under the license agreement under which + * it has been distributed, and in no other way. + * + * Copyright (c) 2024 Basis Technology Corporation All rights reserved. + * + * The technical data and information provided herein are provided with + * `limited rights', and the computer software provided herein is provided + * with `restricted rights' as those terms are defined in DAR and ASPR + * 7-104.9(a). + * + ******************************************************************************/ package com.rosette.elasticsearch; -import com.basistech.rosette.api.HttpRosetteAPIException; -import com.basistech.rosette.apimodel.DocumentRequest; -import com.basistech.rosette.apimodel.SentimentOptions; -import com.basistech.rosette.apimodel.SentimentResponse; +import com.fasterxml.jackson.databind.JsonNode; +import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.elasticsearch.ElasticsearchException; -import org.elasticsearch.common.Strings; -import org.elasticsearch.common.logging.Loggers; import org.elasticsearch.ingest.ConfigurationUtils; import org.elasticsearch.ingest.IngestDocument; import org.elasticsearch.ingest.Processor; -import java.security.AccessController; -import java.security.PrivilegedAction; import java.util.Map; -import static com.basistech.rosette.api.common.AbstractRosetteAPI.SENTIMENT_SERVICE_PATH; - public class SentimentProcessor extends RosetteAbstractProcessor { - public static final String TYPE = "ros_sentiment"; - - private static final Logger LOGGER = Loggers.getLogger(SentimentProcessor.class, - SentimentProcessor.class.getName()); + public static final String SERVICE_PATH = "sentiment"; + private static final Logger LOGGER = LogManager.getLogger(TYPE); SentimentProcessor(RosetteApiWrapper rosAPI, String tag, String description, String inputField, String targetField) { @@ -46,32 +35,30 @@ public class SentimentProcessor extends RosetteAbstractProcessor { } @Override - public void processDocument(String inputText, IngestDocument ingestDocument) throws Exception { + public void processDocument(String inputText, IngestDocument ingestDocument) { // call /sentiment endpoint and set the top result in the field - DocumentRequest request = DocumentRequest.builder() - .content(inputText).build(); - SentimentResponse response; try { - // RosApi client binding's Jackson needs elevated privilege - response = AccessController.doPrivileged((PrivilegedAction) () -> - rosAPI.getHttpRosetteAPI().perform(SENTIMENT_SERVICE_PATH, request, - SentimentResponse.class) - ); - } catch (HttpRosetteAPIException ex) { - LOGGER.error(ex.getErrorResponse().getMessage()); - throw new ElasticsearchException(ex.getErrorResponse().getMessage(), ex); - } - - if (response.getDocument() != null - && !Strings.isNullOrEmpty(response.getDocument().getLabel())) { - ingestDocument.setFieldValue(targetField, response.getDocument().getLabel()); - } else { - throw new ElasticsearchException(TYPE + " ingest processor failed to determine sentiment of document."); + JsonNode resp = rosAPI.performDocumentRequest(SERVICE_PATH, inputText, null); + JsonNode document = resp.get("document"); + if (document != null) { + JsonNode label = document.get("label"); + if (label != null) { + ingestDocument.setFieldValue(targetField, label.asText()); + } else { + throw new ElasticsearchException(TYPE + + " ingest processor failed to determine sentiment of document."); + } + } else { + throw new ElasticsearchException(TYPE + " ingest processor failed to determine sentiment of document."); + } + } catch (HttpClientException | HttpServerException ex) { + LOGGER.error(ex.getMessage()); + throw new ElasticsearchException(ex.getMessage(), ex); } } public static final class Factory implements Processor.Factory { - private RosetteApiWrapper rosAPI; + private final RosetteApiWrapper rosAPI; Factory(RosetteApiWrapper rosAPI) { this.rosAPI = rosAPI; @@ -79,7 +66,7 @@ public static final class Factory implements Processor.Factory { @Override public Processor create(Map registry, String processorTag, - String processorDescription, Map config) throws Exception { + String processorDescription, Map config) { String inputField = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "field"); String targetField = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, Parameters.TARGET_FIELD.name, Parameters.TARGET_FIELD.defaultValue); @@ -88,10 +75,10 @@ public Processor create(Map registry, String processo } enum Parameters { - TARGET_FIELD("target_field", "ros_sentiment"); + TARGET_FIELD("target_field", TYPE); - String name; - String defaultValue; + final String name; + final String defaultValue; Parameters(String name, String defaultValue) { this.name = name; diff --git a/plugin/src/main/resources/log4j2.xml b/plugin/src/main/resources/log4j2.xml new file mode 100644 index 0000000..4d6b6c5 --- /dev/null +++ b/plugin/src/main/resources/log4j2.xml @@ -0,0 +1,36 @@ + + + + + + + + + + + + + + + + + + + + + + + diff --git a/plugin/src/main/resources/plugin-descriptor.properties b/plugin/src/main/resources/plugin-descriptor.properties index 2838f17..9c2a813 100644 --- a/plugin/src/main/resources/plugin-descriptor.properties +++ b/plugin/src/main/resources/plugin-descriptor.properties @@ -1,6 +1,21 @@ +################################################################################ +# This data and information is proprietary to, and a valuable trade secret +# of, Basis Technology Corp. It is given in confidence by Basis Technology +# and may only be used as permitted under the license agreement under which +# it has been distributed, and in no other way. +# +# Copyright (c) 2024 Basis Technology Corporation All rights reserved. +# +# The technical data and information provided herein are provided with +# `limited rights', and the computer software provided herein is provided +# with `restricted rights' as those terms are defined in DAR and ASPR +# 7-104.9(a). +# +################################################################################ + description=${project.description} version=${project.version} name=rosapi classname=com.rosette.elasticsearch.RosetteTextAnalysisPlugin -java.version=${jdk.version} +java.version=${maven.compiler.source} elasticsearch.version=${elasticsearch.version} diff --git a/plugin/src/main/resources/plugin-security.policy b/plugin/src/main/resources/plugin-security.policy index 34fde7f..4cf47b8 100644 --- a/plugin/src/main/resources/plugin-security.policy +++ b/plugin/src/main/resources/plugin-security.policy @@ -1,5 +1,3 @@ grant { - permission java.lang.RuntimePermission "accessDeclaredMembers"; - permission java.lang.reflect.ReflectPermission "suppressAccessChecks"; - permission java.net.SocketPermission "*", "connect,resolve"; -}; + permission java.net.SocketPermission "*", "accept,connect,resolve"; +}; \ No newline at end of file diff --git a/plugin/src/test/java/com/rosette/elasticsearch/CategoriesProcessorTest.java b/plugin/src/test/java/com/rosette/elasticsearch/CategoriesProcessorTest.java index 7d66b57..34d8440 100644 --- a/plugin/src/test/java/com/rosette/elasticsearch/CategoriesProcessorTest.java +++ b/plugin/src/test/java/com/rosette/elasticsearch/CategoriesProcessorTest.java @@ -1,23 +1,22 @@ -/* -* Copyright 2020 Basis Technology Corp. -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ +/******************************************************************************* + * This data and information is proprietary to, and a valuable trade secret + * of, Basis Technology Corp. It is given in confidence by Basis Technology + * and may only be used as permitted under the license agreement under which + * it has been distributed, and in no other way. + * + * Copyright (c) 2024 Basis Technology Corporation All rights reserved. + * + * The technical data and information provided herein are provided with + * `limited rights', and the computer software provided herein is provided + * with `restricted rights' as those terms are defined in DAR and ASPR + * 7-104.9(a). + * + ******************************************************************************/ package com.rosette.elasticsearch; import org.elasticsearch.ingest.IngestDocument; import org.elasticsearch.ingest.RandomDocumentPicks; -import org.elasticsearch.test.ESSingleNodeTestCase; +import org.elasticsearch.test.ESTestCase; import org.hamcrest.MatcherAssert; import org.hamcrest.Matchers; import org.junit.Test; @@ -25,22 +24,25 @@ import java.util.HashMap; import java.util.Map; -public class CategoriesProcessorTest extends ESSingleNodeTestCase { +public class CategoriesProcessorTest extends ESTestCase { @Test - public void testCategories() throws Exception { - CategoriesProcessor processor = new CategoriesProcessor(new RosetteApiWrapper(), randomUnicodeOfLength(10), - "description", "text", "category"); + public void testCategories() { + try (CategoriesProcessor processor = new CategoriesProcessor(new RosetteApiWrapper(), randomUnicodeOfLength(10), + "description", "text", "category")) { - String inputText = "The people played lots of sports like soccer and hockey. The score was very high. " - + "Touchdown!"; + String inputText = "The people played lots of sports like soccer and hockey. The score was very high. " + + "Touchdown!"; - Map document = new HashMap<>(); - document.put("text", inputText); - IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), document); - processor.execute(ingestDocument); + Map document = new HashMap<>(); + document.put("text", inputText); + IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), document); + processor.execute(ingestDocument); - MatcherAssert.assertThat(ingestDocument.getSourceAndMetadata().get("text"), Matchers.equalTo(inputText)); - MatcherAssert.assertThat(ingestDocument.getSourceAndMetadata().get("category"), Matchers.equalTo("SPORTS")); + MatcherAssert.assertThat(ingestDocument.getSourceAndMetadata().get("text"), Matchers.equalTo(inputText)); + MatcherAssert.assertThat(ingestDocument.getSourceAndMetadata().get("category"), Matchers.equalTo("SPORTS")); + } catch (Exception e) { + assertNull(e); + } } } diff --git a/plugin/src/test/java/com/rosette/elasticsearch/EntitiesProcessorTest.java b/plugin/src/test/java/com/rosette/elasticsearch/EntitiesProcessorTest.java index d7794eb..ad29209 100644 --- a/plugin/src/test/java/com/rosette/elasticsearch/EntitiesProcessorTest.java +++ b/plugin/src/test/java/com/rosette/elasticsearch/EntitiesProcessorTest.java @@ -1,33 +1,32 @@ -/* -* Copyright 2020 Basis Technology Corp. -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ +/******************************************************************************* + * This data and information is proprietary to, and a valuable trade secret + * of, Basis Technology Corp. It is given in confidence by Basis Technology + * and may only be used as permitted under the license agreement under which + * it has been distributed, and in no other way. + * + * Copyright (c) 2024 Basis Technology Corporation All rights reserved. + * + * The technical data and information provided herein are provided with + * `limited rights', and the computer software provided herein is provided + * with `restricted rights' as those terms are defined in DAR and ASPR + * 7-104.9(a). + * + ******************************************************************************/ package com.rosette.elasticsearch; import com.basistech.util.LanguageCode; import org.elasticsearch.ingest.IngestDocument; import org.elasticsearch.ingest.RandomDocumentPicks; -import org.elasticsearch.test.ESSingleNodeTestCase; -import org.hamcrest.Matchers; +import org.elasticsearch.test.ESTestCase; import org.hamcrest.MatcherAssert; +import org.hamcrest.Matchers; import org.junit.Test; import java.util.HashMap; import java.util.List; import java.util.Map; -public class EntitiesProcessorTest extends ESSingleNodeTestCase { +public class EntitiesProcessorTest extends ESTestCase { private static final String INPUTTEXT = "Original Ghostbuster Dan Aykroyd, who also co-wrote the 1984 Ghostbusters " + "film, couldn’t be more pleased with the new all-female Ghostbusters cast, telling The Hollywood " @@ -35,85 +34,97 @@ public class EntitiesProcessorTest extends ESSingleNodeTestCase { + "most magnificent women in comedy."; @Test - public void testEntities() throws Exception { - EntitiesProcessor processor = new EntitiesProcessor(new RosetteApiWrapper(), randomUnicodeOfLength(10), + public void testEntities() { + try (EntitiesProcessor processor = new EntitiesProcessor(new RosetteApiWrapper(), randomUnicodeOfLength(10), "description", "text", "entities", false, false, - LanguageCode.ENGLISH, false); - - Map document = new HashMap<>(); - document.put("text", INPUTTEXT); - IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), document); - processor.execute(ingestDocument); - - Map source = ingestDocument.getSourceAndMetadata(); - assertFalse("Entity processor failed to return entities", ((List)source.get("entities")).isEmpty()); - - Map entity = (Map)((List)source.get("entities")).get(0); - MatcherAssert.assertThat(entity.get("mention"), Matchers.equalTo("Dan Aykroyd")); - //There shouldn't be any sentiment, translations, or offsets - MatcherAssert.assertThat(entity.get("sentiment"), Matchers.nullValue()); - MatcherAssert.assertThat(entity.get("translation"), Matchers.nullValue()); - MatcherAssert.assertThat(entity.get("offsets"), Matchers.nullValue()); + LanguageCode.ENGLISH, false)) { + + Map document = new HashMap<>(); + document.put("text", INPUTTEXT); + IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), document); + processor.execute(ingestDocument); + + Map source = ingestDocument.getSourceAndMetadata(); + assertFalse("Entity processor failed to return entities", ((List) source.get("entities")).isEmpty()); + + Map entity = (HashMap) ((List) source.get("entities")).get(0); + assertTrue(((String) entity.get("mention")).contains("Dan Aykroyd")); + //There shouldn't be any sentiment, translations, or offsets + MatcherAssert.assertThat(entity.get("sentiment"), Matchers.nullValue()); + MatcherAssert.assertThat(entity.get("translation"), Matchers.nullValue()); + MatcherAssert.assertThat(entity.get("offsets"), Matchers.nullValue()); + } catch (Exception e) { + assertNull(e); + } } @Test - public void testOffsets() throws Exception { - EntitiesProcessor processor = new EntitiesProcessor(new RosetteApiWrapper(), randomUnicodeOfLength(10), + public void testOffsets() { + try (EntitiesProcessor processor = new EntitiesProcessor(new RosetteApiWrapper(), randomUnicodeOfLength(10), "description", "text", "entities", true, false, - LanguageCode.ENGLISH, false); - - Map document = new HashMap<>(); - document.put("text", INPUTTEXT); - IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), document); - processor.execute(ingestDocument); - - Map source = ingestDocument.getSourceAndMetadata(); - assertFalse("Entity processor failed to return entities", ((List)source.get("entities")).isEmpty()); - Map entity = (Map)((List)source.get("entities")).get(0); - assertFalse("Entities are missing offsets", ((List)entity.get("offsets")).isEmpty()); - assertFalse("Entity offsets are empty", ((Map)((List)entity.get("offsets")).get(0)).isEmpty()); - - MatcherAssert.assertThat(entity.get("sentiment"), Matchers.nullValue()); - MatcherAssert.assertThat(entity.get("translation"), Matchers.nullValue()); + LanguageCode.ENGLISH, false)) { + + Map document = new HashMap<>(); + document.put("text", INPUTTEXT); + IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), document); + processor.execute(ingestDocument); + + Map source = ingestDocument.getSourceAndMetadata(); + assertFalse("Entity processor failed to return entities", ((List) source.get("entities")).isEmpty()); + Map entity = (HashMap) ((List) source.get("entities")).get(0); + assertFalse("Entities are missing offsets", ((List) entity.get("offsets")).isEmpty()); + assertFalse("Entity offsets are empty", ((Map) ((List) entity.get("offsets")).get(0)).isEmpty()); + + MatcherAssert.assertThat(entity.get("sentiment"), Matchers.nullValue()); + MatcherAssert.assertThat(entity.get("translation"), Matchers.nullValue()); + } catch (Exception e) { + assertNull(e); + } } @Test - public void testSentiment() throws Exception { - EntitiesProcessor processor = new EntitiesProcessor(new RosetteApiWrapper(), randomUnicodeOfLength(10), + public void testSentiment() { + try (EntitiesProcessor processor = new EntitiesProcessor(new RosetteApiWrapper(), randomUnicodeOfLength(10), "description", "text", "entities", false, false, - LanguageCode.ENGLISH, true); - - Map document = new HashMap<>(); - document.put("text", INPUTTEXT); - IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), document); - processor.execute(ingestDocument); - - Map source = ingestDocument.getSourceAndMetadata(); - assertFalse("Entity processor failed to return entities", ((List)source.get("entities")).isEmpty()); - Map entity = (Map)((List)source.get("entities")).get(0); - MatcherAssert.assertThat(entity.get("sentiment"), Matchers.anything()); - - MatcherAssert.assertThat(entity.get("translation"), Matchers.nullValue()); - MatcherAssert.assertThat(entity.get("offsets"), Matchers.nullValue()); + LanguageCode.ENGLISH, true)) { + + Map document = new HashMap<>(); + document.put("text", INPUTTEXT); + IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), document); + processor.execute(ingestDocument); + + Map source = ingestDocument.getSourceAndMetadata(); + assertFalse("Entity processor failed to return entities", ((List) source.get("entities")).isEmpty()); + Map entity = (HashMap) ((List) source.get("entities")).get(0); + MatcherAssert.assertThat(entity.get("sentiment"), Matchers.anything()); + + MatcherAssert.assertThat(entity.get("translation"), Matchers.nullValue()); + MatcherAssert.assertThat(entity.get("offsets"), Matchers.nullValue()); + } catch (Exception e) { + assertNull(e); + } } @Test - public void testTranslate() throws Exception { - EntitiesProcessor processor = new EntitiesProcessor(new RosetteApiWrapper(), randomUnicodeOfLength(10), + public void testTranslate() { + try (EntitiesProcessor processor = new EntitiesProcessor(new RosetteApiWrapper(), randomUnicodeOfLength(10), "description", "text", "entities", false, true, - LanguageCode.KOREAN, false); - - Map document = new HashMap<>(); - document.put("text", INPUTTEXT); - IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), document); - processor.execute(ingestDocument); - - Map source = ingestDocument.getSourceAndMetadata(); - assertFalse("Entity processor failed to return entities", ((List)source.get("entities")).isEmpty()); - Map entity = (Map)((List)source.get("entities")).get(0); - MatcherAssert.assertThat(entity.get("translation"), Matchers.anything()); - - MatcherAssert.assertThat(entity.get("sentiment"), Matchers.nullValue()); - MatcherAssert.assertThat(entity.get("offsets"), Matchers.nullValue()); + LanguageCode.KOREAN, false)) { + + Map document = new HashMap<>(); + document.put("text", INPUTTEXT); + IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), document); + processor.execute(ingestDocument); + + Map source = ingestDocument.getSourceAndMetadata(); + assertFalse("Entity processor failed to return entities", ((List) source.get("entities")).isEmpty()); + Map entity = (HashMap) ((List) source.get("entities")).get(0); + MatcherAssert.assertThat(entity.get("translation"), Matchers.anything()); + + MatcherAssert.assertThat(entity.get("sentiment"), Matchers.nullValue()); + MatcherAssert.assertThat(entity.get("offsets"), Matchers.nullValue()); + } catch (Exception e) { + assertNull(e); + } } } diff --git a/plugin/src/test/java/com/rosette/elasticsearch/LanguageProcessorTest.java b/plugin/src/test/java/com/rosette/elasticsearch/LanguageProcessorTest.java index 50c48b8..15381de 100644 --- a/plugin/src/test/java/com/rosette/elasticsearch/LanguageProcessorTest.java +++ b/plugin/src/test/java/com/rosette/elasticsearch/LanguageProcessorTest.java @@ -1,45 +1,48 @@ -/* -* Copyright 2020 Basis Technology Corp. -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ +/******************************************************************************* + * This data and information is proprietary to, and a valuable trade secret + * of, Basis Technology Corp. It is given in confidence by Basis Technology + * and may only be used as permitted under the license agreement under which + * it has been distributed, and in no other way. + * + * Copyright (c) 2024 Basis Technology Corporation All rights reserved. + * + * The technical data and information provided herein are provided with + * `limited rights', and the computer software provided herein is provided + * with `restricted rights' as those terms are defined in DAR and ASPR + * 7-104.9(a). + * + ******************************************************************************/ package com.rosette.elasticsearch; import org.elasticsearch.ingest.IngestDocument; import org.elasticsearch.ingest.RandomDocumentPicks; -import org.elasticsearch.test.ESSingleNodeTestCase; +import org.elasticsearch.test.ESTestCase; import org.hamcrest.MatcherAssert; import org.hamcrest.Matchers; import org.junit.Test; import java.util.HashMap; import java.util.Map; +import java.util.Random; -public class LanguageProcessorTest extends ESSingleNodeTestCase { +public class LanguageProcessorTest extends ESTestCase { @Test - public void testLangId() throws Exception { - LanguageProcessor processor = new LanguageProcessor(new RosetteApiWrapper(), randomUnicodeOfLength(10), - "description", "text", "language"); + public void testLangId() { + try (LanguageProcessor processor = new LanguageProcessor(new RosetteApiWrapper(), randomUnicodeOfLength(10), + "description", "text", "language")) { - String inputText = "This is a very English document. It should be identified as English."; + String inputText = "This is a very English document. It should be identified as English."; - Map document = new HashMap<>(); - document.put("text", inputText); - IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), document); - processor.execute(ingestDocument); + Map document = new HashMap<>(); + document.put("text", inputText); + IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(new Random(), document); + processor.execute(ingestDocument); - MatcherAssert.assertThat(ingestDocument.getSourceAndMetadata().get("text"), Matchers.equalTo(inputText)); - MatcherAssert.assertThat(ingestDocument.getSourceAndMetadata().get("language"), Matchers.equalTo("eng")); + MatcherAssert.assertThat(ingestDocument.getSourceAndMetadata().get("text"), Matchers.equalTo(inputText)); + MatcherAssert.assertThat(ingestDocument.getSourceAndMetadata().get("language"), Matchers.equalTo("eng")); + } catch (Exception e) { + assertNull(e); + } } } diff --git a/plugin/src/test/java/com/rosette/elasticsearch/MockRosetteInitialization.java b/plugin/src/test/java/com/rosette/elasticsearch/MockRosetteInitialization.java index 26c4ff4..5b2085c 100644 --- a/plugin/src/test/java/com/rosette/elasticsearch/MockRosetteInitialization.java +++ b/plugin/src/test/java/com/rosette/elasticsearch/MockRosetteInitialization.java @@ -1,27 +1,25 @@ -/* -* Copyright 2020 Basis Technology Corp. -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ +/******************************************************************************* + * This data and information is proprietary to, and a valuable trade secret + * of, Basis Technology Corp. It is given in confidence by Basis Technology + * and may only be used as permitted under the license agreement under which + * it has been distributed, and in no other way. + * + * Copyright (c) 2024 Basis Technology Corporation All rights reserved. + * + * The technical data and information provided herein are provided with + * `limited rights', and the computer software provided herein is provided + * with `restricted rights' as those terms are defined in DAR and ASPR + * 7-104.9(a). + * + ******************************************************************************/ package com.rosette.elasticsearch; import org.apache.http.HttpHeaders; import org.mockserver.client.MockServerClient; +import org.mockserver.client.initialize.PluginExpectationInitializer; import org.mockserver.model.Header; import org.mockserver.model.HttpRequest; import org.mockserver.model.HttpResponse; -import org.mockserver.model.Parameter; -import org.mockserver.client.initialize.PluginExpectationInitializer; import java.io.BufferedReader; import java.io.IOException; @@ -29,15 +27,13 @@ import java.io.InputStreamReader; import java.nio.charset.StandardCharsets; + //Mock-server is launched before unit tests run and serves up a mocked json response for each endpoint public class MockRosetteInitialization implements PluginExpectationInitializer { @Override public void initializeExpectations(MockServerClient mockServerClient) { String baseURL = System.getProperty("mockserver.baseurl", "/rest/worker/v1/"); - //Specific case for when the EntityProcessor calls sentiment expecting an ADM - //This has to be called before the other sentiment endpoint is added - addSentimentADM(baseURL, mockServerClient); addEndpoint(baseURL, "categories", mockServerClient); addEndpoint(baseURL, "sentiment", mockServerClient); @@ -64,28 +60,10 @@ private void addEndpoint(String baseURL, String endpointName, MockServerClient m } } - private void addSentimentADM(String baseURL, MockServerClient mockServerClient) { - try (InputStream is = getClass().getClassLoader() - .getResourceAsStream("mock_responses/sentiment_adm_response.json")) { - String response = getStringFromResource(is); - mockServerClient.when(HttpRequest.request() - .withMethod("POST") - .withPath(baseURL + "sentiment").withQueryStringParameter(new Parameter("output", "rosette"))) - .respond(HttpResponse.response() - .withStatusCode(200) - .withHeaders( - new Header(HttpHeaders.CONTENT_TYPE, "application/json") - ) - .withBody(response)); - } catch (IOException ioe) { - throw new RuntimeException(ioe); - } - } - private String getStringFromResource(InputStream is) throws IOException { StringBuilder sb = new StringBuilder(); String line; - try (BufferedReader br = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8.name()))) { + try (BufferedReader br = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8))) { while ((line = br.readLine()) != null) { sb.append(line); } diff --git a/plugin/src/test/java/com/rosette/elasticsearch/NameTranslationProcessorTest.java b/plugin/src/test/java/com/rosette/elasticsearch/NameTranslationProcessorTest.java index f4d6e22..65fa9fa 100644 --- a/plugin/src/test/java/com/rosette/elasticsearch/NameTranslationProcessorTest.java +++ b/plugin/src/test/java/com/rosette/elasticsearch/NameTranslationProcessorTest.java @@ -1,25 +1,24 @@ -/* -* Copyright 2020 Basis Technology Corp. -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ +/******************************************************************************* + * This data and information is proprietary to, and a valuable trade secret + * of, Basis Technology Corp. It is given in confidence by Basis Technology + * and may only be used as permitted under the license agreement under which + * it has been distributed, and in no other way. + * + * Copyright (c) 2024 Basis Technology Corporation All rights reserved. + * + * The technical data and information provided herein are provided with + * `limited rights', and the computer software provided herein is provided + * with `restricted rights' as those terms are defined in DAR and ASPR + * 7-104.9(a). + * + ******************************************************************************/ package com.rosette.elasticsearch; import com.basistech.util.ISO15924; import com.basistech.util.LanguageCode; import org.elasticsearch.ingest.IngestDocument; import org.elasticsearch.ingest.RandomDocumentPicks; -import org.elasticsearch.test.ESSingleNodeTestCase; +import org.elasticsearch.test.ESTestCase; import org.hamcrest.MatcherAssert; import org.hamcrest.Matchers; import org.junit.Test; @@ -27,24 +26,27 @@ import java.util.HashMap; import java.util.Map; -public class NameTranslationProcessorTest extends ESSingleNodeTestCase { +public class NameTranslationProcessorTest extends ESTestCase { @Test - public void testTranslateToEnglish() throws Exception { - NameTranslationProcessor processor = new NameTranslationProcessor(new RosetteApiWrapper(), + public void testTranslateToEnglish() { + try (NameTranslationProcessor processor = new NameTranslationProcessor(new RosetteApiWrapper(), randomUnicodeOfLength(10), "description", "text", "translation", LanguageCode.ENGLISH, ISO15924.Latn, "PERSON", LanguageCode.RUSSIAN, ISO15924.Cyrl, - LanguageCode.UNKNOWN); + LanguageCode.UNKNOWN)) { - String inputText = "Владимир Путин"; + String inputText = "Владимир Путин"; - Map document = new HashMap<>(); - document.put("text", inputText); - IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), document); - processor.execute(ingestDocument); + Map document = new HashMap<>(); + document.put("text", inputText); + IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), document); + processor.execute(ingestDocument); - MatcherAssert.assertThat(ingestDocument.getSourceAndMetadata().get("text"), Matchers.equalTo(inputText)); - MatcherAssert.assertThat(ingestDocument.getSourceAndMetadata().get("translation"), - Matchers.equalTo("Vladimir Putin")); + MatcherAssert.assertThat(ingestDocument.getSourceAndMetadata().get("text"), Matchers.equalTo(inputText)); + MatcherAssert.assertThat(ingestDocument.getSourceAndMetadata().get("translation"), + Matchers.equalTo("Vladimir Putin")); + } catch (Exception e) { + assertNull(e); + } } } diff --git a/plugin/src/test/java/com/rosette/elasticsearch/RosetteAbstractProcessorTest.java b/plugin/src/test/java/com/rosette/elasticsearch/RosetteAbstractProcessorTest.java index f7478e8..c0d48a1 100644 --- a/plugin/src/test/java/com/rosette/elasticsearch/RosetteAbstractProcessorTest.java +++ b/plugin/src/test/java/com/rosette/elasticsearch/RosetteAbstractProcessorTest.java @@ -1,71 +1,54 @@ -/* -* Copyright 2020 Basis Technology Corp. -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ +/******************************************************************************* + * This data and information is proprietary to, and a valuable trade secret + * of, Basis Technology Corp. It is given in confidence by Basis Technology + * and may only be used as permitted under the license agreement under which + * it has been distributed, and in no other way. + * + * Copyright (c) 2024 Basis Technology Corporation All rights reserved. + * + * The technical data and information provided herein are provided with + * `limited rights', and the computer software provided herein is provided + * with `restricted rights' as those terms are defined in DAR and ASPR + * 7-104.9(a). + * + ******************************************************************************/ package com.rosette.elasticsearch; import org.elasticsearch.ElasticsearchException; import org.elasticsearch.ingest.IngestDocument; import org.elasticsearch.ingest.RandomDocumentPicks; -import org.elasticsearch.test.ESSingleNodeTestCase; -import org.hamcrest.MatcherAssert; -import org.hamcrest.Matchers; +import org.elasticsearch.test.ESTestCase; import org.junit.Test; import java.util.HashMap; import java.util.Map; -public class RosetteAbstractProcessorTest extends ESSingleNodeTestCase { +public class RosetteAbstractProcessorTest extends ESTestCase { - class MockProcessor extends RosetteAbstractProcessor { + static class MockProcessor extends RosetteAbstractProcessor { MockProcessor(RosetteApiWrapper rosAPI, String tag, String description, String inputField, String targetField) { super(rosAPI, tag, description, "mock_processor", inputField, targetField); } @Override - public void processDocument(String inputText, IngestDocument ingestDocument) throws Exception { + public void processDocument(String inputText, IngestDocument ingestDocument) { ingestDocument.setFieldValue(targetField, "Processed!"); } } - @Test - public void testEmptyField() throws Exception { - MockProcessor processor = new MockProcessor(new RosetteApiWrapper(), randomUnicodeOfLength(10), - "description", "text", "target"); - - //Process document with an empty "text" field - Map document = new HashMap<>(); - IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), document); - processor.execute(ingestDocument); - - //Nothing should get placed in the target field - MatcherAssert.assertThat("Processor should not process empty ingest field", - ingestDocument.getSourceAndMetadata().get("target"), Matchers.nullValue()); - } - @Test(expected = ElasticsearchException.class) public void testOverwrite() throws Exception { - MockProcessor processor = new MockProcessor(new RosetteApiWrapper(), randomUnicodeOfLength(10), - "description", "text", "target"); + try (MockProcessor processor = new MockProcessor(new RosetteApiWrapper(), randomUnicodeOfLength(10), + "description", "text", "target")) { - //Process document with a value already in the target field - Map document = new HashMap<>(); - document.put("text", "input text"); - document.put("target", "don't overwrite me!"); - IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), document); + //Process document with a value already in the target field + Map document = new HashMap<>(); + document.put("text", "input text"); + document.put("target", "don't overwrite me!"); + IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), document); - //We expect an exception to be thrown - processor.execute(ingestDocument); + //We expect an exception to be thrown + processor.execute(ingestDocument); + } } } diff --git a/plugin/src/test/java/com/rosette/elasticsearch/RosetteTextAnalysisPluginIT.java b/plugin/src/test/java/com/rosette/elasticsearch/RosetteTextAnalysisPluginIT.java index 0a52c57..ff053c3 100644 --- a/plugin/src/test/java/com/rosette/elasticsearch/RosetteTextAnalysisPluginIT.java +++ b/plugin/src/test/java/com/rosette/elasticsearch/RosetteTextAnalysisPluginIT.java @@ -1,37 +1,32 @@ -/* -* Copyright 2020 Basis Technology Corp. -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ +/******************************************************************************* + * This data and information is proprietary to, and a valuable trade secret + * of, Basis Technology Corp. It is given in confidence by Basis Technology + * and may only be used as permitted under the license agreement under which + * it has been distributed, and in no other way. + * + * Copyright (c) 2024 Basis Technology Corporation All rights reserved. + * + * The technical data and information provided herein are provided with + * `limited rights', and the computer software provided herein is provided + * with `restricted rights' as those terms are defined in DAR and ASPR + * 7-104.9(a). + * + ******************************************************************************/ package com.rosette.elasticsearch; +import org.elasticsearch.action.DocWriteResponse; import org.elasticsearch.action.admin.cluster.node.info.NodeInfo; -import org.elasticsearch.action.admin.cluster.node.info.NodesInfoRequest; import org.elasticsearch.action.admin.cluster.node.info.NodesInfoResponse; import org.elasticsearch.action.admin.cluster.node.info.PluginsAndModules; -import org.elasticsearch.action.index.IndexResponse; +import org.elasticsearch.action.get.GetResponse; import org.elasticsearch.action.support.master.AcknowledgedResponse; -import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.xcontent.XContentFactory; -import org.elasticsearch.xcontent.XContentType; -import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.plugins.Plugin; -import org.elasticsearch.plugins.PluginInfo; +import org.elasticsearch.plugins.PluginRuntimeInfo; import org.elasticsearch.rest.RestStatus; import org.elasticsearch.test.ESIntegTestCase; -import org.elasticsearch.test.hamcrest.ElasticsearchAssertions; +import org.elasticsearch.xcontent.XContentType; import org.hamcrest.MatcherAssert; import org.hamcrest.Matchers; import org.junit.Test; @@ -43,12 +38,19 @@ import java.nio.charset.StandardCharsets; import java.util.Collection; import java.util.Collections; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; -//Tests all processors against an running embedded ES instance using the deployed Rosette API +//Tests all processors against a running embedded ES instance using the deployed Rosette API public class RosetteTextAnalysisPluginIT extends ESIntegTestCase { + @Override + public void tearDown() throws Exception { + logger.info("Shutting down ES"); + super.tearDown(); + } + @Override protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) { return Settings.builder() @@ -63,82 +65,89 @@ protected Collection> nodePlugins() { } @Test - public void testPluginIsLoaded() throws Exception { - NodesInfoResponse response = client().admin().cluster().prepareNodesInfo() - .addMetric(NodesInfoRequest.Metric.PLUGINS.metricName()).get(); + public void testPluginIsLoaded() { + logger.info("Checking if plugin was loaded"); + NodesInfoResponse response = client().admin().cluster().prepareNodesInfo().get(); + boolean pluginFound = false; for (NodeInfo nodeInfo : response.getNodes()) { - boolean pluginFound = false; - for (PluginInfo pluginInfo : nodeInfo.getInfo(PluginsAndModules.class).getPluginInfos()) { - if (pluginInfo.getName().equals(RosetteTextAnalysisPlugin.class.getName())) { + for (PluginRuntimeInfo pluginInfo : nodeInfo.getInfo(PluginsAndModules.class).getPluginInfos()) { + String name = pluginInfo.descriptor().getName(); + if ("com.rosette.elasticsearch.RosetteTextAnalysisPlugin".equals(name)) { pluginFound = true; break; } } assertTrue(pluginFound); } + logger.info("Plugin was loaded"); } - //Tests the language processor @Test public void testLanguage() throws Exception { - String inputText = "This is a very English document. It should be identified as English."; - SearchResponse response = exercisePipeline(inputText, "language"); + logger.info("Testing language processor"); + GetResponse response = exerciseGetPipeline(inputText, "language"); //Check the source for the expected language - MatcherAssert.assertThat(response.getHits().getAt(0).getSourceAsMap() + MatcherAssert.assertThat(response.getSourceAsMap() .get(LanguageProcessor.Parameters.TARGET_FIELD.defaultValue), Matchers.equalTo("eng")); + logger.info("Language processor test complete"); } @Test public void testCategories() throws Exception { - String inputText = "The people played lots of sports like soccer and hockey. The score was very high. " + "Touchdown!"; - SearchResponse response = exercisePipeline(inputText, "categories"); + logger.info("Testing categories processor"); + GetResponse response = exerciseGetPipeline(inputText, "categories"); //Check the source for the expected category - MatcherAssert.assertThat(response.getHits().getAt(0).getSourceAsMap() + MatcherAssert.assertThat(response.getSourceAsMap() .get(CategoriesProcessor.Parameters.TARGET_FIELD.defaultValue), Matchers.equalTo("SPORTS")); + + logger.info("Categories processor test complete"); } @Test public void testSentiment() throws Exception { String inputText = "I love this sentence so much I want to marry it!"; - - SearchResponse response = exercisePipeline(inputText, "sentiment"); + logger.info("Testing sentiment processor"); + GetResponse response = exerciseGetPipeline(inputText, "sentiment"); //Check the source for the expected sentiment - MatcherAssert.assertThat(response.getHits().getAt(0).getSourceAsMap() + MatcherAssert.assertThat(response.getSourceAsMap() .get(SentimentProcessor.Parameters.TARGET_FIELD.defaultValue), Matchers.equalTo("pos")); + logger.info("Sentiment processor test complete"); } @Test public void testTranslateToEnglish() throws Exception { - String inputText = "Владимир Путин"; - - SearchResponse response = exercisePipeline(inputText, "translate_eng"); + logger.info("Testing name translation processor, translating from Russian to English"); + GetResponse response = exerciseGetPipeline(inputText, "translate_eng"); //Check the source for the expected English translation - MatcherAssert.assertThat(response.getHits().getAt(0).getSourceAsMap() + MatcherAssert.assertThat(response.getSourceAsMap() .get(NameTranslationProcessor.Parameters.TARGET_FIELD.defaultValue), Matchers.equalTo("Vladimir Putin")); + logger.info("Translating from Russian to English Name translation processor test complete"); } @Test public void testTranslateFromEnglish() throws Exception { String inputText = "Vladimir Putin"; - SearchResponse response = exercisePipeline(inputText, "translate_rus"); + logger.info("Testing name translation processor, translating from English to Russian"); + GetResponse response = exerciseGetPipeline(inputText, "translate_rus"); //Check the source for the expected Russian translation - MatcherAssert.assertThat(response.getHits().getAt(0).getSourceAsMap() + MatcherAssert.assertThat(response.getSourceAsMap() .get(NameTranslationProcessor.Parameters.TARGET_FIELD.defaultValue), Matchers.equalTo("Владимир Путин")); + logger.info("Name translation processor test complete"); } @Test @@ -148,15 +157,16 @@ public void testEntities() throws Exception { + "be more pleased with the new all-female Ghostbusters cast, telling The Hollywood Reporter, “The " + "Aykroyd family is delighted by this inheritance of the Ghostbusters torch by these most magnificent " + "women in comedy.”"; - - SearchResponse response = exercisePipeline(inputText, "entities"); + logger.info("Testing entities processor"); + GetResponse response = exerciseGetPipeline(inputText, "entities"); //Check the source for the expected entity result - assertFalse(((List)response.getHits().getAt(0).getSourceAsMap() + assertFalse(((List)response.getSourceAsMap() .get(EntitiesProcessor.Parameters.TARGET_FIELD.defaultValue)).isEmpty()); - Map entity = (Map)((List)response.getHits().getAt(0).getSourceAsMap() + Map entity = (LinkedHashMap)((List)response.getSourceAsMap() .get(EntitiesProcessor.Parameters.TARGET_FIELD.defaultValue)).get(0); MatcherAssert.assertThat(entity.get("mention"), Matchers.equalTo("Original Ghostbuster Dan Aykroyd")); + logger.info("Entities processor test complete"); } @Test @@ -167,12 +177,12 @@ public void testEntitiesWithSentiment() throws Exception { + "Aykroyd family is delighted by this inheritance of the Ghostbusters torch by these most magnificent " + "women in comedy.”"; - SearchResponse response = exercisePipeline(inputText, "entities_sentiment"); + GetResponse response = exerciseGetPipeline(inputText, "entities_sentiment"); //Check the source for the expected entity level sentiment - assertFalse(((List)response.getHits().getAt(0).getSourceAsMap() + assertFalse(((List)response.getSourceAsMap() .get(EntitiesProcessor.Parameters.TARGET_FIELD.defaultValue)).isEmpty()); - Map entity = (Map)((List)response.getHits().getAt(0).getSourceAsMap() + Map entity = (LinkedHashMap)((List)response.getSourceAsMap() .get(EntitiesProcessor.Parameters.TARGET_FIELD.defaultValue)).get(0); MatcherAssert.assertThat(entity.get("mention"), Matchers.equalTo("Original Ghostbuster Dan Aykroyd")); MatcherAssert.assertThat(entity.get("sentiment"), Matchers.equalTo("pos")); @@ -186,11 +196,11 @@ public void testAll() throws Exception { + "be more pleased with the new all-female Ghostbusters cast, telling The Hollywood Reporter, “The " + "Aykroyd family is delighted by this inheritance of the Ghostbusters torch by these most magnificent " + "women in comedy.”"; - - SearchResponse response = exercisePipeline(inputText, "all"); + logger.info("Testing all processors together"); + GetResponse response = exerciseGetPipeline(inputText, "all"); //Check the source for the expected entity result - Map source = response.getHits().getAt(0).getSourceAsMap(); + Map source = response.getSourceAsMap(); MatcherAssert.assertThat(source.get(LanguageProcessor.Parameters.TARGET_FIELD.defaultValue), Matchers.equalTo("eng")); MatcherAssert.assertThat(source.get(CategoriesProcessor.Parameters.TARGET_FIELD.defaultValue), @@ -198,38 +208,30 @@ public void testAll() throws Exception { MatcherAssert.assertThat(source.get(SentimentProcessor.Parameters.TARGET_FIELD.defaultValue), Matchers.equalTo("pos")); - assertFalse(((List)source.get(EntitiesProcessor.Parameters.TARGET_FIELD.defaultValue)).isEmpty()); - Map entity = (Map)((List)source.get(EntitiesProcessor.Parameters.TARGET_FIELD.defaultValue)).get(0); + assertFalse(((List)source.get(EntitiesProcessor.Parameters.TARGET_FIELD.defaultValue)).isEmpty()); + Map entity = (LinkedHashMap)((List)source.get(EntitiesProcessor.Parameters.TARGET_FIELD.defaultValue)).get(0); MatcherAssert.assertThat(entity.get("mention"), Matchers.equalTo("Original Ghostbuster Dan Aykroyd")); + logger.info("All processors test complete"); } - - private SearchResponse exercisePipeline(String inputText, String pipelineName) throws IOException { - + private GetResponse exerciseGetPipeline(String inputText, String pipelineName) throws IOException { //Add the ingest pipeline AcknowledgedResponse pipelineResponse = client().admin().cluster() .preparePutPipeline(pipelineName, getProcessorConfig(pipelineName), XContentType.JSON).get(); assertTrue("Failed to add ingest pipeline", pipelineResponse.isAcknowledged()); - //Add a document that uses the ingest pipeline - IndexResponse indexResponse = client().prepareIndex("test", "test").setPipeline(pipelineName) - .setSource(XContentFactory.jsonBuilder().startObject().field("text", inputText) - .endObject()).get(); - assertEquals("Failed to index document correctly", RestStatus.CREATED, indexResponse.status()); - //Force index refresh - refresh("test"); - - //Find the document - SearchResponse response = client().prepareSearch("test").setQuery(QueryBuilders.matchAllQuery()).get(); - ElasticsearchAssertions.assertNoFailures(response); - - return response; + DocWriteResponse docWriteResponse = client().prepareIndex("test").setId("myid").setPipeline(pipelineName) + .setSource("text", inputText).get(); + assertEquals("Failed to index document correctly", RestStatus.CREATED, docWriteResponse.status()); + GetResponse getResponse = client().prepareGet("test", "myid").get(); + assertTrue("Failed to find indexed document", getResponse.isExists()); + return getResponse; } - private BytesArray getProcessorConfig(String name) throws IOException { try (InputStream is = getClass().getClassLoader().getResourceAsStream("it_processors/" + name + ".json")) { StringBuilder sb = new StringBuilder(); - String line; - try (BufferedReader br = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8.name()))) { + assert is != null; + try (BufferedReader br = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8))) { + String line; while ((line = br.readLine()) != null) { sb.append(line); } diff --git a/plugin/src/test/java/com/rosette/elasticsearch/SentimentProcessorTest.java b/plugin/src/test/java/com/rosette/elasticsearch/SentimentProcessorTest.java index bce34a8..db030a9 100644 --- a/plugin/src/test/java/com/rosette/elasticsearch/SentimentProcessorTest.java +++ b/plugin/src/test/java/com/rosette/elasticsearch/SentimentProcessorTest.java @@ -1,23 +1,22 @@ -/* -* Copyright 2020 Basis Technology Corp. -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ +/******************************************************************************* + * This data and information is proprietary to, and a valuable trade secret + * of, Basis Technology Corp. It is given in confidence by Basis Technology + * and may only be used as permitted under the license agreement under which + * it has been distributed, and in no other way. + * + * Copyright (c) 2024 Basis Technology Corporation All rights reserved. + * + * The technical data and information provided herein are provided with + * `limited rights', and the computer software provided herein is provided + * with `restricted rights' as those terms are defined in DAR and ASPR + * 7-104.9(a). + * + ******************************************************************************/ package com.rosette.elasticsearch; import org.elasticsearch.ingest.IngestDocument; import org.elasticsearch.ingest.RandomDocumentPicks; -import org.elasticsearch.test.ESSingleNodeTestCase; +import org.elasticsearch.test.ESTestCase; import org.hamcrest.MatcherAssert; import org.hamcrest.Matchers; import org.junit.Test; @@ -25,21 +24,24 @@ import java.util.HashMap; import java.util.Map; -public class SentimentProcessorTest extends ESSingleNodeTestCase { +public class SentimentProcessorTest extends ESTestCase { @Test - public void testSentiment() throws Exception { - SentimentProcessor processor = new SentimentProcessor(new RosetteApiWrapper(), randomUnicodeOfLength(10), - "description", "text", "sentiment"); + public void testSentiment() { + try (SentimentProcessor processor = new SentimentProcessor(new RosetteApiWrapper(), randomUnicodeOfLength(10), + "description", "text", "sentiment")) { - String inputText = "I love this sentence so much I want to marry it!"; + String inputText = "I love this sentence so much I want to marry it!"; - Map document = new HashMap<>(); - document.put("text", inputText); - IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), document); - processor.execute(ingestDocument); + Map document = new HashMap<>(); + document.put("text", inputText); + IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), document); + processor.execute(ingestDocument); - MatcherAssert.assertThat(ingestDocument.getSourceAndMetadata().get("text"), Matchers.equalTo(inputText)); - MatcherAssert.assertThat(ingestDocument.getSourceAndMetadata().get("sentiment"), Matchers.equalTo("pos")); + MatcherAssert.assertThat(ingestDocument.getSourceAndMetadata().get("text"), Matchers.equalTo(inputText)); + MatcherAssert.assertThat(ingestDocument.getSourceAndMetadata().get("sentiment"), Matchers.equalTo("pos")); + } catch (Exception e) { + assertNull(e); + } } } diff --git a/plugin/src/test/resources/log4j2.xml b/plugin/src/test/resources/log4j2.xml new file mode 100644 index 0000000..4d6b6c5 --- /dev/null +++ b/plugin/src/test/resources/log4j2.xml @@ -0,0 +1,36 @@ + + + + + + + + + + + + + + + + + + + + + + + diff --git a/plugin/src/test/resources/mock_responses/entities_response.json b/plugin/src/test/resources/mock_responses/entities_response.json index 086c7e7..19a896d 100644 --- a/plugin/src/test/resources/mock_responses/entities_response.json +++ b/plugin/src/test/resources/mock_responses/entities_response.json @@ -1,386 +1,36 @@ { - "version":"1.1.0", - "data":"Original Ghostbuster Dan Aykroyd, who also co-wrote the 1984 Ghostbusters film, couldn’t be more pleased with the new all-female Ghostbusters cast, telling The Hollywood Reporter, “The Aykroyd family is delighted by this inheritance of the Ghostbusters torch by these most magnificent women in comedy.", - "attributes":{ - "scriptRegion":{ - "type":"list", - "itemType":"scriptRegion", - "items":[ + "entities": [ + { + "type": "PERSON", + "mention": "Original Ghostbuster Dan Aykroyd", + "normalized": "Original Ghostbuster Dan Aykroyd", + "count": 2, + "mentionOffsets": [ { - "startOffset":0, - "endOffset":301, - "script":"Latn" - } - ] - }, - "sentence":{ - "type":"list", - "itemType":"sentence", - "items":[ - { - "startOffset":0, - "endOffset":301 - } - ] - }, - "languageDetection":{ - "type":"languageDetection", - "startOffset":0, - "endOffset":301, - "detectionResults":[ - { - "language":"eng", - "encoding":"UTF-16BE", - "script":"Latn", - "confidence":0.02120806448228041 - } - ] - }, - "token":{ - "type":"list", - "itemType":"token", - "items":[ - { - "startOffset":0, - "endOffset":8, - "text":"Original" - }, - { - "startOffset":9, - "endOffset":20, - "text":"Ghostbuster" - }, - { - "startOffset":21, - "endOffset":24, - "text":"Dan" - }, - { - "startOffset":25, - "endOffset":32, - "text":"Aykroyd" - }, - { - "startOffset":32, - "endOffset":33, - "text":"," - }, - { - "startOffset":34, - "endOffset":37, - "text":"who" - }, - { - "startOffset":38, - "endOffset":42, - "text":"also" - }, - { - "startOffset":43, - "endOffset":45, - "text":"co" - }, - { - "startOffset":45, - "endOffset":46, - "text":"-" - }, - { - "startOffset":46, - "endOffset":51, - "text":"wrote" - }, - { - "startOffset":52, - "endOffset":55, - "text":"the" - }, - { - "startOffset":56, - "endOffset":60, - "text":"1984" - }, - { - "startOffset":61, - "endOffset":73, - "text":"Ghostbusters" - }, - { - "startOffset":74, - "endOffset":78, - "text":"film" - }, - { - "startOffset":78, - "endOffset":79, - "text":"," - }, - { - "startOffset":80, - "endOffset":88, - "text":"couldn’t" - }, - { - "startOffset":89, - "endOffset":91, - "text":"be" - }, - { - "startOffset":92, - "endOffset":96, - "text":"more" - }, - { - "startOffset":97, - "endOffset":104, - "text":"pleased" - }, - { - "startOffset":105, - "endOffset":109, - "text":"with" - }, - { - "startOffset":110, - "endOffset":113, - "text":"the" - }, - { - "startOffset":114, - "endOffset":117, - "text":"new" - }, - { - "startOffset":118, - "endOffset":121, - "text":"all" - }, - { - "startOffset":121, - "endOffset":122, - "text":"-" - }, - { - "startOffset":122, - "endOffset":128, - "text":"female" - }, - { - "startOffset":129, - "endOffset":141, - "text":"Ghostbusters" + "startOffset": 0, + "endOffset": 32 }, { - "startOffset":142, - "endOffset":146, - "text":"cast" - }, - { - "startOffset":146, - "endOffset":147, - "text":"," - }, - { - "startOffset":148, - "endOffset":155, - "text":"telling" - }, - { - "startOffset":156, - "endOffset":159, - "text":"The" - }, - { - "startOffset":160, - "endOffset":169, - "text":"Hollywood" - }, - { - "startOffset":170, - "endOffset":178, - "text":"Reporter" - }, - { - "startOffset":178, - "endOffset":179, - "text":"," - }, - { - "startOffset":180, - "endOffset":181, - "text":"“" - }, - { - "startOffset":181, - "endOffset":184, - "text":"The" - }, - { - "startOffset":185, - "endOffset":192, - "text":"Aykroyd" - }, - { - "startOffset":193, - "endOffset":199, - "text":"family" - }, - { - "startOffset":200, - "endOffset":202, - "text":"is" - }, - { - "startOffset":203, - "endOffset":212, - "text":"delighted" - }, - { - "startOffset":213, - "endOffset":215, - "text":"by" - }, - { - "startOffset":216, - "endOffset":220, - "text":"this" - }, - { - "startOffset":221, - "endOffset":232, - "text":"inheritance" - }, - { - "startOffset":233, - "endOffset":235, - "text":"of" - }, - { - "startOffset":236, - "endOffset":239, - "text":"the" - }, - { - "startOffset":240, - "endOffset":252, - "text":"Ghostbusters" - }, - { - "startOffset":253, - "endOffset":258, - "text":"torch" - }, - { - "startOffset":259, - "endOffset":261, - "text":"by" - }, - { - "startOffset":262, - "endOffset":267, - "text":"these" - }, - { - "startOffset":268, - "endOffset":272, - "text":"most" - }, - { - "startOffset":273, - "endOffset":284, - "text":"magnificent" - }, - { - "startOffset":285, - "endOffset":290, - "text":"women" - }, - { - "startOffset":291, - "endOffset":293, - "text":"in" - }, - { - "startOffset":294, - "endOffset":300, - "text":"comedy" - }, - { - "startOffset":300, - "endOffset":301, - "text":"." + "startOffset": 185, + "endOffset": 192 } - ] + ], + "entityId": "T0", + "confidence": 0.44447255 }, - "entities":{ - "type":"list", - "itemType":"entities", - "items":[ - { - "mentions":[ - { - "startOffset":21, - "endOffset":32, - "source":"kb-linker", - "normalized":"Dan Aykroyd" - }, - { - "startOffset":185, - "endOffset":192, - "source":"statistical", - "subsource":"/data/roots/rex/7.24.1.c58.3/data/statistical/eng/model-LE.bin", - "normalized":"Aykroyd" - } - ], - "headMentionIndex":0, - "type":"PERSON", - "entityId":"Q105221" - }, - { - "mentions":[ - { - "startOffset":61, - "endOffset":73, - "source":"kb-linker", - "normalized":"Ghostbusters" - }, - { - "startOffset":129, - "endOffset":141, - "source":"kb-linker", - "normalized":"Ghostbusters" - }, - { - "startOffset":240, - "endOffset":252, - "source":"kb-linker", - "normalized":"Ghostbusters" - } - ], - "headMentionIndex":0, - "type":"PRODUCT", - "entityId":"Q108745" - }, - { - "mentions":[ - { - "startOffset":156, - "endOffset":178, - "source":"kb-linker", - "normalized":"The Hollywood Reporter" - } - ], - "headMentionIndex":0, - "type":"ORGANIZATION", - "entityId":"Q61503" + { + "type": "PRODUCT", + "mention": "The Hollywood Reporter", + "normalized": "The Hollywood Reporter", + "count": 1, + "mentionOffsets": [ + { + "startOffset": 156, + "endOffset": 178 } - ] + ], + "entityId": "Q61503", + "linkingConfidence": 0.58811317 } - }, - "documentMetadata":{ - "processedBy":[ - "whole-document-language@10.28.73.67", - "entity-extraction@10.28.77.104" - ] - } -} \ No newline at end of file + ] +} diff --git a/plugin/src/test/resources/mock_responses/sentiment_adm_response.json b/plugin/src/test/resources/mock_responses/sentiment_adm_response.json deleted file mode 100644 index abcc81a..0000000 --- a/plugin/src/test/resources/mock_responses/sentiment_adm_response.json +++ /dev/null @@ -1,447 +0,0 @@ -{ - "version":"1.1.0", - "data":"Original Ghostbuster Dan Aykroyd, who also co-wrote the 1984 Ghostbusters film, couldn’t be more pleased with the new all-female Ghostbusters cast, telling The Hollywood Reporter, “The Aykroyd family is delighted by this inheritance of the Ghostbusters torch by these most magnificent women in comedy.", - "attributes":{ - "sentence":{ - "type":"list", - "itemType":"sentence", - "items":[ - { - "startOffset":0, - "endOffset":301 - } - ] - }, - "languageDetection":{ - "type":"languageDetection", - "startOffset":0, - "endOffset":301, - "detectionResults":[ - { - "language":"eng", - "encoding":"UTF-16BE", - "script":"Latn", - "confidence":0.02120806448228041 - } - ] - }, - "scriptRegion":{ - "type":"list", - "itemType":"scriptRegion", - "items":[ - { - "startOffset":0, - "endOffset":301, - "script":"Latn" - } - ] - }, - "sentimentResults":{ - "type":"list", - "itemType":"categorizerResults", - "items":[ - { - "label":"pos", - "score":0.6234125839546323, - "confidence":0.7962072011038756, - "explanationSet":[ - "pleased", - "hollywood", - "wrote", - "*POS_LEX*", - "new" - ] - } - ] - }, - "token":{ - "type":"list", - "itemType":"token", - "items":[ - { - "startOffset":0, - "endOffset":8, - "text":"Original" - }, - { - "startOffset":9, - "endOffset":20, - "text":"Ghostbuster" - }, - { - "startOffset":21, - "endOffset":24, - "text":"Dan" - }, - { - "startOffset":25, - "endOffset":32, - "text":"Aykroyd" - }, - { - "startOffset":32, - "endOffset":33, - "text":"," - }, - { - "startOffset":34, - "endOffset":37, - "text":"who" - }, - { - "startOffset":38, - "endOffset":42, - "text":"also" - }, - { - "startOffset":43, - "endOffset":45, - "text":"co" - }, - { - "startOffset":45, - "endOffset":46, - "text":"-" - }, - { - "startOffset":46, - "endOffset":51, - "text":"wrote" - }, - { - "startOffset":52, - "endOffset":55, - "text":"the" - }, - { - "startOffset":56, - "endOffset":60, - "text":"1984" - }, - { - "startOffset":61, - "endOffset":73, - "text":"Ghostbusters" - }, - { - "startOffset":74, - "endOffset":78, - "text":"film" - }, - { - "startOffset":78, - "endOffset":79, - "text":"," - }, - { - "startOffset":80, - "endOffset":88, - "text":"couldn’t" - }, - { - "startOffset":89, - "endOffset":91, - "text":"be" - }, - { - "startOffset":92, - "endOffset":96, - "text":"more" - }, - { - "startOffset":97, - "endOffset":104, - "text":"pleased" - }, - { - "startOffset":105, - "endOffset":109, - "text":"with" - }, - { - "startOffset":110, - "endOffset":113, - "text":"the" - }, - { - "startOffset":114, - "endOffset":117, - "text":"new" - }, - { - "startOffset":118, - "endOffset":121, - "text":"all" - }, - { - "startOffset":121, - "endOffset":122, - "text":"-" - }, - { - "startOffset":122, - "endOffset":128, - "text":"female" - }, - { - "startOffset":129, - "endOffset":141, - "text":"Ghostbusters" - }, - { - "startOffset":142, - "endOffset":146, - "text":"cast" - }, - { - "startOffset":146, - "endOffset":147, - "text":"," - }, - { - "startOffset":148, - "endOffset":155, - "text":"telling" - }, - { - "startOffset":156, - "endOffset":159, - "text":"The" - }, - { - "startOffset":160, - "endOffset":169, - "text":"Hollywood" - }, - { - "startOffset":170, - "endOffset":178, - "text":"Reporter" - }, - { - "startOffset":178, - "endOffset":179, - "text":"," - }, - { - "startOffset":180, - "endOffset":181, - "text":"“" - }, - { - "startOffset":181, - "endOffset":184, - "text":"The" - }, - { - "startOffset":185, - "endOffset":192, - "text":"Aykroyd" - }, - { - "startOffset":193, - "endOffset":199, - "text":"family" - }, - { - "startOffset":200, - "endOffset":202, - "text":"is" - }, - { - "startOffset":203, - "endOffset":212, - "text":"delighted" - }, - { - "startOffset":213, - "endOffset":215, - "text":"by" - }, - { - "startOffset":216, - "endOffset":220, - "text":"this" - }, - { - "startOffset":221, - "endOffset":232, - "text":"inheritance" - }, - { - "startOffset":233, - "endOffset":235, - "text":"of" - }, - { - "startOffset":236, - "endOffset":239, - "text":"the" - }, - { - "startOffset":240, - "endOffset":252, - "text":"Ghostbusters" - }, - { - "startOffset":253, - "endOffset":258, - "text":"torch" - }, - { - "startOffset":259, - "endOffset":261, - "text":"by" - }, - { - "startOffset":262, - "endOffset":267, - "text":"these" - }, - { - "startOffset":268, - "endOffset":272, - "text":"most" - }, - { - "startOffset":273, - "endOffset":284, - "text":"magnificent" - }, - { - "startOffset":285, - "endOffset":290, - "text":"women" - }, - { - "startOffset":291, - "endOffset":293, - "text":"in" - }, - { - "startOffset":294, - "endOffset":300, - "text":"comedy" - }, - { - "startOffset":300, - "endOffset":301, - "text":"." - } - ] - }, - "entities":{ - "type":"list", - "itemType":"entities", - "items":[ - { - "mentions":[ - { - "startOffset":21, - "endOffset":32, - "source":"kb-linker", - "normalized":"Dan Aykroyd" - }, - { - "startOffset":185, - "endOffset":192, - "source":"statistical", - "subsource":"/data/roots/rex/7.24.1.c58.3/data/statistical/eng/model-LE.bin", - "normalized":"Aykroyd" - } - ], - "headMentionIndex":0, - "type":"PERSON", - "entityId":"Q105221", - "sentiment":[ - { - "label":"pos", - "score":0.2378945518285036, - "confidence":0.6385089278441162, - "explanationSet":[ - "hollywood", - "wrote", - "*POS_LEX*", - "delighted", - "reporter" - ] - } - ] - }, - { - "mentions":[ - { - "startOffset":61, - "endOffset":73, - "source":"kb-linker", - "normalized":"Ghostbusters" - }, - { - "startOffset":129, - "endOffset":141, - "source":"kb-linker", - "normalized":"Ghostbusters" - }, - { - "startOffset":240, - "endOffset":252, - "source":"kb-linker", - "normalized":"Ghostbusters" - } - ], - "headMentionIndex":0, - "type":"PRODUCT", - "entityId":"Q108745", - "sentiment":[ - { - "label":"pos", - "score":0.6744932839646935, - "confidence":0.8111117726721541, - "explanationSet":[ - "pleased", - "hollywood", - "wrote", - "*POS_LEX*", - "new" - ] - } - ] - }, - { - "mentions":[ - { - "startOffset":156, - "endOffset":178, - "source":"kb-linker", - "normalized":"The Hollywood Reporter" - } - ], - "headMentionIndex":0, - "type":"ORGANIZATION", - "entityId":"Q61503", - "sentiment":[ - { - "label":"pos", - "score":-0.02252599410712719, - "confidence":0.5338094035254866, - "explanationSet":[ - "*POS_LEX*", - "new", - "delighted", - "reporter", - "family" - ] - } - ] - } - ] - } - }, - "documentMetadata":{ - "processedBy":[ - "whole-document-language@10.28.73.206", - "entity-extraction@10.28.177.218", - "sentiment@10.28.177.218" - ] - } -} \ No newline at end of file diff --git a/plugin/src/test/resources/mock_responses/sentiment_response.json b/plugin/src/test/resources/mock_responses/sentiment_response.json index c832625..3a5bd28 100644 --- a/plugin/src/test/resources/mock_responses/sentiment_response.json +++ b/plugin/src/test/resources/mock_responses/sentiment_response.json @@ -1,7 +1,48 @@ { "document": { "label": "pos", - "confidence": 0.6410158927447778 + "confidence": 0.7962072 }, - "entities": [] -} \ No newline at end of file + "entities": [ + { + "type": "PERSON", + "mention": "Original Ghostbuster Dan Aykroyd", + "normalized": "Original Ghostbuster Dan Aykroyd", + "count": 2, + "mentionOffsets": [ + { + "startOffset": 0, + "endOffset": 32 + }, + { + "startOffset": 185, + "endOffset": 192 + } + ], + "entityId": "T0", + "confidence": 0.44447255, + "sentiment": { + "label": "pos", + "confidence": 0.70096395 + } + }, + { + "type": "PRODUCT", + "mention": "The Hollywood Reporter", + "normalized": "The Hollywood Reporter", + "count": 1, + "mentionOffsets": [ + { + "startOffset": 156, + "endOffset": 178 + } + ], + "entityId": "Q61503", + "linkingConfidence": 0.58811317, + "sentiment": { + "label": "pos", + "confidence": 0.49633306 + } + } + ] +} diff --git a/pom.xml b/pom.xml index 9b4d67b..3c092ea 100644 --- a/pom.xml +++ b/pom.xml @@ -1,31 +1,30 @@ - + 4.0.0 com.rosette.elasticsearch rosette-elasticsearch-parent - 7.17.0.1-SNAPSHOT + 8.15.0.0-SNAPSHOT pom - open-source-parent com.basistech - 9.0.1 - + textanalytics + 74.0.0 + 2017 http://rosette-api.github.io/rosette-elasticsearch-plugin @@ -41,18 +40,15 @@ - 1.8 - ${jdk.version} - ${jdk.version} - 3.2.0 - 7.17.0 - 2.17.1 - 3.0.0-M3 - 1.6 - 3.3.0 - - https://s01.oss.sonatype.org - 1.20.0 + 17 + 17 + 8.15.0 + 2.23.1 + 3.5.0 + 3.5.0 + 3.2.7 + 3.10.0 + https://issues.sonatype.org/browse/OSSRH-76262 plugin @@ -60,26 +56,11 @@ - - com.basistech.rosette - rosette-api - ${rosette.api.binding.version} - org.elasticsearch elasticsearch ${elasticsearch.version} - - org.apache.logging.log4j - log4j-api - ${log4j.version} - - - org.apache.logging.log4j - log4j-core - ${log4j.version} - org.elasticsearch.test framework @@ -96,35 +77,12 @@ install - - org.apache.maven.plugins - maven-enforcer-plugin - ${maven-enforcer-plugin.version} - - - enforce-maven - - enforce - - - - - 3.6.3 - - - - - - org.apache.maven.plugins maven-compiler-plugin utf-8 - ${jdk.version} - ${jdk.version} - true - true + ${maven.compiler.target} @@ -132,7 +90,7 @@ maven-javadoc-plugin ${maven-javadoc-plugin.version} - ${jdk.version} + ${maven.compiler.source} true @@ -209,7 +167,7 @@ maven-javadoc-plugin ${maven-javadoc-plugin.version} - ${jdk.version} + ${maven.compiler.source} true diff --git a/tools/release.sh b/tools/release.sh index 2a44f9a..3657300 100755 --- a/tools/release.sh +++ b/tools/release.sh @@ -1,5 +1,20 @@ #!/usr/bin/env bash +################################################################################ +# This data and information is proprietary to, and a valuable trade secret +# of, Basis Technology Corp. It is given in confidence by Basis Technology +# and may only be used as permitted under the license agreement under which +# it has been distributed, and in no other way. +# +# Copyright (c) 2024 Basis Technology Corporation All rights reserved. +# +# The technical data and information provided herein are provided with +# `limited rights', and the computer software provided herein is provided +# with `restricted rights' as those terms are defined in DAR and ASPR +# 7-104.9(a). +# +################################################################################ + #Expects two arguments: #- ELASTIC_VERSION: The version of elastic for which you are building the plugin #- GITHUB_ACCESS_TOKEN: Personal OAuth access token with full repo permissions.