diff --git a/pom.xml b/pom.xml index 60dc1473a2..f0889f2a0a 100644 --- a/pom.xml +++ b/pom.xml @@ -80,6 +80,12 @@ ${icu4j.version} + + org.apache.commons + commons-text + 1.12.0 + + diff --git a/webapp/src/main/java/com/box/l10n/mojito/service/tm/TMService.java b/webapp/src/main/java/com/box/l10n/mojito/service/tm/TMService.java index 5bb8e238dc..9f20aef549 100644 --- a/webapp/src/main/java/com/box/l10n/mojito/service/tm/TMService.java +++ b/webapp/src/main/java/com/box/l10n/mojito/service/tm/TMService.java @@ -61,6 +61,7 @@ import com.google.common.base.Preconditions; import com.ibm.icu.text.MessageFormat; import io.micrometer.core.instrument.MeterRegistry; +import io.micrometer.core.instrument.Tags; import io.micrometer.core.instrument.Timer; import jakarta.persistence.EntityManager; import java.io.ByteArrayOutputStream; @@ -79,6 +80,7 @@ import net.sf.okapi.steps.common.FilterEventsWriterStep; import net.sf.okapi.steps.common.RawDocumentToFilterEventsStep; import org.apache.commons.codec.digest.DigestUtils; +import org.apache.commons.text.similarity.LevenshteinDistance; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; @@ -150,6 +152,15 @@ public class TMService { @Value("${l10n.tmService.quartz.schedulerName:" + DEFAULT_SCHEDULER_NAME + "}") String schedulerName; + @Value("${l10n.ai.translation.similarity.review.editDistanceMax:100}") + int editDistanceMax; + + @Value("${l10n.ai.translation.review.similarity.highPercentage:90}") + int aiTranslationSimilarityHighPercentage; + + @Value("${l10n.ai.translation.review.similarity.mediumPercentage:70}") + int aiTranslationSimilarityMediumPercentage; + /** * Adds a {@link TMTextUnit} in a {@link TM}. 
*
@@ -608,6 +619,11 @@ public AddTMTextUnitCurrentVariantResult addTMTextUnitCurrentVariantWithResult(
       boolean overridden =
           checkOverridden
               && currentTmTextUnitVariant.getStatus() == TMTextUnitVariant.Status.OVERRIDDEN;
+      if (currentTmTextUnitVariant.getStatus() == TMTextUnitVariant.Status.MT_REVIEW_NEEDED
+          && status == TMTextUnitVariant.Status.APPROVED) {
+
+        logAiReviewMetrics(content, currentTmTextUnitVariant);
+      }
       boolean updateNeeded =
           !overridden
               && isUpdateNeededForTmTextUnitVariant(
@@ -653,6 +669,92 @@ && isUpdateNeededForTmTextUnitVariant(
     return new AddTMTextUnitCurrentVariantResult(!noUpdate, tmTextUnitCurrentVariant);
   }
 
+  /**
+   * Records how closely an approved translation matches the AI translation under review.
+   *
+   * <p>Increments the {@code AiTranslation.review.similarity.match} counter when the content is
+   * unchanged, otherwise delegates to {@code logSimilarityMetrics}. Metrics are best effort:
+   * nothing is recorded when either content is {@code null}, so that a missing value cannot make
+   * the surrounding translation update fail.
+   *
+   * @param reviewedTranslation the content submitted for approval
+   * @param currentTmTextUnitVariant the current variant, whose content is the AI translation
+   */
+  private void logAiReviewMetrics(
+      String reviewedTranslation, TMTextUnitVariant currentTmTextUnitVariant) {
+    String aiTranslation = currentTmTextUnitVariant.getContent();
+    if (aiTranslation == null || reviewedTranslation == null) {
+      return;
+    }
+
+    if (aiTranslation.equals(reviewedTranslation)) {
+      incrementAiReviewCounter("AiTranslation.review.similarity.match", currentTmTextUnitVariant);
+    } else {
+      // Translation has been updated in review, check similarity of original to new
+      logSimilarityMetrics(reviewedTranslation, currentTmTextUnitVariant);
+    }
+  }
+
+  /**
+   * Classifies an edited AI translation as high, medium or low similarity and increments the
+   * matching {@code AiTranslation.review.similarity.*} counter.
+   *
+   * <p>The edit distance is computed with {@code editDistanceMax} as threshold; once the threshold
+   * is exceeded the translation is counted as low similarity without computing a percentage.
+   *
+   * @param reviewedTranslation the content submitted for approval, must not be {@code null}
+   * @param currentTmTextUnitVariant the current variant, its content must not be {@code null}
+   */
+  private void logSimilarityMetrics(
+      String reviewedTranslation, TMTextUnitVariant currentTmTextUnitVariant) {
+    String aiTranslation = currentTmTextUnitVariant.getContent();
+    int editDistance =
+        new LevenshteinDistance(editDistanceMax).apply(aiTranslation, reviewedTranslation);
+
+    String metricName;
+    if (editDistance < 0) {
+      // Negative edit distance means the edit distance threshold was exceeded, log as low
+      // similarity
+      metricName = "AiTranslation.review.similarity.low";
+    } else {
+      double similarityPercentage =
+          calculateSimilarityPercentage(aiTranslation, reviewedTranslation, editDistance);
+      if (similarityPercentage >= aiTranslationSimilarityHighPercentage) {
+        metricName = "AiTranslation.review.similarity.high";
+      } else if (similarityPercentage >= aiTranslationSimilarityMediumPercentage) {
+        metricName = "AiTranslation.review.similarity.medium";
+      } else {
+        metricName = "AiTranslation.review.similarity.low";
+      }
+    }
+    incrementAiReviewCounter(metricName, currentTmTextUnitVariant);
+  }
+
+  /** Increments the counter {@code metricName}, tagged with the variant's locale BCP 47 tag. */
+  private void incrementAiReviewCounter(String metricName, TMTextUnitVariant tmTextUnitVariant) {
+    meterRegistry
+        .counter(metricName, Tags.of("locale", tmTextUnitVariant.getLocale().getBcp47Tag()))
+        .increment();
+  }
+
+  /**
+   * Expresses an edit distance as a similarity percentage relative to the longer of the two
+   * strings.
+   *
+   * @param original the text before the edit
+   * @param updated the text after the edit
+   * @param editDistance the edit distance between {@code original} and {@code updated}
+   * @return the similarity percentage; 100 when both strings are empty
+   */
+  private double calculateSimilarityPercentage(String original, String updated, int editDistance) {
+    int maxLength = Math.max(original.length(), updated.length());
+    if (maxLength == 0) {
+      // Two empty strings are identical; dividing 0 by 0 would yield NaN
+      return 100;
+    }
+    return ((double) (maxLength - editDistance) / maxLength) * 100;
+  }
+
   public AddTMTextUnitCurrentVariantResult addTMTextUnitCurrentVariantWithResult(
       TMTextUnitCurrentVariant tmTextUnitCurrentVariant,
       Long tmId,