Skip to content

Commit

Permalink
Added AI translation quality metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
maallen committed Nov 15, 2024
1 parent 7234882 commit a41399e
Show file tree
Hide file tree
Showing 2 changed files with 80 additions and 0 deletions.
6 changes: 6 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,12 @@
<version>${icu4j.version}</version>
</dependency>

<!-- Apache Commons Text: provides org.apache.commons.text.similarity.LevenshteinDistance,
     which TMService imports. -->
<!-- NOTE(review): the version is a literal here while the neighbouring dependency uses a
     ${...version} property; consider a matching property if that is the pom's convention. -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-text</artifactId>
<version>1.12.0</version>
</dependency>

</dependencies>

<!-- SCM setup to push changes to the Github repo on release -->
Expand Down
74 changes: 74 additions & 0 deletions webapp/src/main/java/com/box/l10n/mojito/service/tm/TMService.java
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@
import com.google.common.base.Preconditions;
import com.ibm.icu.text.MessageFormat;
import io.micrometer.core.instrument.MeterRegistry;
import io.micrometer.core.instrument.Tags;
import io.micrometer.core.instrument.Timer;
import jakarta.persistence.EntityManager;
import java.io.ByteArrayOutputStream;
Expand All @@ -79,6 +80,7 @@
import net.sf.okapi.steps.common.FilterEventsWriterStep;
import net.sf.okapi.steps.common.RawDocumentToFilterEventsStep;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.text.similarity.LevenshteinDistance;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
Expand Down Expand Up @@ -150,6 +152,15 @@ public class TMService {
@Value("${l10n.tmService.quartz.schedulerName:" + DEFAULT_SCHEDULER_NAME + "}")
String schedulerName;

/**
 * Threshold passed to {@link LevenshteinDistance} when comparing an AI translation with its
 * reviewed version. When the computed distance is reported as negative (threshold exceeded),
 * the review is counted as low similarity. Defaults to 100.
 *
 * <p>NOTE(review): this key orders its segments as {@code similarity.review} while the two
 * percentage keys below use {@code review.similarity} - confirm whether that is intentional.
 */
@Value("${l10n.ai.translation.similarity.review.editDistanceMax:100}")
int editDistanceMax;

/**
 * Minimum similarity percentage (inclusive) for a reviewed AI translation to be counted under
 * the "high" similarity metric. Defaults to 90.
 */
@Value("${l10n.ai.translation.review.similarity.highPercentage:90}")
int aiTranslationSimilarityHighPercentage;

/**
 * Minimum similarity percentage (inclusive) for a reviewed AI translation to be counted under
 * the "medium" similarity metric when it is below the high threshold; anything lower is counted
 * as "low". Defaults to 70.
 */
@Value("${l10n.ai.translation.review.similarity.mediumPercentage:70}")
int aiTranslationSimilarityMediumPercentage;

/**
* Adds a {@link TMTextUnit} in a {@link TM}.
*
Expand Down Expand Up @@ -608,6 +619,11 @@ public AddTMTextUnitCurrentVariantResult addTMTextUnitCurrentVariantWithResult(
boolean overridden =
checkOverridden
&& currentTmTextUnitVariant.getStatus() == TMTextUnitVariant.Status.OVERRIDDEN;
if (currentTmTextUnitVariant.getStatus() == TMTextUnitVariant.Status.MT_REVIEW_NEEDED
&& status == TMTextUnitVariant.Status.APPROVED) {

logAiReviewMetrics(content, currentTmTextUnitVariant);
}
boolean updateNeeded =
!overridden
&& isUpdateNeededForTmTextUnitVariant(
Expand Down Expand Up @@ -653,6 +669,64 @@ && isUpdateNeededForTmTextUnitVariant(
return new AddTMTextUnitCurrentVariantResult(!noUpdate, tmTextUnitCurrentVariant);
}

/**
 * Records a metric describing how a reviewer treated a translation that was awaiting review.
 *
 * <p>If the reviewed text is identical to the current variant's content, the
 * {@code AiTranslation.review.similarity.match} counter is incremented (tagged with the
 * variant's locale). Otherwise the similarity between both texts is measured and recorded by
 * {@link #logSimilarityMetrics(String, TMTextUnitVariant)}.
 *
 * @param reviewedTranslation the translation content submitted by the review
 * @param currentTmTextUnitVariant the variant that was current before the review
 */
private void logAiReviewMetrics(
    String reviewedTranslation, TMTextUnitVariant currentTmTextUnitVariant) {
  boolean acceptedUnchanged = currentTmTextUnitVariant.getContent().equals(reviewedTranslation);

  if (!acceptedUnchanged) {
    // The reviewer edited the text: bucket the change by how far it drifted from the original.
    logSimilarityMetrics(reviewedTranslation, currentTmTextUnitVariant);
    return;
  }

  String localeTag = currentTmTextUnitVariant.getLocale().getBcp47Tag();
  meterRegistry
      .counter("AiTranslation.review.similarity.match", Tags.of("locale", localeTag))
      .increment();
}

/**
 * Buckets the similarity between the current variant's content and the reviewed translation
 * into high / medium / low and increments the matching counter, tagged with the variant's
 * locale.
 *
 * <p>The Levenshtein distance is computed with {@code editDistanceMax} as threshold. A negative
 * result (threshold exceeded) is counted as low similarity. Otherwise the similarity percentage
 * is compared against the configured high and medium percentages (both inclusive).
 *
 * @param reviewedTranslation the translation content submitted by the review
 * @param currentTmTextUnitVariant the variant that was current before the review
 */
private void logSimilarityMetrics(
    String reviewedTranslation, TMTextUnitVariant currentTmTextUnitVariant) {
  String previousContent = currentTmTextUnitVariant.getContent();
  int distance =
      new LevenshteinDistance(editDistanceMax).apply(previousContent, reviewedTranslation);

  String metricName;
  if (distance < 0) {
    // Threshold exceeded: the texts are too different to bother computing a percentage.
    metricName = "AiTranslation.review.similarity.low";
  } else {
    double percentage =
        calculateSimilarityPercentage(previousContent, reviewedTranslation, distance);
    if (percentage >= aiTranslationSimilarityHighPercentage) {
      metricName = "AiTranslation.review.similarity.high";
    } else if (percentage >= aiTranslationSimilarityMediumPercentage) {
      metricName = "AiTranslation.review.similarity.medium";
    } else {
      metricName = "AiTranslation.review.similarity.low";
    }
  }

  meterRegistry
      .counter(metricName, Tags.of("locale", currentTmTextUnitVariant.getLocale().getBcp47Tag()))
      .increment();
}

/**
 * Converts an edit distance into a similarity percentage relative to the longer of the two
 * strings: {@code (maxLength - editDistance) / maxLength * 100}.
 *
 * @param original the text before the edit
 * @param updated the text after the edit
 * @param editDistance the edit distance between {@code original} and {@code updated}
 * @return the similarity percentage; {@code 100} when both strings are empty
 */
private static double calculateSimilarityPercentage(
    String original, String updated, int editDistance) {
  int maxLength = Math.max(original.length(), updated.length());
  if (maxLength == 0) {
    // Both strings are empty, hence identical. Without this guard the division below would be
    // 0.0 / 0 = NaN, which fails every ">=" threshold comparison.
    return 100.0;
  }
  return ((double) (maxLength - editDistance) / maxLength) * 100;
}

public AddTMTextUnitCurrentVariantResult addTMTextUnitCurrentVariantWithResult(
TMTextUnitCurrentVariant tmTextUnitCurrentVariant,
Long tmId,
Expand Down

0 comments on commit a41399e

Please sign in to comment.