Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added AI translation quality metrics #193

Merged
merged 6 commits into from
Nov 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
<docker.compose.remove.volumes>true</docker.compose.remove.volumes>
<aspectj.version>1.9.21</aspectj.version>
<jackson.version>2.13.5</jackson.version>
<commons.text.version>1.12.0</commons.text.version>
</properties>

<dependencies>
Expand Down Expand Up @@ -80,6 +81,12 @@
<version>${icu4j.version}</version>
</dependency>

<!-- Apache Commons Text: supplies the LevenshteinDistance class imported by TMService -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-text</artifactId>
<version>${commons.text.version}</version>
</dependency>

</dependencies>

<!-- SCM setup to push changes to the Github repo on release -->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@
import com.google.common.base.Preconditions;
import com.ibm.icu.text.MessageFormat;
import io.micrometer.core.instrument.MeterRegistry;
import io.micrometer.core.instrument.Tags;
import io.micrometer.core.instrument.Timer;
import jakarta.persistence.EntityManager;
import java.io.ByteArrayOutputStream;
Expand All @@ -79,6 +80,7 @@
import net.sf.okapi.steps.common.FilterEventsWriterStep;
import net.sf.okapi.steps.common.RawDocumentToFilterEventsStep;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.text.similarity.LevenshteinDistance;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
Expand Down Expand Up @@ -150,6 +152,15 @@ public class TMService {
@Value("${l10n.tmService.quartz.schedulerName:" + DEFAULT_SCHEDULER_NAME + "}")
String schedulerName;

/**
 * Threshold handed to {@link LevenshteinDistance} when comparing an AI translation with its
 * reviewed version. When the threshold is exceeded the distance comes back negative and the
 * review is counted as low similarity. Defaults to 50.
 */
@Value("${l10n.ai.translation.review.similarity.editDistanceMax:50}")
int editDistanceMax;

/**
 * Similarity percentage at or above which a reviewed AI translation is counted as high
 * similarity. Defaults to 90.
 */
@Value("${l10n.ai.translation.review.similarity.highPercentage:90}")
int aiTranslationSimilarityHighPercentage;

/**
 * Similarity percentage at or above which (and below the high threshold) a reviewed AI
 * translation is counted as medium similarity; anything lower is counted as low. Defaults to 70.
 */
@Value("${l10n.ai.translation.review.similarity.mediumPercentage:70}")
int aiTranslationSimilarityMediumPercentage;

/**
* Adds a {@link TMTextUnit} in a {@link TM}.
*
Expand Down Expand Up @@ -608,6 +619,10 @@ public AddTMTextUnitCurrentVariantResult addTMTextUnitCurrentVariantWithResult(
boolean overridden =
checkOverridden
&& currentTmTextUnitVariant.getStatus() == TMTextUnitVariant.Status.OVERRIDDEN;
if (currentTmTextUnitVariant.getStatus() == TMTextUnitVariant.Status.MT_REVIEW_NEEDED
&& status == TMTextUnitVariant.Status.APPROVED) {
logAiReviewMetrics(content, currentTmTextUnitVariant, localeId);
}
boolean updateNeeded =
!overridden
&& isUpdateNeededForTmTextUnitVariant(
Expand Down Expand Up @@ -653,6 +668,64 @@ && isUpdateNeededForTmTextUnitVariant(
return new AddTMTextUnitCurrentVariantResult(!noUpdate, tmTextUnitCurrentVariant);
}

/**
 * Records a metric describing how an AI translation fared once it was reviewed.
 *
 * <p>If the reviewed text is identical to the current variant's content, the
 * {@code AiTranslation.review.similarity.match} counter is incremented for the locale. Otherwise
 * the work is delegated to {@link #logSimilarityMetrics}, which buckets the change by similarity.
 *
 * @param reviewedTranslation the translation submitted by the review
 * @param currentTmTextUnitVariant the variant holding the translation that was under review
 * @param localeId id of the locale, used to resolve the BCP47 tag attached to the counter
 */
private void logAiReviewMetrics(
    String reviewedTranslation, TMTextUnitVariant currentTmTextUnitVariant, Long localeId) {
  boolean unchangedByReview = currentTmTextUnitVariant.getContent().equals(reviewedTranslation);

  if (!unchangedByReview) {
    // The reviewer edited the translation: measure how far the new text is from the original.
    logSimilarityMetrics(reviewedTranslation, currentTmTextUnitVariant, localeId);
    return;
  }

  Tags localeTags = Tags.of("locale", localeService.findById(localeId).getBcp47Tag());
  meterRegistry.counter("AiTranslation.review.similarity.match", localeTags).increment();
}

/**
 * Increments the low, medium or high similarity counter for a reviewed AI translation whose text
 * was changed during review.
 *
 * <p>The Levenshtein distance between the current variant's content and the reviewed text is
 * computed with {@code editDistanceMax} as threshold. A negative distance (threshold exceeded) is
 * counted as low similarity. Otherwise the distance is converted to a percentage and compared
 * with the configured high and medium thresholds.
 *
 * @param reviewedTranslation the translation submitted by the review
 * @param currentTmTextUnitVariant the variant holding the translation that was under review
 * @param localeId id of the locale, used to resolve the BCP47 tag attached to the counter
 */
private void logSimilarityMetrics(
    String reviewedTranslation, TMTextUnitVariant currentTmTextUnitVariant, Long localeId) {
  String aiTranslation = currentTmTextUnitVariant.getContent();
  int distance = new LevenshteinDistance(editDistanceMax).apply(aiTranslation, reviewedTranslation);

  // Start from the lowest bucket; it also covers a negative distance, which signals that the
  // edit distance threshold was exceeded.
  String metricName = "AiTranslation.review.similarity.low";
  if (distance >= 0) {
    double percentage =
        calculateSimilarityPercentage(aiTranslation, reviewedTranslation, distance);
    if (percentage >= aiTranslationSimilarityHighPercentage) {
      metricName = "AiTranslation.review.similarity.high";
    } else if (percentage >= aiTranslationSimilarityMediumPercentage) {
      metricName = "AiTranslation.review.similarity.medium";
    }
  }

  Tags localeTags = Tags.of("locale", localeService.findById(localeId).getBcp47Tag());
  meterRegistry.counter(metricName, localeTags).increment();
}

/**
 * Converts an edit distance into a similarity percentage relative to the longer of the two
 * strings: {@code (maxLength - editDistance) / maxLength * 100}.
 *
 * <p>Two empty strings are treated as 100% similar. Without that guard the division would be
 * {@code 0.0 / 0}, i.e. {@code NaN}, which compares false against every threshold.
 *
 * @param original the text before the change
 * @param updated the text after the change
 * @param editDistance the edit distance between {@code original} and {@code updated}
 * @return the similarity as a percentage; 100 when both strings are empty
 */
private double calculateSimilarityPercentage(String original, String updated, int editDistance) {
  int maxLength = Math.max(original.length(), updated.length());
  if (maxLength == 0) {
    // Both strings are empty: identical by definition, and dividing would produce NaN.
    return 100;
  }
  return ((double) (maxLength - editDistance) / maxLength) * 100;
}

public AddTMTextUnitCurrentVariantResult addTMTextUnitCurrentVariantWithResult(
TMTextUnitCurrentVariant tmTextUnitCurrentVariant,
Long tmId,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@
import com.google.common.base.Function;
import com.google.common.collect.FluentIterable;
import com.google.common.collect.Lists;
import io.micrometer.core.instrument.MeterRegistry;
import io.micrometer.core.instrument.Tags;
import io.micrometer.core.instrument.simple.SimpleMeterRegistry;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
Expand All @@ -63,6 +66,7 @@
import org.junit.Ignore;
import org.junit.Rule;
import org.junit.Test;
import org.mockito.Mockito;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
Expand Down Expand Up @@ -4808,4 +4812,234 @@ public void testAddTMTextUnitWithOverriddenStatus() throws RepositoryNameAlready
assertEquals("this is the newest content", textUnitDTOFromSearch.getTarget());
assertEquals(TMTextUnitVariant.Status.APPROVED, textUnitDTOFromSearch.getStatus());
}

/**
 * Approving an MT_REVIEW_NEEDED translation after appending " changed" to it (22 to 30
 * characters) must look up the medium similarity counter for fr-FR, assuming the default
 * similarity thresholds.
 *
 * <p>The spy registry is installed on {@code tmService} only for the duration of the test; the
 * previous registry is restored afterwards so the spy does not leak into other tests.
 */
@Test
public void testMTReviewMetricsLoggingTranslationUpdatedMediumSimilarity()
    throws RepositoryNameAlreadyUsedException {
  MeterRegistry originalMeterRegistry = this.tmService.meterRegistry;
  MeterRegistry meterRegistry = Mockito.spy(new SimpleMeterRegistry());
  this.tmService.meterRegistry = meterRegistry;
  try {
    createTestData();

    Long textUnitId =
        addTextUnitAndCheck(
            this.tmId,
            this.assetId,
            "mtReviewMetricsLogging",
            "mt translation content",
            "some comment",
            "3212c3beb09db681379b7a1ed9f37bfe",
            "5f3ca19eb49f50b55326065f4185dadd");

    Locale targetLocale = this.localeService.findByBcp47Tag("fr-FR");

    // Seed the machine translation that is awaiting review.
    TMTextUnitCurrentVariant tmTextUnitCurrentVariant =
        this.tmService.addTMTextUnitCurrentVariant(
            textUnitId,
            targetLocale.getId(),
            "mt translation content",
            "some comment",
            TMTextUnitVariant.Status.MT_REVIEW_NEEDED,
            false);

    // Approve it with a moderately edited translation.
    this.tmService.addTMTextUnitCurrentVariantWithResult(
        tmTextUnitCurrentVariant,
        this.tmId,
        this.assetId,
        textUnitId,
        tmTextUnitCurrentVariant.getLocale().getId(),
        "mt translation content changed",
        "some comment",
        TMTextUnitVariant.Status.APPROVED,
        true,
        JSR310Migration.dateTimeNow(),
        null,
        false);

    Mockito.verify(meterRegistry, Mockito.times(1))
        .counter("AiTranslation.review.similarity.medium", Tags.of("locale", "fr-FR"));
  } finally {
    // Put the original registry back so later tests do not record into this spy.
    this.tmService.meterRegistry = originalMeterRegistry;
  }
}

/**
 * Approving an MT_REVIEW_NEEDED translation with a single-character edit ("translation" to
 * "translations") must look up the high similarity counter for fr-FR, assuming the default
 * similarity thresholds.
 *
 * <p>The spy registry is installed on {@code tmService} only for the duration of the test; the
 * previous registry is restored afterwards so the spy does not leak into other tests.
 */
@Test
public void testMTReviewMetricsLoggingTranslationUpdatedHighSimilarity()
    throws RepositoryNameAlreadyUsedException {
  MeterRegistry originalMeterRegistry = this.tmService.meterRegistry;
  MeterRegistry meterRegistry = Mockito.spy(new SimpleMeterRegistry());
  this.tmService.meterRegistry = meterRegistry;
  try {
    createTestData();

    Long textUnitId =
        addTextUnitAndCheck(
            this.tmId,
            this.assetId,
            "mtReviewMetricsLogging",
            "mt translation content",
            "some comment",
            "3212c3beb09db681379b7a1ed9f37bfe",
            "5f3ca19eb49f50b55326065f4185dadd");

    Locale targetLocale = this.localeService.findByBcp47Tag("fr-FR");

    // Seed the machine translation that is awaiting review.
    TMTextUnitCurrentVariant tmTextUnitCurrentVariant =
        this.tmService.addTMTextUnitCurrentVariant(
            textUnitId,
            targetLocale.getId(),
            "mt translation content",
            "some comment",
            TMTextUnitVariant.Status.MT_REVIEW_NEEDED,
            false);

    // Approve it with a translation that differs by one inserted character.
    this.tmService.addTMTextUnitCurrentVariantWithResult(
        tmTextUnitCurrentVariant,
        this.tmId,
        this.assetId,
        textUnitId,
        tmTextUnitCurrentVariant.getLocale().getId(),
        "mt translations content",
        "some comment",
        TMTextUnitVariant.Status.APPROVED,
        true,
        JSR310Migration.dateTimeNow(),
        null,
        false);

    Mockito.verify(meterRegistry, Mockito.times(1))
        .counter("AiTranslation.review.similarity.high", Tags.of("locale", "fr-FR"));
  } finally {
    // Put the original registry back so later tests do not record into this spy.
    this.tmService.meterRegistry = originalMeterRegistry;
  }
}

/**
 * Approving an MT_REVIEW_NEEDED translation with text that has little in common with the
 * original must look up the low similarity counter for fr-FR.
 *
 * <p>The spy registry is installed on {@code tmService} only for the duration of the test; the
 * previous registry is restored afterwards so the spy does not leak into other tests.
 */
@Test
public void testMTReviewMetricsLoggingTranslationUpdatedLowSimilarity()
    throws RepositoryNameAlreadyUsedException {
  MeterRegistry originalMeterRegistry = this.tmService.meterRegistry;
  MeterRegistry meterRegistry = Mockito.spy(new SimpleMeterRegistry());
  this.tmService.meterRegistry = meterRegistry;
  try {
    createTestData();

    Long textUnitId =
        addTextUnitAndCheck(
            this.tmId,
            this.assetId,
            "mtReviewMetricsLogging",
            "mt translation content",
            "some comment",
            "3212c3beb09db681379b7a1ed9f37bfe",
            "5f3ca19eb49f50b55326065f4185dadd");

    Locale targetLocale = this.localeService.findByBcp47Tag("fr-FR");

    // Seed the machine translation that is awaiting review.
    TMTextUnitCurrentVariant tmTextUnitCurrentVariant =
        this.tmService.addTMTextUnitCurrentVariant(
            textUnitId,
            targetLocale.getId(),
            "mt translation content",
            "some comment",
            TMTextUnitVariant.Status.MT_REVIEW_NEEDED,
            false);

    // Approve it with a rewritten translation.
    this.tmService.addTMTextUnitCurrentVariantWithResult(
        tmTextUnitCurrentVariant,
        this.tmId,
        this.assetId,
        textUnitId,
        tmTextUnitCurrentVariant.getLocale().getId(),
        "completely different",
        "some comment",
        TMTextUnitVariant.Status.APPROVED,
        true,
        JSR310Migration.dateTimeNow(),
        null,
        false);

    Mockito.verify(meterRegistry, Mockito.times(1))
        .counter("AiTranslation.review.similarity.low", Tags.of("locale", "fr-FR"));
  } finally {
    // Put the original registry back so later tests do not record into this spy.
    this.tmService.meterRegistry = originalMeterRegistry;
  }
}

/**
 * Approving an MT_REVIEW_NEEDED translation without changing its text must look up the match
 * counter for fr-FR.
 *
 * <p>The spy registry is installed on {@code tmService} only for the duration of the test; the
 * previous registry is restored afterwards so the spy does not leak into other tests.
 */
@Test
public void testMTReviewMetricsLoggingTranslationMatch()
    throws RepositoryNameAlreadyUsedException {
  MeterRegistry originalMeterRegistry = this.tmService.meterRegistry;
  MeterRegistry meterRegistry = Mockito.spy(new SimpleMeterRegistry());
  this.tmService.meterRegistry = meterRegistry;
  try {
    createTestData();

    Long textUnitId =
        addTextUnitAndCheck(
            this.tmId,
            this.assetId,
            "mtReviewMetricsLogging",
            "mt translation content",
            "some comment",
            "3212c3beb09db681379b7a1ed9f37bfe",
            "5f3ca19eb49f50b55326065f4185dadd");

    Locale targetLocale = this.localeService.findByBcp47Tag("fr-FR");

    // Seed the machine translation that is awaiting review.
    TMTextUnitCurrentVariant tmTextUnitCurrentVariant =
        this.tmService.addTMTextUnitCurrentVariant(
            textUnitId,
            targetLocale.getId(),
            "mt translation content",
            "some comment",
            TMTextUnitVariant.Status.MT_REVIEW_NEEDED,
            false);

    // Approve it with exactly the same text.
    this.tmService.addTMTextUnitCurrentVariantWithResult(
        tmTextUnitCurrentVariant,
        this.tmId,
        this.assetId,
        textUnitId,
        tmTextUnitCurrentVariant.getLocale().getId(),
        "mt translation content",
        "some comment",
        TMTextUnitVariant.Status.APPROVED,
        true,
        JSR310Migration.dateTimeNow(),
        null,
        false);

    Mockito.verify(meterRegistry, Mockito.times(1))
        .counter("AiTranslation.review.similarity.match", Tags.of("locale", "fr-FR"));
  } finally {
    // Put the original registry back so later tests do not record into this spy.
    this.tmService.meterRegistry = originalMeterRegistry;
  }
}

/**
 * Moving an MT_REVIEW_NEEDED translation to REVIEW_NEEDED (not APPROVED) with unchanged text must
 * not look up the match counter for fr-FR.
 *
 * <p>The spy registry is installed on {@code tmService} only for the duration of the test; the
 * previous registry is restored afterwards so the spy does not leak into other tests.
 */
@Test
public void testMTReviewMetricsLoggingTranslationNotApproved()
    throws RepositoryNameAlreadyUsedException {
  MeterRegistry originalMeterRegistry = this.tmService.meterRegistry;
  MeterRegistry meterRegistry = Mockito.spy(new SimpleMeterRegistry());
  this.tmService.meterRegistry = meterRegistry;
  try {
    createTestData();

    Long textUnitId =
        addTextUnitAndCheck(
            this.tmId,
            this.assetId,
            "mtReviewMetricsLogging",
            "mt translation content",
            "some comment",
            "3212c3beb09db681379b7a1ed9f37bfe",
            "5f3ca19eb49f50b55326065f4185dadd");

    Locale targetLocale = this.localeService.findByBcp47Tag("fr-FR");

    // Seed the machine translation that is awaiting review.
    TMTextUnitCurrentVariant tmTextUnitCurrentVariant =
        this.tmService.addTMTextUnitCurrentVariant(
            textUnitId,
            targetLocale.getId(),
            "mt translation content",
            "some comment",
            TMTextUnitVariant.Status.MT_REVIEW_NEEDED,
            false);

    // Same text, but the new status is REVIEW_NEEDED rather than APPROVED.
    this.tmService.addTMTextUnitCurrentVariantWithResult(
        tmTextUnitCurrentVariant,
        this.tmId,
        this.assetId,
        textUnitId,
        tmTextUnitCurrentVariant.getLocale().getId(),
        "mt translation content",
        "some comment",
        TMTextUnitVariant.Status.REVIEW_NEEDED,
        true,
        JSR310Migration.dateTimeNow(),
        null,
        false);

    Mockito.verify(meterRegistry, Mockito.times(0))
        .counter("AiTranslation.review.similarity.match", Tags.of("locale", "fr-FR"));
  } finally {
    // Put the original registry back so later tests do not record into this spy.
    this.tmService.meterRegistry = originalMeterRegistry;
  }
}
}