Skip to content

Commit

Permalink
Merge pull request #753 from kusumotolab/fix-patch-encoding
Browse files Browse the repository at this point in the history
patchのエンコーディングのバグを修正
  • Loading branch information
tt-kuma authored Sep 7, 2020
2 parents 6e40150 + aa8713a commit 199d910
Show file tree
Hide file tree
Showing 10 changed files with 243 additions and 4 deletions.
1 change: 1 addition & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ dependencies {
implementation 'org.eclipse.platform:org.eclipse.text:3.8.0'
implementation 'org.jacoco:org.jacoco.core:0.8.1'
implementation 'org.slf4j:slf4j-api:1.7.25'
implementation 'com.github.albfernandez:juniversalchardet:2.3.2'

testImplementation 'org.assertj:assertj-core:3.10.0'
testImplementation 'org.mockito:mockito-core:2.+'
Expand Down
19 changes: 19 additions & 0 deletions example/BuildSuccess24/src/example/Foo.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package example;

/**
 * Example fixture used by the patch-encoding tests.
 *
 * The Javadoc on {@code max} is deliberately written in Japanese so that this file contains
 * non-ASCII text. English gloss of that Javadoc: "returns the larger of two integers";
 * {@code n} and {@code m} are integers; the result is the larger of {@code n} and {@code m}.
 *
 * NOTE(review): keep the Japanese text below unchanged - the tests search the generated patch
 * for these exact strings, so translating or re-encoding them would defeat the fixture.
 */
public class Foo {

/**
* 2つの整数のうち大きい整数を返す
*
* @param n 整数
* @param m 整数
* @return n, mのうち大きい整数
*/
public int max(int n, int m) {
// m wins only when it is strictly greater; on a tie n is returned (same value either way).
if (n < m) {
return m;
} else {
return n;
}
}
}
19 changes: 19 additions & 0 deletions example/BuildSuccess25/src/example/Foo.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package example;

/**
 * Example fixture used by the patch-encoding tests; per the example README its content matches
 * the BuildSuccess24 fixture but the file is stored in Shift-JIS.
 *
 * The Javadoc on {@code max} is deliberately written in Japanese so that this file contains
 * non-ASCII text. English gloss of that Javadoc: "returns the larger of two integers";
 * {@code n} and {@code m} are integers; the result is the larger of {@code n} and {@code m}.
 *
 * NOTE(review): keep the Japanese text and the file encoding unchanged - the tests search the
 * generated patch for these exact strings, so translating or re-encoding them would defeat
 * the fixture.
 */
public class Foo {

/**
* 2つの整数のうち大きい整数を返す
*
* @param n 整数
* @param m 整数
* @return n, mのうち大きい整数
*/
public int max(int n, int m) {
// m wins only when it is strictly greater; on a tie n is returned (same value either way).
if (n < m) {
return m;
} else {
return n;
}
}
}
12 changes: 12 additions & 0 deletions example/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,18 @@ resourceファイルへのアクセスを試す題材.
2実装クラスと1テストクラス.
クラスローダ経由でのバイナリresourceのアクセスを試す題材.

### BuildSuccess23
1実装クラスと1テストクラス.
クラスローダの委譲処理が適切にスキップされるかどうかを試す題材.

### BuildSuccess24
日本語が含まれているコード.エンコーディングはUTF-8.
パッチにエンコーディングの違いが含まれていないかを試す題材.

### BuildSuccess25
日本語が含まれているコード.エンコーディングはShift-JIS.
内容は`BuildSuccess24`と同じ.

# CloseToZero
APR用の題材.
整数をゼロに一つ近づけるメソッド`close_to_zero(n)`の修正を試みる.
Expand Down
33 changes: 33 additions & 0 deletions src/main/java/jp/kusumotolab/kgenprog/CharsetDetector.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
package jp.kusumotolab.kgenprog;

import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.file.Path;
import org.mozilla.universalchardet.UniversalDetector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Detects the character encoding of a file.
 */
public class CharsetDetector {

  private static final Logger log = LoggerFactory.getLogger(CharsetDetector.class);

  /**
   * Detects the character encoding of the given file.
   *
   * <p>The JVM default charset is returned as a fallback when the detector cannot determine an
   * encoding, when the detected encoding name is not a charset this JVM can provide, or when
   * the file cannot be read.
   *
   * @param path the file whose encoding should be detected
   * @return the detected charset, or {@link Charset#defaultCharset()} as a fallback
   */
  public Charset detect(final Path path) {
    try {
      final String charsetName = UniversalDetector.detectCharset(path);
      if (charsetName == null) {
        // The detector could not decide on an encoding.
        return Charset.defaultCharset();
      }
      return Charset.forName(charsetName);
    } catch (final IOException e) {
      log.error(e.getMessage(), e);
      return Charset.defaultCharset();
    } catch (final IllegalArgumentException e) {
      // Charset.forName signals an illegal or unsupported charset name with
      // IllegalCharsetNameException / UnsupportedCharsetException (both are
      // IllegalArgumentException subclasses). Honour the documented fallback instead of
      // letting an unchecked exception escape to callers that only expect IOException handling.
      log.warn("Detected charset of {} is not supported by this JVM; using the default charset",
          path, e);
      return Charset.defaultCharset();
    }
  }
}
17 changes: 17 additions & 0 deletions src/main/java/jp/kusumotolab/kgenprog/output/FileDiff.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package jp.kusumotolab.kgenprog.output;

import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.stream.Collectors;

Expand Down Expand Up @@ -46,4 +48,19 @@ public String toString() {
return diff.stream()
.collect(Collectors.joining(System.lineSeparator()));
}

/**
 * Returns the diff text after passing every line through the platform default charset.
 *
 * <p>When the default charset is UTF-8 the result is simply {@link #toString()}. Otherwise each
 * diff line is encoded with the default charset and decoded again with the same charset, and
 * the lines are joined with the system line separator.
 *
 * @return the diff as it is representable in the platform default charset
 */
public String toStringWithDefaultEncoding() {
  final Charset platformCharset = Charset.defaultCharset();
  if (StandardCharsets.UTF_8.equals(platformCharset)) {
    return toString();
  }

  return diff.stream()
      .map(line -> new String(line.getBytes(platformCharset), platformCharset))
      .collect(Collectors.joining(System.lineSeparator()));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ public void export(final VariantStore variantStore) {
private void writeLog(final Patch patch) {
patch.getFileDiffs()
.forEach(fd -> log.info(String.format("patch (v%d)%s%s",
patch.getVariantId(), System.lineSeparator(), fd))
patch.getVariantId(), System.lineSeparator(), fd.toStringWithDefaultEncoding()))
);
}

Expand Down
16 changes: 15 additions & 1 deletion src/main/java/jp/kusumotolab/kgenprog/output/PatchGenerator.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package jp.kusumotolab.kgenprog.output;

import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Collections;
Expand All @@ -20,6 +22,7 @@
import com.github.difflib.DiffUtils;
import com.github.difflib.UnifiedDiffUtils;
import com.github.difflib.algorithm.DiffException;
import jp.kusumotolab.kgenprog.CharsetDetector;
import jp.kusumotolab.kgenprog.ga.variant.Variant;
import jp.kusumotolab.kgenprog.project.GeneratedAST;

Expand Down Expand Up @@ -57,8 +60,19 @@ private List<String> parseModifiedSource(final GeneratedAST<?> ast) {
private List<String> readOriginalSource(final GeneratedAST<?> ast) {
final Path originalPath = ast.getSourcePath()
.getResolvedPath();

try {
return Files.readAllLines(originalPath);
final CharsetDetector detector = new CharsetDetector();
final Charset charset = detector.detect(originalPath);
if (charset.equals(StandardCharsets.UTF_8)) {
return Files.readAllLines(originalPath, StandardCharsets.UTF_8);
} else {
return Files.readAllLines(originalPath, charset)
.stream()
.map(e -> e.getBytes(StandardCharsets.UTF_8))
.map(e -> new String(e, StandardCharsets.UTF_8))
.collect(Collectors.toList());
}
} catch (final IOException e) {
log.error(e.getMessage(), e);
return Collections.emptyList();
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package jp.kusumotolab.kgenprog.project.jdt;

import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
Expand All @@ -17,6 +19,7 @@
import org.eclipse.jdt.core.dom.CompilationUnit;
import org.eclipse.jdt.core.dom.FileASTRequestor;
import org.eclipse.jdt.core.formatter.DefaultCodeFormatterConstants;
import jp.kusumotolab.kgenprog.CharsetDetector;
import jp.kusumotolab.kgenprog.project.GeneratedAST;
import jp.kusumotolab.kgenprog.project.GeneratedSourceCode;
import jp.kusumotolab.kgenprog.project.GenerationFailedSourceCode;
Expand Down Expand Up @@ -107,7 +110,10 @@ public static ASTParser createNewParser() {

private String loadAsString(final String path) {
try {
return new String(Files.readAllBytes(Paths.get(path)));
final CharsetDetector detector = new CharsetDetector();
final Charset charset = detector.detect(Paths.get(path));
final String code = Files.readString(Paths.get(path), charset);
return new String(code.getBytes(StandardCharsets.UTF_8), StandardCharsets.UTF_8);
} catch (final IOException e) {
throw new RuntimeException(e);
}
Expand Down
120 changes: 119 additions & 1 deletion src/test/java/jp/kusumotolab/kgenprog/output/PatchGeneratorTest.java
Original file line number Diff line number Diff line change
@@ -1,16 +1,21 @@
package jp.kusumotolab.kgenprog.output;

import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatCode;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.spy;
import static org.mockito.Mockito.when;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
import org.junit.Test;
import org.slf4j.LoggerFactory;
import ch.qos.logback.classic.Logger;
import ch.qos.logback.classic.spi.ILoggingEvent;
import ch.qos.logback.core.read.ListAppender;
import jp.kusumotolab.kgenprog.ga.variant.Variant;
import jp.kusumotolab.kgenprog.ga.variant.VariantStore;
import jp.kusumotolab.kgenprog.project.GeneratedAST;
Expand All @@ -22,6 +27,8 @@

public class PatchGeneratorTest {

private static final Charset SHIFT_JIS = Charset.forName("shift-jis");

@Test
public void testExport() throws IOException {
final Path outdir = TestUtil.createVirtualDir();
Expand Down Expand Up @@ -151,13 +158,124 @@ public void testExportWithPreviousResults() throws IOException {
assertThat(true).isTrue(); // to prevent smoke test in sonarlint
}

/**
 * Exports a patch for the Shift-JIS encoded example project and checks that neither the patch
 * file nor the logged patch contains differences caused by the file encoding.
 */
@Test
public void testPatchGenerationWithShiftJISEncodedSourceCode() throws IOException {
  // Capture the events logged through PatchExporter's logger so the logged patch can be checked.
  final Logger logger = (Logger) LoggerFactory.getLogger(PatchExporter.class);
  final ListAppender<ILoggingEvent> listAppender = new ListAppender<>();
  listAppender.start();
  logger.addAppender(listAppender);

  try {
    final Path outdir = TestUtil.createVirtualDir();
    final GeneratedSourceCode base = createGeneratedSourceCodeFromShiftJISEncodedFiles();

    final Variant variant = createModifiedVariant(base, "Foo.java", "return m;", "return n;", 0);
    final VariantStore variantStore = createMockedVariantStore(variant);

    final Exporter patchExporter = new PatchExporter(outdir);
    patchExporter.export(variantStore);

    assertThat(outdir.resolve("patch-v0")).exists();

    final String diff = Files.readString(outdir.resolve("patch-v0/example.Foo.diff"),
        StandardCharsets.UTF_8);
    // Make sure the diff has no hunks caused by an encoding mismatch: the Japanese comments
    // must not show up as Shift-JIS bytes misread as UTF-8.
    assertThat(diff)
        .doesNotContain(
            new String("2つの整数のうち大きい整数を返す".getBytes(SHIFT_JIS), StandardCharsets.UTF_8))
        .doesNotContain(
            new String("整数".getBytes(SHIFT_JIS), StandardCharsets.UTF_8))
        .doesNotContain(
            new String("n, mのうち大きい整数".getBytes(SHIFT_JIS), StandardCharsets.UTF_8))
        .contains("public int max")
        .containsPattern("- +return m;")
        .containsPattern("\\+ +return n;");

    // Verify the logged patch as well.
    final List<ILoggingEvent> logs = listAppender.list;
    // Fail with a readable assertion instead of an IndexOutOfBoundsException when nothing
    // was logged.
    assertThat(logs).isNotEmpty();
    final String message = logs.get(0)
        .getMessage();
    assertThat(message)
        .doesNotContain(
            new String("2つの整数のうち大きい整数を返す".getBytes(StandardCharsets.UTF_8),
                Charset.defaultCharset()))
        .doesNotContain(
            new String("整数".getBytes(StandardCharsets.UTF_8), Charset.defaultCharset()))
        .doesNotContain(
            new String("n, mのうち大きい整数".getBytes(StandardCharsets.UTF_8),
                Charset.defaultCharset()))
        .contains("public int max")
        .containsPattern("- +return m;")
        .containsPattern("\\+ +return n;");
  } finally {
    // The logger outlives this test; detach the appender so it does not keep collecting
    // events from other tests.
    logger.detachAppender(listAppender);
    listAppender.stop();
  }
}

/**
 * Exports a patch for the UTF-8 encoded example project and checks that neither the patch
 * file nor the logged patch contains differences caused by the file encoding.
 */
@Test
public void testPatchGenerationWithUTF8EncodedSourceCode() throws IOException {
  // Capture the events logged through PatchExporter's logger so the logged patch can be checked.
  final Logger logger = (Logger) LoggerFactory.getLogger(PatchExporter.class);
  final ListAppender<ILoggingEvent> listAppender = new ListAppender<>();
  listAppender.start();
  logger.addAppender(listAppender);

  try {
    final Path outdir = TestUtil.createVirtualDir();
    final GeneratedSourceCode base = createGeneratedSourceCodeFromUTF8EncodedFiles();

    final Variant variant = createModifiedVariant(base, "Foo.java", "return m;", "return n;", 0);
    final VariantStore variantStore = createMockedVariantStore(variant);

    final Exporter patchExporter = new PatchExporter(outdir);
    patchExporter.export(variantStore);

    assertThat(outdir.resolve("patch-v0")).exists();

    final String diff = Files.readString(outdir.resolve("patch-v0/example.Foo.diff"),
        StandardCharsets.UTF_8);
    // Make sure the diff has no hunks caused by an encoding mismatch: the Japanese comments
    // must not show up as default-charset bytes misread as UTF-8.
    assertThat(diff)
        .doesNotContain(
            new String("2つの整数のうち大きい整数を返す".getBytes(Charset.defaultCharset()),
                StandardCharsets.UTF_8))
        .doesNotContain(
            new String("整数".getBytes(Charset.defaultCharset()), StandardCharsets.UTF_8))
        .doesNotContain(
            new String("n, mのうち大きい整数".getBytes(Charset.defaultCharset()),
                StandardCharsets.UTF_8))
        .contains("public int max")
        .containsPattern("- +return m;")
        .containsPattern("\\+ +return n;");

    // Verify the logged patch as well.
    final List<ILoggingEvent> logs = listAppender.list;
    // Fail with a readable assertion instead of an IndexOutOfBoundsException when nothing
    // was logged.
    assertThat(logs).isNotEmpty();
    final String message = logs.get(0)
        .getMessage();
    assertThat(message)
        .doesNotContain(
            new String("2つの整数のうち大きい整数を返す".getBytes(StandardCharsets.UTF_8),
                Charset.defaultCharset()))
        .doesNotContain(
            new String("整数".getBytes(StandardCharsets.UTF_8), Charset.defaultCharset()))
        .doesNotContain(
            new String("n, mのうち大きい整数".getBytes(StandardCharsets.UTF_8),
                Charset.defaultCharset()))
        .contains("public int max")
        .containsPattern("- +return m;")
        .containsPattern("\\+ +return n;");
  } finally {
    // The logger outlives this test; detach the appender so it does not keep collecting
    // events from other tests.
    logger.detachAppender(listAppender);
    listAppender.stop();
  }
}

/**
 * Builds the generated source code of the {@code example/BuildSuccess03} project, which serves
 * as the original source for the tests.
 */
private GeneratedSourceCode createGeneratedSourceCode() {
  final TargetProject project = TargetProjectFactory.create(Paths.get("example/BuildSuccess03"));
  return TestUtil.createGeneratedSourceCode(project);
}

/**
 * Builds the generated source code of the {@code example/BuildSuccess24} project
 * (the UTF-8 encoded example).
 */
private GeneratedSourceCode createGeneratedSourceCodeFromUTF8EncodedFiles() {
  final TargetProject project = TargetProjectFactory.create(Paths.get("example/BuildSuccess24"));
  return TestUtil.createGeneratedSourceCode(project);
}

/**
 * Builds the generated source code of the {@code example/BuildSuccess25} project
 * (the Shift-JIS encoded example).
 */
private GeneratedSourceCode createGeneratedSourceCodeFromShiftJISEncodedFiles() {
  final TargetProject project = TargetProjectFactory.create(Paths.get("example/BuildSuccess25"));
  return TestUtil.createGeneratedSourceCode(project);
}

@SuppressWarnings({"rawtypes", "unchecked"}) // suppress warnings for jdt constructions
private Variant createModifiedVariant(final GeneratedSourceCode baseSourceCode,
final String replacedFile,
Expand Down

0 comments on commit 199d910

Please sign in to comment.