Skip to content

Commit

Permalink
Add more file metadata validation tests (apache#3774)
Browse files Browse the repository at this point in the history
This addresses part of apache#3766 and adds more tests to verify that file metadata
validation works properly for columns that use StoredTabletFile
metadata. Tests include verifying that the old format is invalid, that the JSON
format is correctly validated, and that rows form a valid row range
  • Loading branch information
cshannon authored Oct 1, 2023
1 parent eaa1384 commit 191ea25
Show file tree
Hide file tree
Showing 2 changed files with 167 additions and 50 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,14 @@ public static StoredTabletFile of(final Path path, Range range) {
private static TabletFileCq deserialize(String json) {
final TabletFileCqMetadataGson metadata =
gson.fromJson(Objects.requireNonNull(json), TabletFileCqMetadataGson.class);

// Check each field and provide better error messages if null as all fields should be set
Objects.requireNonNull(metadata.path, "Serialized StoredTabletFile path must not be null");
Objects.requireNonNull(metadata.startRow,
"Serialized StoredTabletFile range startRow must not be null");
Objects.requireNonNull(metadata.endRow,
"Serialized StoredTabletFile range endRow must not be null");

// Recreate the exact Range that was originally stored in Metadata. Stored ranges are originally
// constructed with inclusive/exclusive for the start and end key inclusivity settings.
// (Except for Ranges with no start/endkey as then the inclusivity flags do not matter)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,12 @@
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;

import java.lang.reflect.Method;
import java.net.URI;
import java.util.Base64;
import java.util.List;

import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Mutation;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.data.Value;
Expand Down Expand Up @@ -167,10 +170,7 @@ public void testBulkFileCheck() {
.of(URI.create("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile"), new Range())
.getMetadataText(),
new DataFileValue(1, 1).encodeAsValue());
violations = mc.check(createEnv(), m);
assertNotNull(violations);
assertEquals(1, violations.size());
assertEquals(Short.valueOf((short) 8), violations.get(0));
assertViolation(mc, m, (short) 8);

// txid that throws exception
m = new Mutation(new Text("0;foo"));
Expand All @@ -184,10 +184,7 @@ public void testBulkFileCheck() {
.of(URI.create("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile"), new Range())
.getMetadataText(),
new DataFileValue(1, 1).encodeAsValue());
violations = mc.check(createEnv(), m);
assertNotNull(violations);
assertEquals(1, violations.size());
assertEquals(Short.valueOf((short) 8), violations.get(0));
assertViolation(mc, m, (short) 8);

// active txid w/ file
m = new Mutation(new Text("0;foo"));
Expand All @@ -211,10 +208,7 @@ public void testBulkFileCheck() {
.of(URI.create("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile"), new Range())
.getMetadataText(),
new Value("5"));
violations = mc.check(createEnv(), m);
assertNotNull(violations);
assertEquals(1, violations.size());
assertEquals(Short.valueOf((short) 8), violations.get(0));
assertViolation(mc, m, (short) 8);

// two active txids w/ files
m = new Mutation(new Text("0;foo"));
Expand All @@ -238,10 +232,7 @@ public void testBulkFileCheck() {
.of(URI.create("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile2"), new Range())
.getMetadataText(),
new DataFileValue(1, 1).encodeAsValue());
violations = mc.check(createEnv(), m);
assertNotNull(violations);
assertEquals(1, violations.size());
assertEquals(Short.valueOf((short) 8), violations.get(0));
assertViolation(mc, m, (short) 8);

// two files w/ one active txid
m = new Mutation(new Text("0;foo"));
Expand Down Expand Up @@ -285,10 +276,7 @@ public void testBulkFileCheck() {
.of(URI.create("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile2"), new Range())
.getMetadataText(),
new Value("5"));
violations = mc.check(createEnv(), m);
assertNotNull(violations);
assertEquals(1, violations.size());
assertEquals(Short.valueOf((short) 8), violations.get(0));
assertViolation(mc, m, (short) 8);

// active txid, mutation that looks like split
m = new Mutation(new Text("0;foo"));
Expand Down Expand Up @@ -349,11 +337,7 @@ public void testBulkFileCheck() {
.of(URI.create("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile"), new Range())
.getMetadata().replace("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile", "/someFile")),
new Value("5"));
violations = mc.check(createEnv(), m);
assertNotNull(violations);
assertEquals(1, violations.size());
assertEquals(Short.valueOf((short) 9), violations.get(0));
assertNotNull(mc.getViolationDescription(violations.get(0)));
assertViolation(mc, m, (short) 9);

// Missing tables directory in path
m = new Mutation(new Text("0;foo"));
Expand All @@ -363,10 +347,7 @@ public void testBulkFileCheck() {
.getMetadata().replace("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile",
"hdfs://1.2.3.4/accumulo/2a/t-0003/someFile")),
new Value("5"));
violations = mc.check(createEnv(), m);
assertNotNull(violations);
assertEquals(1, violations.size());
assertEquals(Short.valueOf((short) 9), violations.get(0));
assertViolation(mc, m, (short) 9);

// No DataFileColumnFamily included
m = new Mutation(new Text("0;foo"));
Expand All @@ -375,11 +356,67 @@ public void testBulkFileCheck() {
.of(URI.create("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile"), new Range())
.getMetadataText(),
new Value("5"));
violations = mc.check(createEnv(), m);
assertNotNull(violations);
assertEquals(1, violations.size());
assertEquals(Short.valueOf((short) 8), violations.get(0));
assertNotNull(mc.getViolationDescription(violations.get(0)));
assertViolation(mc, m, (short) 8);

// Bad Json - only path (old format) so should fail parsing
m = new Mutation(new Text("0;foo"));
m.put(BulkFileColumnFamily.NAME, new Text("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile"),
new Value("5"));
assertViolation(mc, m, (short) 9);

// Bad Json - test startRow key is missing so validation should fail
// {"path":"hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile","endRow":""}
m = new Mutation(new Text("0;foo"));
m.put(BulkFileColumnFamily.NAME,
new Text(
"{\"path\":\"hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile\",\"endRow\":\"\"}"),
new Value("5"));
assertViolation(mc, m, (short) 9);

// Bad Json - test path key replaced with empty string so validation should fail
// {"":"hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile","startRow":"","endRow":""}
m = new Mutation(new Text("0;foo"));
m.put(
BulkFileColumnFamily.NAME, new Text(StoredTabletFile
.serialize("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile").replace("path", "")),
new Value("5"));
assertViolation(mc, m, (short) 9);

// Bad Json - test path value missing
// {"path":"","startRow":"","endRow":""}
m = new Mutation(new Text("0;foo"));
m.put(BulkFileColumnFamily.NAME,
new Text(StoredTabletFile
.of(URI.create("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile"), new Range())
.getMetadata().replaceFirst("\"path\":\".*\",\"startRow", "\"path\":\"\",\"startRow")),
new Value("5"));
assertViolation(mc, m, (short) 9);

// Bad Json - test startRow key replaced with empty string so validation should fail
// {"path":"hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile","":"","endRow":""}
m = new Mutation(new Text("0;foo"));
m.put(BulkFileColumnFamily.NAME, new Text(StoredTabletFile
.serialize("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile").replace("startRow", "")),
new Value("5"));
assertViolation(mc, m, (short) 9);

// Bad Json - test endRow key missing so validation should fail
m = new Mutation(new Text("0;foo"));
m.put(
BulkFileColumnFamily.NAME, new Text(StoredTabletFile
.serialize("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile").replace("endRow", "")),
new Value("5"));
assertViolation(mc, m, (short) 9);

// Bad Json - endRow will be replaced with encoded row without the exclusive byte 0x00 which is
// required for an endRow so will fail validation
m = new Mutation(new Text("0;foo"));
m.put(BulkFileColumnFamily.NAME, new Text(StoredTabletFile
.of(URI.create("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile"), new Range("a", "b"))
.getMetadata()
.replaceFirst("\"endRow\":\".*\"", "\"endRow\":\"" + encodeRowForMetadata("bad") + "\"")),
new Value("5"));
assertViolation(mc, m, (short) 9);

}

Expand All @@ -404,20 +441,67 @@ private void testFileMetadataValidation(Text columnFamily, Value value) {
.of(URI.create("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile"), new Range())
.getMetadata().replace("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile", "/someFile")),
value);
violations = mc.check(createEnv(), m);
assertNotNull(violations);
assertEquals(1, violations.size());
assertEquals(Short.valueOf((short) 9), violations.get(0));
assertNotNull(mc.getViolationDescription(violations.get(0)));
assertViolation(mc, m, (short) 9);

// Bad Json - only path so should fail parsing
// Bad Json - only path (old format) so should fail parsing
m = new Mutation(new Text("0;foo"));
m.put(columnFamily, new Text("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile"), value);
violations = mc.check(createEnv(), m);
assertNotNull(violations);
assertEquals(1, violations.size());
assertEquals(Short.valueOf((short) 9), violations.get(0));
assertNotNull(mc.getViolationDescription(violations.get(0)));
assertViolation(mc, m, (short) 9);

// Bad Json - test path key replaced with empty string so validation should fail
// {"":"hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile","startRow":"","endRow":""}
m = new Mutation(new Text("0;foo"));
m.put(
columnFamily, new Text(StoredTabletFile
.serialize("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile").replace("path", "")),
value);
assertViolation(mc, m, (short) 9);

// Bad Json - test path value missing
// {"path":"","startRow":"","endRow":""}
m = new Mutation(new Text("0;foo"));
m.put(columnFamily,
new Text(StoredTabletFile
.of(URI.create("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile"), new Range())
.getMetadata().replaceFirst("\"path\":\".*\",\"startRow", "\"path\":\"\",\"startRow")),
value);
assertViolation(mc, m, (short) 9);

// Bad Json - test startRow key replaced with empty string so validation should fail
// {"path":"hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile","":"","endRow":""}
m = new Mutation(new Text("0;foo"));
m.put(columnFamily, new Text(StoredTabletFile
.serialize("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile").replace("startRow", "")),
value);
assertViolation(mc, m, (short) 9);

// Bad Json - test startRow key is missing so validation should fail
// {"path":"hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile","endRow":""}
m = new Mutation(new Text("0;foo"));
m.put(columnFamily,
new Text(
"{\"path\":\"hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile\",\"endRow\":\"\"}"),
value);
assertViolation(mc, m, (short) 9);

// Bad Json - test endRow key replaced with empty string so validation should fail
// {"path":"hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile","":"","endRow":""}
m = new Mutation(new Text("0;foo"));
m.put(
columnFamily, new Text(StoredTabletFile
.serialize("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile").replace("endRow", "")),
value);
assertViolation(mc, m, (short) 9);

// Bad Json - endRow will be replaced with encoded row without the exclusive byte 0x00 which is
// required for an endRow so this will fail validation
m = new Mutation(new Text("0;foo"));
m.put(columnFamily, new Text(StoredTabletFile
.of(URI.create("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile"), new Range("a", "b"))
.getMetadata()
.replaceFirst("\"endRow\":\".*\"", "\"endRow\":\"" + encodeRowForMetadata("b") + "\"")),
value);
assertViolation(mc, m, (short) 9);

// Missing tables directory in path
m = new Mutation(new Text("0;foo"));
Expand All @@ -427,12 +511,9 @@ private void testFileMetadataValidation(Text columnFamily, Value value) {
.getMetadata().replace("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile",
"hdfs://1.2.3.4/accumulo/2a/t-0003/someFile")),
new DataFileValue(1, 1).encodeAsValue());
violations = mc.check(createEnv(), m);
assertNotNull(violations);
assertEquals(1, violations.size());
assertEquals(Short.valueOf((short) 9), violations.get(0));
assertViolation(mc, m, (short) 9);

// Should pass validation
// Should pass validation (inf range)
m = new Mutation(new Text("0;foo"));
m.put(columnFamily,
StoredTabletFile
Expand All @@ -442,7 +523,35 @@ private void testFileMetadataValidation(Text columnFamily, Value value) {
violations = mc.check(createEnv(), m);
assertNull(violations);

// Should pass validation with range set
m = new Mutation(new Text("0;foo"));
m.put(columnFamily, StoredTabletFile
.of(URI.create("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile"), new Range("a", "b"))
.getMetadataText(), new DataFileValue(1, 1).encodeAsValue());
violations = mc.check(createEnv(), m);
assertNull(violations);

assertNotNull(mc.getViolationDescription((short) 9));
}

// Encode a row how it would appear in Json
private static String encodeRowForMetadata(String row) {
try {
Method method = StoredTabletFile.class.getDeclaredMethod("encodeRow", Key.class);
method.setAccessible(true);
return Base64.getUrlEncoder()
.encodeToString((byte[]) method.invoke(StoredTabletFile.class, new Key(row)));
} catch (Exception e) {
throw new RuntimeException(e);
}
}

// Runs the metadata constraint check against the given mutation and asserts that it
// produces exactly one violation matching the expected code, and that a non-null
// human-readable description exists for that violation.
private void assertViolation(MetadataConstraints mc, Mutation m, Short violation) {
  final List<Short> actual = mc.check(createEnv(), m);
  assertNotNull(actual);
  assertEquals(1, actual.size());
  final Short reported = actual.get(0);
  assertEquals(violation, reported);
  assertNotNull(mc.getViolationDescription(reported));
}

}

0 comments on commit 191ea25

Please sign in to comment.