diff --git a/core/src/main/java/org/apache/accumulo/core/metadata/StoredTabletFile.java b/core/src/main/java/org/apache/accumulo/core/metadata/StoredTabletFile.java index 68475ae65c3..7a69d4f6f85 100644 --- a/core/src/main/java/org/apache/accumulo/core/metadata/StoredTabletFile.java +++ b/core/src/main/java/org/apache/accumulo/core/metadata/StoredTabletFile.java @@ -177,6 +177,14 @@ public static StoredTabletFile of(final Path path, Range range) { private static TabletFileCq deserialize(String json) { final TabletFileCqMetadataGson metadata = gson.fromJson(Objects.requireNonNull(json), TabletFileCqMetadataGson.class); + + // Check each field and provide better error messages if null as all fields should be set + Objects.requireNonNull(metadata.path, "Serialized StoredTabletFile path must not be null"); + Objects.requireNonNull(metadata.startRow, + "Serialized StoredTabletFile range startRow must not be null"); + Objects.requireNonNull(metadata.endRow, + "Serialized StoredTabletFile range endRow must not be null"); + // Recreate the exact Range that was originally stored in Metadata. Stored ranges are originally // constructed with inclusive/exclusive for the start and end key inclusivity settings. // (Except for Ranges with no start/endkey as then the inclusivity flags do not matter) diff --git a/server/base/src/test/java/org/apache/accumulo/server/constraints/MetadataConstraintsTest.java b/server/base/src/test/java/org/apache/accumulo/server/constraints/MetadataConstraintsTest.java index 1aeadb6736d..f891a0b1322 100644 --- a/server/base/src/test/java/org/apache/accumulo/server/constraints/MetadataConstraintsTest.java +++ b/server/base/src/test/java/org/apache/accumulo/server/constraints/MetadataConstraintsTest.java @@ -22,9 +22,12 @@ import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNull; +import java.lang.reflect.Method; import java.net.URI; +import java.util.Base64; import java.util.List; +import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.data.Mutation; import org.apache.accumulo.core.data.Range; import org.apache.accumulo.core.data.Value; @@ -167,10 +170,7 @@ public void testBulkFileCheck() { .of(URI.create("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile"), new Range()) .getMetadataText(), new DataFileValue(1, 1).encodeAsValue()); - violations = mc.check(createEnv(), m); - assertNotNull(violations); - assertEquals(1, violations.size()); - assertEquals(Short.valueOf((short) 8), violations.get(0)); + assertViolation(mc, m, (short) 8); // txid that throws exception m = new Mutation(new Text("0;foo")); @@ -184,10 +184,7 @@ public void testBulkFileCheck() { .of(URI.create("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile"), new Range()) .getMetadataText(), new DataFileValue(1, 1).encodeAsValue()); - violations = mc.check(createEnv(), m); - assertNotNull(violations); - assertEquals(1, violations.size()); - assertEquals(Short.valueOf((short) 8), violations.get(0)); + assertViolation(mc, m, (short) 8); // active txid w/ file m = new Mutation(new Text("0;foo")); @@ -211,10 +208,7 @@ public void testBulkFileCheck() { .of(URI.create("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile"), new Range()) .getMetadataText(), new Value("5")); - violations = mc.check(createEnv(), m); - assertNotNull(violations); - assertEquals(1, violations.size()); - assertEquals(Short.valueOf((short) 8), violations.get(0)); + assertViolation(mc, m, (short) 8); // two active txids w/ files m = new Mutation(new Text("0;foo")); @@ -238,10 +232,7 @@ public void testBulkFileCheck() { .of(URI.create("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile2"), new Range()) .getMetadataText(), new DataFileValue(1, 1).encodeAsValue()); - violations = mc.check(createEnv(), m); - assertNotNull(violations); - assertEquals(1, violations.size()); - assertEquals(Short.valueOf((short) 8), violations.get(0)); + assertViolation(mc, m, (short) 8); // two files w/ one active txid m = new Mutation(new Text("0;foo")); @@ -285,10 +276,7 @@ public void testBulkFileCheck() { .of(URI.create("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile2"), new Range()) .getMetadataText(), new Value("5")); - violations = mc.check(createEnv(), m); - assertNotNull(violations); - assertEquals(1, violations.size()); - assertEquals(Short.valueOf((short) 8), violations.get(0)); + assertViolation(mc, m, (short) 8); // active txid, mutation that looks like split m = new Mutation(new Text("0;foo")); @@ -349,11 +337,7 @@ public void testBulkFileCheck() { .of(URI.create("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile"), new Range()) .getMetadata().replace("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile", "/someFile")), new Value("5")); - violations = mc.check(createEnv(), m); - assertNotNull(violations); - assertEquals(1, violations.size()); - assertEquals(Short.valueOf((short) 9), violations.get(0)); - assertNotNull(mc.getViolationDescription(violations.get(0))); + assertViolation(mc, m, (short) 9); // Missing tables directory in path m = new Mutation(new Text("0;foo")); @@ -363,10 +347,7 @@ public void testBulkFileCheck() { .getMetadata().replace("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile", "hdfs://1.2.3.4/accumulo/2a/t-0003/someFile")), new Value("5")); - violations = mc.check(createEnv(), m); - assertNotNull(violations); - assertEquals(1, violations.size()); - assertEquals(Short.valueOf((short) 9), violations.get(0)); + assertViolation(mc, m, (short) 9); // No DataFileColumnFamily included m = new Mutation(new Text("0;foo")); @@ -375,11 +356,67 @@ public void testBulkFileCheck() { .of(URI.create("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile"), new Range()) .getMetadataText(), new Value("5")); - violations = mc.check(createEnv(), m); - assertNotNull(violations); - assertEquals(1, violations.size()); - assertEquals(Short.valueOf((short) 8), violations.get(0)); - assertNotNull(mc.getViolationDescription(violations.get(0))); + assertViolation(mc, m, (short) 8); + + // Bad Json - only path (old format) so should fail parsing + m = new Mutation(new Text("0;foo")); + m.put(BulkFileColumnFamily.NAME, new Text("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile"), + new Value("5")); + assertViolation(mc, m, (short) 9); + + // Bad Json - test startRow key is missing so validation should fail + // {"path":"hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile","endRow":""} + m = new Mutation(new Text("0;foo")); + m.put(BulkFileColumnFamily.NAME, + new Text( + "{\"path\":\"hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile\",\"endRow\":\"\"}"), + new Value("5")); + assertViolation(mc, m, (short) 9); + + // Bad Json - test path key replaced with empty string so validation should fail + // {"":"hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile","startRow":"","endRow":""} + m = new Mutation(new Text("0;foo")); + m.put( + BulkFileColumnFamily.NAME, new Text(StoredTabletFile + .serialize("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile").replace("path", "")), + new Value("5")); + assertViolation(mc, m, (short) 9); + + // Bad Json - test path value missing + // {"path":"","startRow":"","endRow":""} + m = new Mutation(new Text("0;foo")); + m.put(BulkFileColumnFamily.NAME, + new Text(StoredTabletFile + .of(URI.create("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile"), new Range()) + .getMetadata().replaceFirst("\"path\":\".*\",\"startRow", "\"path\":\"\",\"startRow")), + new Value("5")); + assertViolation(mc, m, (short) 9); + + // Bad Json - test startRow key replaced with empty string so validation should fail + // {"path":"hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile","":"","endRow":""} + m = new Mutation(new Text("0;foo")); + m.put(BulkFileColumnFamily.NAME, new Text(StoredTabletFile + .serialize("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile").replace("startRow", "")), + new Value("5")); + assertViolation(mc, m, (short) 9); + + // Bad Json - test endRow key missing so validation should fail + m = new Mutation(new Text("0;foo")); + m.put( + BulkFileColumnFamily.NAME, new Text(StoredTabletFile + .serialize("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile").replace("endRow", "")), + new Value("5")); + assertViolation(mc, m, (short) 9); + + // Bad Json - endRow will be replaced with encoded row without the exclusive byte 0x00 which is + // required for an endRow so will fail validation + m = new Mutation(new Text("0;foo")); + m.put(BulkFileColumnFamily.NAME, new Text(StoredTabletFile + .of(URI.create("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile"), new Range("a", "b")) + .getMetadata() + .replaceFirst("\"endRow\":\".*\"", "\"endRow\":\"" + encodeRowForMetadata("bad") + "\"")), + new Value("5")); + assertViolation(mc, m, (short) 9); } @@ -404,20 +441,67 @@ private void testFileMetadataValidation(Text columnFamily, Value value) { .of(URI.create("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile"), new Range()) .getMetadata().replace("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile", "/someFile")), value); - violations = mc.check(createEnv(), m); - assertNotNull(violations); - assertEquals(1, violations.size()); - assertEquals(Short.valueOf((short) 9), violations.get(0)); - assertNotNull(mc.getViolationDescription(violations.get(0))); + assertViolation(mc, m, (short) 9); - // Bad Json - only path so should fail parsing + // Bad Json - only path (old format) so should fail parsing m = new Mutation(new Text("0;foo")); m.put(columnFamily, new Text("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile"), value); - violations = mc.check(createEnv(), m); - assertNotNull(violations); - assertEquals(1, violations.size()); - assertEquals(Short.valueOf((short) 9), violations.get(0)); - assertNotNull(mc.getViolationDescription(violations.get(0))); + assertViolation(mc, m, (short) 9); + + // Bad Json - test path key replaced with empty string so validation should fail + // {"":"hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile","startRow":"","endRow":""} + m = new Mutation(new Text("0;foo")); + m.put( + columnFamily, new Text(StoredTabletFile + .serialize("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile").replace("path", "")), + value); + assertViolation(mc, m, (short) 9); + + // Bad Json - test path value missing + // {"path":"","startRow":"","endRow":""} + m = new Mutation(new Text("0;foo")); + m.put(columnFamily, + new Text(StoredTabletFile + .of(URI.create("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile"), new Range()) + .getMetadata().replaceFirst("\"path\":\".*\",\"startRow", "\"path\":\"\",\"startRow")), + value); + assertViolation(mc, m, (short) 9); + + // Bad Json - test startRow key replaced with empty string so validation should fail + // {"path":"hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile","":"","endRow":""} + m = new Mutation(new Text("0;foo")); + m.put(columnFamily, new Text(StoredTabletFile + .serialize("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile").replace("startRow", "")), + value); + assertViolation(mc, m, (short) 9); + + // Bad Json - test startRow key is missing so validation should fail + // {"path":"hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile","endRow":""} + m = new Mutation(new Text("0;foo")); + m.put(columnFamily, + new Text( + "{\"path\":\"hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile\",\"endRow\":\"\"}"), + value); + assertViolation(mc, m, (short) 9); + + // Bad Json - test endRow key replaced with empty string so validation should fail + // {"path":"hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile","":"","endRow":""} + m = new Mutation(new Text("0;foo")); + m.put( + columnFamily, new Text(StoredTabletFile + .serialize("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile").replace("endRow", "")), + value); + assertViolation(mc, m, (short) 9); + + // Bad Json - endRow will be replaced with encoded row without the exclusive byte 0x00 which is + // required for an endRow so this will fail validation + m = new Mutation(new Text("0;foo")); + m.put(columnFamily, new Text(StoredTabletFile + .of(URI.create("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile"), new Range("a", "b")) + .getMetadata() + .replaceFirst("\"endRow\":\".*\"", "\"endRow\":\"" + encodeRowForMetadata("b") + "\"")), + value); + assertViolation(mc, m, (short) 9); // Missing tables directory in path m = new Mutation(new Text("0;foo")); @@ -427,12 +511,9 @@ private void testFileMetadataValidation(Text columnFamily, Value value) { .getMetadata().replace("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile", "hdfs://1.2.3.4/accumulo/2a/t-0003/someFile")), new DataFileValue(1, 1).encodeAsValue()); - violations = mc.check(createEnv(), m); - assertNotNull(violations); - assertEquals(1, violations.size()); - assertEquals(Short.valueOf((short) 9), violations.get(0)); + assertViolation(mc, m, (short) 9); - // Should pass validation + // Should pass validation (inf range) m = new Mutation(new Text("0;foo")); m.put(columnFamily, StoredTabletFile @@ -442,7 +523,35 @@ private void testFileMetadataValidation(Text columnFamily, Value value) { violations = mc.check(createEnv(), m); assertNull(violations); + // Should pass validation with range set + m = new Mutation(new Text("0;foo")); + m.put(columnFamily, StoredTabletFile + .of(URI.create("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile"), new Range("a", "b")) + .getMetadataText(), new DataFileValue(1, 1).encodeAsValue()); + violations = mc.check(createEnv(), m); + assertNull(violations); + assertNotNull(mc.getViolationDescription((short) 9)); } + // Encode a row how it would appear in Json + private static String encodeRowForMetadata(String row) { + try { + Method method = StoredTabletFile.class.getDeclaredMethod("encodeRow", Key.class); + method.setAccessible(true); + return Base64.getUrlEncoder() + .encodeToString((byte[]) method.invoke(StoredTabletFile.class, new Key(row))); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + private void assertViolation(MetadataConstraints mc, Mutation m, Short violation) { + List violations = mc.check(createEnv(), m); + assertNotNull(violations); + assertEquals(1, violations.size()); + assertEquals(violation, violations.get(0)); + assertNotNull(mc.getViolationDescription(violations.get(0))); + } + }