Skip to content

Commit

Permalink
Buffering the input stream for metadata read
Browse files — browse the repository at this point in the history
  • Loading branch information
Yaliang committed Jul 5, 2018
1 parent c2428ab commit 01812d5
Showing 1 changed file with 5 additions and 3 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -37,6 +37,7 @@
import parquet.schema.Type.Repetition;
import parquet.schema.Types;

import java.io.BufferedInputStream;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
Expand Down Expand Up @@ -85,10 +86,11 @@ public static ParquetMetadata readFooter(FSDataInputStream inputStream, Path fil
long metadataLengthIndex = fileSize - PARQUET_METADATA_LENGTH - MAGIC.length;

inputStream.seek(metadataLengthIndex);
int metadataLength = readIntLittleEndian(inputStream);
InputStream in = new BufferedInputStream(inputStream, PARQUET_METADATA_LENGTH + MAGIC.length);
int metadataLength = readIntLittleEndian(in);

byte[] magic = new byte[MAGIC.length];
inputStream.readFully(magic);
validateParquet(in.read(magic) == magic.length, "No enough data for MAGIC");
validateParquet(Arrays.equals(MAGIC, magic), "Not valid Parquet file: %s expected magic number: %s got: %s", file, Arrays.toString(MAGIC), Arrays.toString(magic));

long metadataIndex = metadataLengthIndex - metadataLength;
Expand All @@ -98,7 +100,7 @@ public static ParquetMetadata readFooter(FSDataInputStream inputStream, Path fil
file,
metadataIndex);
inputStream.seek(metadataIndex);
FileMetaData fileMetaData = readFileMetaData(inputStream);
FileMetaData fileMetaData = readFileMetaData(new BufferedInputStream(inputStream, metadataLength));
List<SchemaElement> schema = fileMetaData.getSchema();
validateParquet(!schema.isEmpty(), "Empty Parquet schema in file: %s", file);

Expand Down

0 comments on commit 01812d5

Please sign in to comment.