Skip to content

Commit

Permalink
Handle incorrect/missing length of stream-objects. Fixes empira#73
Browse files Browse the repository at this point in the history
  • Loading branch information
packdat committed Jan 15, 2024
1 parent b28bc32 commit ad5b7df
Show file tree
Hide file tree
Showing 2 changed files with 128 additions and 8 deletions.
101 changes: 100 additions & 1 deletion src/foundation/src/PDFsharp/src/PdfSharp/Pdf.IO/Lexer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ public Symbol ScanNextToken()
/// </summary>
public byte[] ReadStream(int length)
{
// TODO: use "MoveToStartOfStream()"
int pos;

// Skip illegal blanks behind «stream».
Expand All @@ -166,8 +167,10 @@ public byte[] ReadStream(int length)
else
pos = _idxChar + 1;
}
else
else if (_currChar == Chars.LF)
pos = _idxChar + 1;
else
pos = _idxChar;

_pdfStream.Position = pos;
byte[] bytes = new byte[length];
Expand All @@ -184,6 +187,102 @@ public byte[] ReadStream(int length)
return bytes;
}

/// <summary>
/// Determines the position of the first data byte of a stream-object.<br></br>
/// The current position is expected to be located right after the "stream" keyword.<br></br>
/// Space characters following the keyword are skipped; the end-of-line marker itself is not consumed,
/// only accounted for in the returned position.
/// </summary>
/// <returns>The position of the first byte of the stream data</returns>
private int MoveToStartOfStream()
{
    // Tolerate (illegal) blanks between «stream» and its end-of-line marker.
    while (_currChar == Chars.SP)
        ScanNextChar(true);

    // A single LF: the data starts with the very next byte.
    if (_currChar == Chars.LF)
        return _idxChar + 1;

    // No end-of-line marker at all: the data starts right here.
    if (_currChar != Chars.CR)
        return _idxChar;

    // CR, optionally followed by LF: step over one or two bytes.
    return _nextChar == Chars.LF ? _idxChar + 2 : _idxChar + 1;
}

/// <summary>
/// Searches for the end of a stream-object, starting at the current position.<br></br>
/// The current position is expected to be located right after the "stream" keyword.<br></br>
/// After the search, the position in the input-stream is the first byte of the stream data (if the end was found)
/// or the end of the input-stream.
/// </summary>
/// <param name="streamLength">Receives the length of the stream data if the end of the stream was found, otherwise 0</param>
/// <returns><b>true</b> if the end of a stream was found, otherwise <b>false</b></returns>
internal bool SearchEndOfStream(out int streamLength)
{
    streamLength = 0;

    // Locate the first byte of the stream data.
    Position = MoveToStartOfStream();
    var streamStart = Position;

    // "endstream" shall be preceded by an end-of-line marker according to the spec.
    var marker = PdfEncoders.RawEncoding.GetBytes("\nendstream");
    if (!SearchForMarker(marker, out var markerPosition))
        return false;

    // The "endstream" keyword may be preceded by either LF or CR+LF.
    // LF is already part of the marker, so only the byte right before it has to be checked for CR.
    var length = markerPosition - streamStart;
    if (length > 0)
    {
        // Only look behind the marker while that byte still belongs to the stream data;
        // otherwise a CR in front of the data could yield a negative length.
        _pdfStream.Position = markerPosition - 1;
        if (_pdfStream.ReadByte() == Chars.CR)
            length--; // exclude CR from stream-data
    }
    streamLength = length;

    // Reset the position to the start of the stream data so the caller can read it.
    Position = streamStart;
    return true;
}

/// <summary>
/// Scans the input stream for the specified marker, starting from the current position.<br></br>
/// After the search, the position in the input-stream is the byte right after the marker (if found) or the end of the input-stream.<br></br>
/// An empty marker is considered to be found at the current position.
/// </summary>
/// <param name="marker">The marker to scan for</param>
/// <param name="markerPosition">Receives the position of the marker in the input stream if found, otherwise 0</param>
/// <returns><b>true</b> if the marker was found, otherwise <b>false</b></returns>
internal bool SearchForMarker(byte[] marker, out int markerPosition)
{
    // Nothing to search for: report a match without moving.
    if (marker.Length == 0)
    {
        markerPosition = Position;
        return true;
    }

    while (_currChar != Chars.EOF)
    {
        // Advance to the next byte that could start the marker.
        if (_currChar != marker[0])
        {
            ScanNextChar(false);
            continue;
        }

        // Try to match the complete marker from this candidate position.
        var candidateStart = Position;
        var matched = 0;
        while (matched < marker.Length && _currChar != Chars.EOF && _currChar == marker[matched])
        {
            matched++;
            ScanNextChar(false);
        }

        if (matched == marker.Length)
        {
            markerPosition = candidateStart;
            return true;
        }

        // Only part of the marker was found. The marker may still start inside the bytes
        // that were just consumed (e.g. "aab" in "aaab"), so resume right after the candidate.
        // With a single matched byte the lexer already is at that position.
        if (matched > 1 && _currChar != Chars.EOF)
            Position = candidateStart + 1;
    }

    // The end of the input was reached without finding the marker.
    markerPosition = 0;
    return false;
}

/// <summary>
/// Reads a string in raw encoding.
/// </summary>
Expand Down
35 changes: 28 additions & 7 deletions src/foundation/src/PDFsharp/src/PdfSharp/Pdf.IO/Parser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,11 @@ public Parser(PdfDocument document)
/// </summary>
public int MoveToObject(PdfObjectID objectID)
{
int position = _document.IrefTable[objectID]?.Position ?? throw new AggregateException("Invalid object ID.");
int position = _document.IrefTable[objectID]?.Position
?? throw new PdfReaderException($"Invalid object ID. The object ({objectID}) could not be located in the reference-table.");
if (position < 0)
{
throw new AggregateException($"Invalid position {position} for object ID {objectID}.");
throw new PdfReaderException($"Invalid position {position} for object ID {objectID}.");
}
return _lexer.Position = position;
}
Expand Down Expand Up @@ -274,8 +275,11 @@ public PdfObject ReadObject(PdfObject? pdfObject, PdfObjectID objectID, bool inc
#if true_
ReadStream(dict);
#else
int length = GetStreamLength(dict!); // NRT HACK
byte[] bytes = _lexer.ReadStream(length);
var startOfStream = _lexer.Position;
try
{
int length = GetStreamLength(dict!); // NRT HACK
byte[] bytes = _lexer.ReadStream(length);
#if true_
if (dict.Elements.GetString("/Filter") == "/FlateDecode")
{
Expand All @@ -301,9 +305,26 @@ public PdfObject ReadObject(PdfObject? pdfObject, PdfObjectID objectID, bool inc
End: ;
}
#endif
var stream = new PdfDictionary.PdfStream(bytes, dict ?? NRT.ThrowOnNull<PdfDictionary>());
dict.Stream = stream;
ReadSymbol(Symbol.EndStream);
var stream = new PdfStream(bytes, dict ?? NRT.ThrowOnNull<PdfDictionary>());
dict.Stream = stream;
ReadSymbol(Symbol.EndStream);
}
catch (NotImplementedException)
{
throw; // File-streams
}
catch
{
// most likely there is a problem with the stream-length.
// it may be incorrect or completely missing.
// scan manually for the end of the stream
_lexer.Position = startOfStream;
if (!_lexer.SearchEndOfStream(out var streamLength))
throw;
var bytes = _lexer.ReadStream(streamLength);
dict!.Stream = new PdfStream(bytes, dict ?? NRT.ThrowOnNull<PdfDictionary>());
ReadSymbol(Symbol.EndStream);
}
symbol = ScanNextToken();
#endif
}
Expand Down

0 comments on commit ad5b7df

Please sign in to comment.