Skip to content

Commit

Permalink
Read index responses with 'iter_content'
Browse files Browse the repository at this point in the history
To use automatic decompression in 'requests'. Fixes #63
  • Loading branch information
EpicWink committed Dec 9, 2024
1 parent 3094b60 commit 7e476f2
Showing 1 changed file with 49 additions and 2 deletions.
51 changes: 49 additions & 2 deletions src/proxpi/_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -414,6 +414,49 @@ def add_miss(self, key: str) -> None:
stat.misses += 1


class _ResponseReader:
"""File-like interface for decoded response body."""

def __init__(
self,
make_iter: t.Callable[[t.Union[int, None]], t.Iterator[bytes]],
close: t.Callable[[], None],
) -> None:
"""Initialise reader.
Args:
make_iter: constructs iterator of response body chunks,
accepting chunk size
close: closes response connection
"""

self.make_iter = make_iter
self.close = close
self._iter: t.Union[t.Iterator[bytes], None] = None

@classmethod
def from_response(cls, response: requests.Response) -> "_ResponseReader":
"""Construct from response."""
return cls(response.iter_content, response.close)

def read(self, n: t.Union[int, None] = None) -> bytes:
"""Read response body chunk.
Args:
n: chunk size
Returns:
response body chunk
"""

if self._iter is None:
self._iter = self.make_iter(n)
try:
return next(self._iter)
except StopIteration:
return b""


class _IndexCache:
"""Cache for an index.
Expand Down Expand Up @@ -474,7 +517,9 @@ def _list_packages(self):
)
return

for _, child in lxml.etree.iterparse(response.raw, tag="a", html=True):
stream = _ResponseReader.from_response(response)

for _, child in lxml.etree.iterparse(stream, tag="a", html=True):
if True: # minimise Git diff
name = _name_normalise_re.sub("-", child.text).lower()
self._index[name] = child.attrib["href"]
Expand Down Expand Up @@ -542,7 +587,9 @@ def _list_files(self, package_name: str):
logger.debug(f"Finished listing files in package '{package_name}'")
return

for _, child in lxml.etree.iterparse(response.raw, tag="a", html=True):
stream = _ResponseReader.from_response(response)

for _, child in lxml.etree.iterparse(stream, tag="a", html=True):
if True: # minimise Git diff
file = FileFromHTML.from_html_element(child, response.request.url)
package.files[file.name] = file
Expand Down

0 comments on commit 7e476f2

Please sign in to comment.