From 7e476f21d0ea0a861b2cecb6f178dabe4637a31d Mon Sep 17 00:00:00 2001 From: Laurie O Date: Mon, 9 Dec 2024 11:58:07 +1000 Subject: [PATCH] Read index responses with 'iter_content' To use automatic decompression in 'requests'. Fixes #63 --- src/proxpi/_cache.py | 51 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 49 insertions(+), 2 deletions(-) diff --git a/src/proxpi/_cache.py b/src/proxpi/_cache.py index 0958996..f4677cb 100644 --- a/src/proxpi/_cache.py +++ b/src/proxpi/_cache.py @@ -414,6 +414,49 @@ def add_miss(self, key: str) -> None: stat.misses += 1 +class _ResponseReader: + """File-like interface for decoded response body.""" + + def __init__( + self, + make_iter: t.Callable[[t.Union[int, None]], t.Iterator[bytes]], + close: t.Callable[[], None], + ) -> None: + """Initialise reader. + + Args: + make_iter: constructs iterator of response body chunks, + accepting chunk size + close: closes response connection + """ + + self.make_iter = make_iter + self.close = close + self._iter: t.Union[t.Iterator[bytes], None] = None + + @classmethod + def from_response(cls, response: requests.Response) -> "_ResponseReader": + """Construct from response.""" + return cls(response.iter_content, response.close) + + def read(self, n: t.Union[int, None] = None) -> bytes: + """Read response body chunk. + + Args: + n: chunk size + + Returns: + response body chunk + """ + + if self._iter is None: + self._iter = self.make_iter(n) + try: + return next(self._iter) + except StopIteration: + return b"" + + class _IndexCache: """Cache for an index. @@ -474,7 +517,9 @@ def _list_packages(self): ) return - for _, child in lxml.etree.iterparse(response.raw, tag="a", html=True): + stream = _ResponseReader.from_response(response) + + for _, child in lxml.etree.iterparse(stream, tag="a", html=True): if True: # minimise Git diff name = _name_normalise_re.sub("-", child.text).lower() self._index[name] = child.attrib["href"] @@ -542,7 +587,9 @@ def _list_files(self, package_name: str): logger.debug(f"Finished listing files in package '{package_name}'") return - for _, child in lxml.etree.iterparse(response.raw, tag="a", html=True): + stream = _ResponseReader.from_response(response) + + for _, child in lxml.etree.iterparse(stream, tag="a", html=True): if True: # minimise Git diff file = FileFromHTML.from_html_element(child, response.request.url) package.files[file.name] = file