
Commit

Merge pull request #5 from rishiraj/fix-multiple-download
Fix multiple download
rishiraj authored Oct 18, 2024
2 parents d2b2ff6 + 03941b6 commit 7779432
Showing 2 changed files with 74 additions and 36 deletions.
26 changes: 18 additions & 8 deletions README.md
@@ -51,6 +51,8 @@ fr.download(url)
- `--filename` (optional): The name to save the downloaded file. Defaults to the filename taken from the URL.
- `--max_files` (optional): The maximum number of concurrent chunk downloads. Defaults to 10.
- `--chunk_size` (optional): The size of each chunk in bytes. Defaults to `2 * 1024 * 1024` (2 MB).
- `--headers` (optional): A dictionary of headers to include in the download request.
- `--show_progress` (optional): Whether to show a progress bar. Defaults to True for single file downloads, and False for multiple files.
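
For reference, a minimal sketch of passing these options through the Python API, assuming they map one-to-one onto the keyword arguments of `fr.download` (the header value is a placeholder):

```python
from firerequests import FireRequests

fr = FireRequests()
fr.download(
    "https://example.com/largefile.iso",
    "largefile.iso",                      # filename; omit to derive it from the URL
    max_files=10,                         # concurrent chunk downloads
    chunk_size=2 * 1024 * 1024,           # 2 MB per chunk
    headers={"Authorization": "Bearer <token>"},  # placeholder token
    show_progress=True,
)
```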

## Real-World Speed Test 🏎️

@@ -75,17 +77,19 @@ Downloading on 🔥: 100%|██████████| 3.42G/3.42G [02:38<00:
```python
from firerequests import FireRequests

url = "https://example.com/largefile.iso"
filename = "largefile.iso"
urls = ["https://example.com/file1.iso", "https://example.com/file2.iso"]
filenames = ["file1.iso", "file2.iso"]

fr = FireRequests()
fr.download(url, filename, max_files=10, chunk_size=2 * 1024 * 1024)
fr.download(urls, filenames, max_files=10, chunk_size=2 * 1024 * 1024, headers={"Authorization": "Bearer token"}, show_progress=True)
```

- **`url`**: The URL of the file to download.
- **`filename`**: The local filename to save the downloaded file.
- **`max_files`**: The maximum number of concurrent chunk downloads.
- **`chunk_size`**: The size of each chunk in bytes.
- **`urls`**: The URL or list of URLs of the file(s) to download.
- **`filenames`**: The filename(s) to save the downloaded file(s). If not provided, filenames are extracted from the URLs.
- **`max_files`**: The maximum number of concurrent chunk downloads. Defaults to 10.
- **`chunk_size`**: The size of each chunk in bytes. Defaults to `2 * 1024 * 1024` (2 MB).
- **`headers`**: A dictionary of headers to include in the download request (optional).
- **`show_progress`**: Whether to show a progress bar during download. Defaults to `True` for a single file, and `False` for multiple files (optional).
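
A minimal sketch of the multi-file case with the defaults in play (the URLs are placeholders): filenames are derived from the URLs, and the progress bar stays off because more than one file is requested.

```python
from firerequests import FireRequests

fr = FireRequests()
# Saves "file1.iso" and "file2.iso" in the working directory; no progress bar
# is shown since show_progress defaults to False for multiple files.
fr.download([
    "https://example.com/file1.iso",
    "https://example.com/file2.iso",
])
```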

### Uploading Files

@@ -96,9 +100,15 @@ file_path = "largefile.iso"
parts_urls = ["https://example.com/upload_part1", "https://example.com/upload_part2", ...]

fr = FireRequests()
fr.upload(file_path, parts_urls, chunk_size=2 * 1024 * 1024, max_files=10)
fr.upload(file_path, parts_urls, chunk_size=2 * 1024 * 1024, max_files=10, show_progress=True)
```

- **`file_path`**: The local path to the file to upload.
- **`parts_urls`**: A list of URLs where each part of the file will be uploaded.
- **`chunk_size`**: The size of each chunk in bytes. Defaults to `2 * 1024 * 1024` (2 MB).
- **`max_files`**: The maximum number of concurrent chunk uploads. Defaults to 10.
- **`show_progress`**: Whether to show a progress bar during upload. Defaults to `True`.
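
For intuition on how the file is split across `parts_urls`, here is a rough sketch of the arithmetic used by `upload_file` in `firerequests/main.py` (the file path and URLs are placeholders): each part gets `file_size // len(parts_urls)` bytes, and the last part absorbs the remainder.

```python
import os

file_path = "largefile.iso"  # placeholder local file
parts_urls = [
    "https://example.com/upload_part1",
    "https://example.com/upload_part2",
]

file_size = os.path.getsize(file_path)
part_size = file_size // len(parts_urls)
# The last part picks up any bytes left over by the integer division.
last_part_size = file_size - part_size * (len(parts_urls) - 1)
```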

### Comparing Download Speed

```python
84 changes: 56 additions & 28 deletions firerequests/main.py
@@ -13,7 +13,7 @@
from tqdm.asyncio import tqdm
from functools import partial
from concurrent.futures import ThreadPoolExecutor
from typing import Dict, Any, List, Optional
from typing import Union, Dict, Any, List, Optional

# Enable nested event loops for environments like Jupyter
nest_asyncio.apply()
@@ -49,7 +49,7 @@ async def download_chunk(

async def download_file(
self, url: str, filename: str, max_files: int, chunk_size: int, headers: Optional[Dict[str, str]] = None,
parallel_failures: int = 3, max_retries: int = 5, callback: Optional[Any] = None
parallel_failures: int = 3, max_retries: int = 5, callback: Optional[Any] = None, show_progress: bool = True
):
headers = headers or {"User-Agent": "Wget/1.21.2", "Accept": "*/*", "Accept-Encoding": "identity", "Connection": "Keep-Alive"}
try:
@@ -82,14 +82,20 @@ async def download_file(
tasks.append(self.download_chunk_with_retries(
session, url, filename, start, stop, headers, semaphore, parallel_failures, max_retries
))

progress_bar = tqdm(total=file_size, unit="B", unit_scale=True, desc="Downloading on 🔥")

if show_progress:
progress_bar = tqdm(total=file_size, unit="B", unit_scale=True, desc="Downloading on 🔥")

for chunk_result in asyncio.as_completed(tasks):
downloaded = await chunk_result
progress_bar.update(downloaded)
if show_progress:
progress_bar.update(downloaded)
if callback:
await callback(downloaded)
progress_bar.close()

if show_progress:
progress_bar.close()

except Exception as e:
print(f"Error in download_file: {e}")

@@ -111,38 +117,41 @@ async def download_chunk_with_retries(

async def upload_file(
self, file_path: str, parts_urls: List[str], chunk_size: int, max_files: int,
parallel_failures: int = 3, max_retries: int = 5, callback: Optional[Any] = None
parallel_failures: int = 3, max_retries: int = 5, callback: Optional[Any] = None, show_progress: bool = True
):
file_size = os.path.getsize(file_path)
part_size = file_size // len(parts_urls)
last_part_size = file_size - part_size * (len(parts_urls) - 1) # To handle any remaining bytes

semaphore = asyncio.Semaphore(max_files)
tasks = []
try:
async with aiohttp.ClientSession() as session:
for part_number, part_url in enumerate(parts_urls):
# Calculate start and stop positions for each part
if part_number == len(parts_urls) - 1: # For the last part, ensure we include the remaining bytes
if part_number == len(parts_urls) - 1:
start = part_number * part_size
size = last_part_size
else:
start = part_number * part_size
size = part_size

# Start uploading the chunks for the given part

tasks.append(self.upload_chunk_with_retries(
session, part_url, file_path, start, size, chunk_size, semaphore, parallel_failures, max_retries
))

# Track progress using a progress bar
progress_bar = tqdm(total=file_size, unit="B", unit_scale=True, desc="Uploading on 🔥")

if show_progress:
progress_bar = tqdm(total=file_size, unit="B", unit_scale=True, desc="Uploading on 🔥")

for chunk_result in asyncio.as_completed(tasks):
uploaded = await chunk_result
progress_bar.update(uploaded)
if show_progress:
progress_bar.update(uploaded)
if callback:
await callback(uploaded)
progress_bar.close()

if show_progress:
progress_bar.close()

except Exception as e:
print(f"Error in upload_file: {e}")

@@ -185,25 +194,43 @@ async def upload_chunks(
print(f"Error in upload_chunks: {e}")
return 0

def download(self, url: str, filename: Optional[str] = None, max_files: int = 10, chunk_size: int = 2 * 1024 * 1024):
def download(self, urls: Union[str, List[str]], filenames: Optional[Union[str, List[str]]] = None, headers: Optional[Dict[str, str]] = None, max_files: int = 10, chunk_size: int = 2 * 1024 * 1024, show_progress: Optional[bool] = None):
"""
Downloads a file from a given URL asynchronously in chunks, with support for parallel downloads.
Downloads files from a given URL or a list of URLs asynchronously in chunks, with support for parallel downloads.
Args:
url (str): The URL of the file to download.
filename (Optional[str]): The name of the file to save locally. If not provided, it will be extracted from the URL.
urls (Union[str, List[str]]): The URL or list of URLs of the files to download.
filenames (Optional[Union[str, List[str]]]): The filename or list of filenames to save locally.
If not provided, filenames will be extracted from the URLs.
headers (Optional[Dict[str, str]]): Headers to include in the download requests.
max_files (int): The maximum number of concurrent file download chunks. Defaults to 10.
chunk_size (int): The size of each chunk to download, in bytes. Defaults to 2MB.
show_progress (Optional[bool]): Whether to show a progress bar. Defaults to True for single file, False for multiple files.
Usage:
- This function downloads the file in parallel chunks, speeding up the process.
- This function downloads the files in parallel chunks, speeding up the process.
"""
# Extract filename from URL if not provided
if filename is None:
filename = os.path.basename(urlparse(url).path)
asyncio.run(self.download_file(url, filename, max_files, chunk_size))
if isinstance(urls, str):
urls = [urls]
if isinstance(filenames, str):
filenames = [filenames]

if filenames is None:
filenames = [os.path.basename(urlparse(url).path) for url in urls]
elif len(filenames) != len(urls):
raise ValueError("The number of filenames must match the number of URLs")

# Set default for show_progress based on whether it's a single file or list
if show_progress is None:
show_progress = len(urls) == 1

async def download_all():
tasks = [self.download_file(url, filename, max_files, chunk_size, headers, show_progress=show_progress) for url, filename in zip(urls, filenames)]
await asyncio.gather(*tasks)

asyncio.run(download_all())

def upload(self, file_path: str, parts_urls: List[str], chunk_size: int = 2 * 1024 * 1024, max_files: int = 10):
def upload(self, file_path: str, parts_urls: List[str], chunk_size: int = 2 * 1024 * 1024, max_files: int = 10, show_progress: Optional[bool] = True):
"""
Uploads a file to multiple URLs in chunks asynchronously, with support for parallel uploads.
@@ -212,11 +239,12 @@ def upload(self, file_path: str, parts_urls: List[str], chunk_size: int = 2 * 10
parts_urls (List[str]): A list of URLs where each part of the file will be uploaded.
chunk_size (int): The size of each chunk to upload, in bytes. Defaults to 2MB.
max_files (int): The maximum number of concurrent file upload chunks. Defaults to 10.
show_progress (bool): Whether to show a progress bar during upload. Defaults to True.
Usage:
- The function divides the file into smaller chunks and uploads them in parallel to different URLs.
"""
asyncio.run(self.upload_file(file_path, parts_urls, chunk_size, max_files))
asyncio.run(self.upload_file(file_path, parts_urls, chunk_size, max_files, show_progress=show_progress))

def normal_download(self, url: str, filename: str):
response = requests.get(url, stream=True)
