
Commit

Merge pull request #5 from rishiraj/fix-multiple-download
Fix multiple download
rishiraj authored Oct 18, 2024
2 parents d2b2ff6 + 03941b6 commit 7779432
Showing 2 changed files with 74 additions and 36 deletions.
26 changes: 18 additions & 8 deletions README.md
@@ -51,6 +51,8 @@ fr.download(url)
- `--filename` (optional): The name to save the downloaded file. Defaults to the filename taken from the URL.
- `--max_files` (optional): The maximum number of concurrent chunk downloads. Defaults to 10.
- `--chunk_size` (optional): The size of each chunk in bytes. Defaults to `2 * 1024 * 1024` (2 MB).
- `--headers` (optional): A dictionary of headers to include in the download request.
- `--show_progress` (optional): Whether to show a progress bar. Defaults to True for single file downloads, and False for multiple files.
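
For reference, a minimal sketch of passing these options through the Python API, assuming they map one-to-one onto the keyword arguments of `fr.download` (the header value is a placeholder):

```python
from firerequests import FireRequests

fr = FireRequests()
fr.download(
    "https://example.com/largefile.iso",
    "largefile.iso",                      # filename; omit to derive it from the URL
    max_files=10,                         # concurrent chunk downloads
    chunk_size=2 * 1024 * 1024,           # 2 MB per chunk
    headers={"Authorization": "Bearer <token>"},  # placeholder token
    show_progress=True,
)
```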

## Real-World Speed Test 🏎️

@@ -75,17 +77,19 @@ Downloading on 🔥: 100%|██████████| 3.42G/3.42G [02:38<00:
```python
from firerequests import FireRequests

url = "https://example.com/largefile.iso"
filename = "largefile.iso"
urls = ["https://example.com/file1.iso", "https://example.com/file2.iso"]
filenames = ["file1.iso", "file2.iso"]

fr = FireRequests()
fr.download(url, filename, max_files=10, chunk_size=2 * 1024 * 1024)
fr.download(urls, filenames, max_files=10, chunk_size=2 * 1024 * 1024, headers={"Authorization": "Bearer token"}, show_progress=True)
```

- **`url`**: The URL of the file to download.
- **`filename`**: The local filename to save the downloaded file.
- **`max_files`**: The maximum number of concurrent chunk downloads.
- **`chunk_size`**: The size of each chunk in bytes.
- **`urls`**: The URL or list of URLs of the file(s) to download.
- **`filenames`**: The filename(s) to save the downloaded file(s). If not provided, filenames are extracted from the URLs.
- **`max_files`**: The maximum number of concurrent chunk downloads. Defaults to 10.
- **`chunk_size`**: The size of each chunk in bytes. Defaults to `2 * 1024 * 1024` (2 MB).
- **`headers`**: A dictionary of headers to include in the download request (optional).
- **`show_progress`**: Whether to show a progress bar during download. Defaults to `True` for a single file, and `False` for multiple files (optional).
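
A minimal sketch of the multi-file case with the defaults in play (the URLs are placeholders): filenames are derived from the URLs, and the progress bar stays off because more than one file is requested.

```python
from firerequests import FireRequests

fr = FireRequests()
# Saves "file1.iso" and "file2.iso" in the working directory; no progress bar
# is shown since show_progress defaults to False for multiple files.
fr.download([
    "https://example.com/file1.iso",
    "https://example.com/file2.iso",
])
```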

### Uploading Files

@@ -96,9 +100,15 @@ file_path = "largefile.iso"
parts_urls = ["https://example.com/upload_part1", "https://example.com/upload_part2", ...]

fr = FireRequests()
fr.upload(file_path, parts_urls, chunk_size=2 * 1024 * 1024, max_files=10)
fr.upload(file_path, parts_urls, chunk_size=2 * 1024 * 1024, max_files=10, show_progress=True)
```

- **`file_path`**: The local path to the file to upload.
- **`parts_urls`**: A list of URLs where each part of the file will be uploaded.
- **`chunk_size`**: The size of each chunk in bytes. Defaults to `2 * 1024 * 1024` (2 MB).
- **`max_files`**: The maximum number of concurrent chunk uploads. Defaults to 10.
- **`show_progress`**: Whether to show a progress bar during upload. Defaults to `True`.
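
For intuition on how the file is split across `parts_urls`, here is a rough sketch of the arithmetic used by `upload_file` in `firerequests/main.py` (the file path and URLs are placeholders): each part gets `file_size // len(parts_urls)` bytes, and the last part absorbs the remainder.

```python
import os

file_path = "largefile.iso"  # placeholder local file
parts_urls = [
    "https://example.com/upload_part1",
    "https://example.com/upload_part2",
]

file_size = os.path.getsize(file_path)
part_size = file_size // len(parts_urls)
# The last part picks up any bytes left over by the integer division.
last_part_size = file_size - part_size * (len(parts_urls) - 1)
```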

### Comparing Download Speed

```python
84 changes: 56 additions & 28 deletions firerequests/main.py
@@ -13,7 +13,7 @@
from tqdm.asyncio import tqdm
from functools import partial
from concurrent.futures import ThreadPoolExecutor
from typing import Dict, Any, List, Optional
from typing import Union, Dict, Any, List, Optional

# Enable nested event loops for environments like Jupyter
nest_asyncio.apply()
@@ -49,7 +49,7 @@ async def download_chunk(

async def download_file(
self, url: str, filename: str, max_files: int, chunk_size: int, headers: Optional[Dict[str, str]] = None,
parallel_failures: int = 3, max_retries: int = 5, callback: Optional[Any] = None
parallel_failures: int = 3, max_retries: int = 5, callback: Optional[Any] = None, show_progress: bool = True
):
headers = headers or {"User-Agent": "Wget/1.21.2", "Accept": "*/*", "Accept-Encoding": "identity", "Connection": "Keep-Alive"}
try:
@@ -82,14 +82,20 @@ async def download_file(
tasks.append(self.download_chunk_with_retries(
session, url, filename, start, stop, headers, semaphore, parallel_failures, max_retries
))

progress_bar = tqdm(total=file_size, unit="B", unit_scale=True, desc="Downloading on 🔥")

if show_progress:
progress_bar = tqdm(total=file_size, unit="B", unit_scale=True, desc="Downloading on 🔥")

for chunk_result in asyncio.as_completed(tasks):
downloaded = await chunk_result
progress_bar.update(downloaded)
if show_progress:
progress_bar.update(downloaded)
if callback:
await callback(downloaded)
progress_bar.close()

if show_progress:
progress_bar.close()

except Exception as e:
print(f"Error in download_file: {e}")

@@ -111,38 +117,41 @@ async def download_chunk_with_retries(

async def upload_file(
self, file_path: str, parts_urls: List[str], chunk_size: int, max_files: int,
parallel_failures: int = 3, max_retries: int = 5, callback: Optional[Any] = None
parallel_failures: int = 3, max_retries: int = 5, callback: Optional[Any] = None, show_progress: bool = True
):
file_size = os.path.getsize(file_path)
part_size = file_size // len(parts_urls)
last_part_size = file_size - part_size * (len(parts_urls) - 1) # To handle any remaining bytes

semaphore = asyncio.Semaphore(max_files)
tasks = []
try:
async with aiohttp.ClientSession() as session:
for part_number, part_url in enumerate(parts_urls):
# Calculate start and stop positions for each part
if part_number == len(parts_urls) - 1: # For the last part, ensure we include the remaining bytes
if part_number == len(parts_urls) - 1:
start = part_number * part_size
size = last_part_size
else:
start = part_number * part_size
size = part_size

# Start uploading the chunks for the given part

tasks.append(self.upload_chunk_with_retries(
session, part_url, file_path, start, size, chunk_size, semaphore, parallel_failures, max_retries
))

# Track progress using a progress bar
progress_bar = tqdm(total=file_size, unit="B", unit_scale=True, desc="Uploading on 🔥")

if show_progress:
progress_bar = tqdm(total=file_size, unit="B", unit_scale=True, desc="Uploading on 🔥")

for chunk_result in asyncio.as_completed(tasks):
uploaded = await chunk_result
progress_bar.update(uploaded)
if show_progress:
progress_bar.update(uploaded)
if callback:
await callback(uploaded)
progress_bar.close()

if show_progress:
progress_bar.close()

except Exception as e:
print(f"Error in upload_file: {e}")

@@ -185,25 +194,43 @@ async def upload_chunks(
print(f"Error in upload_chunks: {e}")
return 0

def download(self, url: str, filename: Optional[str] = None, max_files: int = 10, chunk_size: int = 2 * 1024 * 1024):
def download(self, urls: Union[str, List[str]], filenames: Optional[Union[str, List[str]]] = None, headers: Optional[Dict[str, str]] = None, max_files: int = 10, chunk_size: int = 2 * 1024 * 1024, show_progress: Optional[bool] = None):
"""
Downloads a file from a given URL asynchronously in chunks, with support for parallel downloads.
Downloads files from a given URL or a list of URLs asynchronously in chunks, with support for parallel downloads.
Args:
url (str): The URL of the file to download.
filename (Optional[str]): The name of the file to save locally. If not provided, it will be extracted from the URL.
urls (Union[str, List[str]]): The URL or list of URLs of the files to download.
filenames (Optional[Union[str, List[str]]]): The filename or list of filenames to save locally.
If not provided, filenames will be extracted from the URLs.
headers (Optional[Dict[str, str]]): Headers to include in the download requests.
max_files (int): The maximum number of concurrent file download chunks. Defaults to 10.
chunk_size (int): The size of each chunk to download, in bytes. Defaults to 2MB.
show_progress (Optional[bool]): Whether to show a progress bar. Defaults to True for single file, False for multiple files.
Usage:
- This function downloads the file in parallel chunks, speeding up the process.
- This function downloads the files in parallel chunks, speeding up the process.
"""
# Extract filename from URL if not provided
if filename is None:
filename = os.path.basename(urlparse(url).path)
asyncio.run(self.download_file(url, filename, max_files, chunk_size))
if isinstance(urls, str):
urls = [urls]
if isinstance(filenames, str):
filenames = [filenames]

if filenames is None:
filenames = [os.path.basename(urlparse(url).path) for url in urls]
elif len(filenames) != len(urls):
raise ValueError("The number of filenames must match the number of URLs")

# Set default for show_progress based on whether it's a single file or list
if show_progress is None:
show_progress = len(urls) == 1

async def download_all():
tasks = [self.download_file(url, filename, max_files, chunk_size, headers, show_progress=show_progress) for url, filename in zip(urls, filenames)]
await asyncio.gather(*tasks)

asyncio.run(download_all())

def upload(self, file_path: str, parts_urls: List[str], chunk_size: int = 2 * 1024 * 1024, max_files: int = 10):
def upload(self, file_path: str, parts_urls: List[str], chunk_size: int = 2 * 1024 * 1024, max_files: int = 10, show_progress: Optional[bool] = True):
"""
Uploads a file to multiple URLs in chunks asynchronously, with support for parallel uploads.
@@ -212,11 +239,12 @@ def upload(self, file_path: str, parts_urls: List[str], chunk_size: int = 2 * 10
parts_urls (List[str]): A list of URLs where each part of the file will be uploaded.
chunk_size (int): The size of each chunk to upload, in bytes. Defaults to 2MB.
max_files (int): The maximum number of concurrent file upload chunks. Defaults to 10.
show_progress (bool): Whether to show a progress bar during upload. Defaults to True.
Usage:
- The function divides the file into smaller chunks and uploads them in parallel to different URLs.
"""
asyncio.run(self.upload_file(file_path, parts_urls, chunk_size, max_files))
asyncio.run(self.upload_file(file_path, parts_urls, chunk_size, max_files, show_progress=show_progress))

def normal_download(self, url: str, filename: str):
response = requests.get(url, stream=True)
