# Source code for rllm.utils.download

import os
import os.path as osp
import ssl
from tqdm import tqdm
import urllib
from typing import Optional


def download_url(
    url: str,
    folder: str,
    filename: Optional[str] = None,
):
    r"""Download the content of a URL to a specific folder.

    Args:
        url (str): The URL to download from. Must start with ``http://`` or
            ``https://`` (case-insensitive).
        folder (str): The destination folder. It is created if it does not
            exist yet.
        filename (str, optional): The filename of the downloaded file. If set
            to :obj:`None`, the filename is inferred from the URL.
            (default: :obj:`None`)

    Returns:
        str: The local path to the downloaded file.

    Raises:
        AssertionError: If :obj:`url` is not an HTTP or HTTPS URL.
        ValueError: If :obj:`filename` is :obj:`None` and no filename can be
            inferred from :obj:`url` (e.g. the URL ends with ``/``).
    """
    # A bare `import urllib` does not load the `request` submodule, so import
    # it explicitly here instead of relying on another module having done so.
    import urllib.request

    # Safety check. Raised explicitly (not via the `assert` statement) so it
    # is still enforced under `python -O`; the exception type is kept as
    # AssertionError for backward compatibility with existing callers.
    if not url.lower().startswith(('http://', 'https://')):
        raise AssertionError('Only HTTP or HTTPS is supported.')

    if filename is None:
        filename = url.rpartition('/')[2]
        # Drop the query string unless the last URL segment consists of
        # nothing but a query string.
        if not filename.startswith('?'):
            filename = filename.split('?')[0]
        if not filename:
            raise ValueError(
                f"Cannot infer a filename from URL '{url}'; "
                f"please pass `filename` explicitly.")

    path = osp.join(folder, filename)

    os.makedirs(folder, exist_ok=True)

    # Default context keeps TLS certificate verification enabled.
    context = ssl.create_default_context()

    # The response is used as a context manager so the underlying connection
    # is closed even if writing the file fails midway.
    with urllib.request.urlopen(url, context=context) as data:
        with open(path, 'wb') as f, tqdm(
                desc=f'Downloading {url}',
                total=int(data.info().get('Content-Length', -1)),
                unit='B',
                unit_scale=True,
                unit_divisor=1024,
        ) as bar:
            while True:
                # Stream in 10 MiB chunks to bound memory usage.
                chunk = data.read(10 * 1024 * 1024)
                if not chunk:
                    break
                size = f.write(chunk)
                bar.update(size)

    return path
def download_google_url(
    id: str,
    folder: str,
    filename: str,
):
    r"""Download the content of a Google Drive file to a specific folder.

    Args:
        id (str): The Google Drive file ID.
        folder (str): The destination folder.
        filename (str): The filename of the downloaded file.

    Returns:
        str: The local path to the downloaded file.
    """
    import urllib.parse

    # Percent-encode the ID so characters such as '&', '#' or spaces cannot
    # corrupt the query string. For ordinary IDs (letters, digits, '-', '_')
    # this yields exactly '...download?id=<id>&confirm=t'.
    query = urllib.parse.urlencode({'id': id, 'confirm': 't'})
    url = f'https://drive.usercontent.google.com/download?{query}'
    return download_url(url, folder, filename)