Source code for rllm.utils.download
import os
import os.path as osp
import ssl
from tqdm import tqdm
import urllib
from typing import Optional
[docs]
def download_url(
    url: str,
    folder: str,
    filename: Optional[str] = None,
):
    r"""Download the content of a URL to a specific folder.

    The destination folder is created if it does not exist yet, and the
    download is streamed to disk in chunks while a progress bar is shown.

    Args:
        url (str): The URL to download from. Must start with ``http://`` or
            ``https://`` (case-insensitive).
        folder (str): The destination folder.
        filename (str, optional): The filename of the downloaded file. If set
            to :obj:`None`, the filename is inferred from the URL.
            (default: :obj:`None`)

    Returns:
        str: The local path to the downloaded file.

    Raises:
        AssertionError: If :obj:`url` is not an HTTP or HTTPS URL.
        ValueError: If :obj:`filename` is :obj:`None` and no filename can be
            inferred from :obj:`url` (e.g., the URL ends with ``/``).
    """
    # A bare `import urllib` does not guarantee that the `request` submodule
    # is loaded, so import it explicitly where it is used.
    import urllib.request

    # Safety check: only allow HTTP(S), so that other schemes are rejected
    # before the file system or the network is touched. This is an explicit
    # `raise` rather than an `assert` statement so that the check is not
    # stripped under `python -O`; the exception type is kept as
    # `AssertionError` for backward compatibility.
    if not url.lower().startswith(('http://', 'https://')):
        raise AssertionError('Only HTTP or HTTPS is supported.')

    if filename is None:
        # Use everything after the last '/' as the filename.
        filename = url.rpartition('/')[2]
        if not filename:
            raise ValueError(
                f"Cannot infer a filename from '{url}'; please pass "
                f"'filename' explicitly.")
        # Drop the query string, unless the whole segment is a query string.
        if filename[0] != '?':
            filename = filename.split('?')[0]

    path = osp.join(folder, filename)
    os.makedirs(folder, exist_ok=True)

    # The default context verifies server certificates and host names.
    context = ssl.create_default_context()

    # Use the response as a context manager so that the connection is closed
    # even if reading or writing fails half-way through.
    with urllib.request.urlopen(url, context=context) as data:
        # Servers may omit 'Content-Length'; `total=None` makes the progress
        # bar show basic statistics only.
        content_length = data.info().get('Content-Length')
        total = int(content_length) if content_length is not None else None

        with open(path, 'wb') as f, tqdm(
                desc=f'Downloading {url}',
                total=total,
                unit='B',
                unit_scale=True,
                unit_divisor=1024,
        ) as bar:
            # Stream in 10 MiB chunks to keep memory usage bounded.
            while True:
                chunk = data.read(10 * 1024 * 1024)
                if not chunk:
                    break
                size = f.write(chunk)
                bar.update(size)

    return path
[docs]
def download_google_url(
    id: str,
    folder: str,
    filename: str,
):
    r"""Fetch a Google Drive file and store it in a local folder.

    The file ID is inserted into a Google Drive download URL, and the
    actual transfer is delegated to :func:`download_url`.

    Args:
        id (str): The Google Drive file ID.
        folder (str): The destination folder.
        filename (str): The filename of the downloaded file.

    Returns:
        str: The local path to the downloaded file.
    """
    drive_url = (
        f'https://drive.usercontent.google.com/download?id={id}&confirm=t'
    )
    local_path = download_url(drive_url, folder, filename)
    return local_path