diff --git a/client/setup.py b/client/setup.py
index 0a248fec..6c2c8a8d 100644
--- a/client/setup.py
+++ b/client/setup.py
@@ -43,7 +43,7 @@
     setup_requires=['setuptools>=18.0', 'wheel'],
     install_requires=[
         'jina>=3.12.0',
-        'docarray[common]>=0.19.0,<0.30.0',
+        'docarray[common]==0.21.0',
         'packaging',
     ],
     extras_require={
diff --git a/server/clip_server/model/clip_onnx.py b/server/clip_server/model/clip_onnx.py
index 90a9c0f0..2a27c5aa 100644
--- a/server/clip_server/model/clip_onnx.py
+++ b/server/clip_server/model/clip_onnx.py
@@ -1,5 +1,6 @@
 import os
 from typing import Dict, Optional
+import requests
 
 from clip_server.model.pretrained_models import (
     download_model,
@@ -8,10 +9,7 @@
 )
 from clip_server.model.clip_model import BaseCLIPModel
 
-_S3_BUCKET = (
-    'https://clip-as-service.s3.us-east-2.amazonaws.com/models/onnx/'  # Deprecated
-)
-_S3_BUCKET_V2 = 'https://clip-as-service.s3.us-east-2.amazonaws.com/models-436c69702d61732d53657276696365/onnx/'
+_HUGGINGFACE_ONNX_BUCKET = 'https://huggingface.co/jinaai/clip-models/'
 _MODELS = {
     'RN50::openai': (
         ('RN50/textual.onnx', '722418bfe47a1f5c79d1f44884bb3103'),
@@ -213,14 +211,14 @@ def __init__(
                 )
                 textual_model_name, textual_model_md5 = _MODELS[name][0]
                 self._textual_path = download_model(
-                    url=_S3_BUCKET_V2 + textual_model_name,
+                    url=self.get_onnx_model_url(name=textual_model_name),
                     target_folder=cache_dir,
                     md5sum=textual_model_md5,
                     with_resume=True,
                 )
                 visual_model_name, visual_model_md5 = _MODELS[name][1]
                 self._visual_path = download_model(
-                    url=_S3_BUCKET_V2 + visual_model_name,
+                    url=self.get_onnx_model_url(name=visual_model_name),
                     target_folder=cache_dir,
                     md5sum=visual_model_md5,
                     with_resume=True,
@@ -261,6 +259,34 @@ def get_model_name(name: str):
 
         return name
 
+    @staticmethod
+    def get_onnx_model_url(name: str):
+        """Return the Hugging Face download URL of an ONNX model file.
+
+        The URL is probed with a HEAD request before it is returned.
+
+        :param name: model file path such as ``'RN50/textual.onnx'``
+        :return: the download URL
+        :raises ValueError: if the probe fails or answers neither 200 nor 302
+        """
+        hf_download_url = (
+            _HUGGINGFACE_ONNX_BUCKET
+            + 'resolve/main/'
+            + name.split('/')[0]
+            + '-'
+            + name.split('/')[1]
+            + '?download=true'
+        )
+        try:
+            response = requests.head(hf_download_url, timeout=10)
+        except Exception as e:
+            raise ValueError('Invalid model url.') from e
+        # Any other status must raise too: falling through would return None
+        # and hand `download_model` an unusable url.
+        if response.status_code not in (200, 302):
+            raise ValueError('Invalid model url.')
+        return hf_download_url
+
     def start_sessions(
         self,
         dtype,
diff --git a/server/clip_server/model/pretrained_models.py b/server/clip_server/model/pretrained_models.py
index 3494bee4..da33461c 100644
--- a/server/clip_server/model/pretrained_models.py
+++ b/server/clip_server/model/pretrained_models.py
@@ -2,9 +2,10 @@
 import hashlib
 import shutil
 import urllib
+import requests
 
 
-_OPENCLIP_S3_BUCKET = 'https://clip-as-service.s3.us-east-2.amazonaws.com/models/torch'
+_OPENCLIP_HUGGINGFACE_BUCKET = 'https://huggingface.co/jinaai/clip-models/'
 _OPENCLIP_MODELS = {
     'RN50::openai': ('RN50.pt', '9140964eaaf9f68c95aa8df6ca13777c'),
     'RN50::yfcc15m': ('RN50-yfcc15m.pt', 'e9c564f91ae7dc754d9043fdcd2a9f22'),
@@ -132,7 +133,7 @@
 def md5file(filename: str):
     hash_md5 = hashlib.md5()
     with open(filename, 'rb') as f:
-        for chunk in iter(lambda: f.read(4096), b""):
+        for chunk in iter(lambda: f.read(4096), b''):
             hash_md5.update(chunk)
 
     return hash_md5.hexdigest()
@@ -143,18 +144,37 @@ def get_model_url_md5(name: str):
     if len(model_pretrained) == 0:  # not on s3
         return None, None
     else:
-        return (_OPENCLIP_S3_BUCKET + '/' + model_pretrained[0], model_pretrained[1])
+        hf_download_url = (
+            _OPENCLIP_HUGGINGFACE_BUCKET
+            + 'resolve/main/'
+            + model_pretrained[0]
+            + '?download=true'
+        )
+        try:
+            response = requests.head(hf_download_url, timeout=10)
+        except Exception as e:
+            raise ValueError('Invalid model url.') from e
+        # Any other status must raise too: falling through would return a bare
+        # None instead of the 2-tuple every other path of this function returns.
+        if response.status_code not in (200, 302):
+            raise ValueError('Invalid model url.')
+        return (hf_download_url, model_pretrained[1])
 
 
 def download_model(
     url: str,
-    target_folder: str = os.path.expanduser("~/.cache/clip"),
+    target_folder: str = os.path.expanduser('~/.cache/clip'),
     md5sum: str = None,
     with_resume: bool = True,
     max_attempts: int = 3,
 ) -> str:
     os.makedirs(target_folder, exist_ok=True)
     filename = os.path.basename(url)
+    # drop the '?download=true' query string from the last path segment
+    filename = filename.split('?')[0]
+    if filename.split('.')[-1] == 'onnx':
+        # ONNX urls are built as '<model>-<file>.onnx'; keep only '<file>.onnx'
+        filename = filename.split('-')[-1]
 
     download_target = os.path.join(target_folder, filename)
 
@@ -175,14 +195,14 @@ def download_model(
     )
 
     progress = Progress(
-        " \n",  # divide this bar from Flow's bar
-        TextColumn("[bold blue]{task.fields[filename]}", justify="right"),
-        "[progress.percentage]{task.percentage:>3.1f}%",
-        "•",
+        ' \n',  # divide this bar from Flow's bar
+        TextColumn('[bold blue]{task.fields[filename]}', justify='right'),
+        '[progress.percentage]{task.percentage:>3.1f}%',
+        '•',
         DownloadColumn(),
-        "•",
+        '•',
         TransferSpeedColumn(),
-        "•",
+        '•',
         TimeRemainingColumn(),
     )
 