Joshua Lochner committed d7a594b (1 parent: dffef09)

Download classifier and vectorizer if not present

src/model.py · CHANGED · +20 -3
@@ -1,5 +1,7 @@
+from huggingface_hub import hf_hub_download
 from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
 from shared import CustomTokens, device
+from errors import ClassifierLoadError, ModelLoadError
 from functools import lru_cache
 import pickle
 import os
@@ -29,7 +31,7 @@ class ModelArguments:
     # }
     # )
     cache_dir: Optional[str] = field(
-        default=
+        default='models',
         metadata={
             'help': 'Where to store the pretrained models downloaded from huggingface.co'
         },
@@ -63,13 +65,27 @@ class ModelArguments:
 
 @lru_cache(maxsize=None)
 def get_classifier_vectorizer(classifier_args):
+    # Classifier
     classifier_path = os.path.join(
         classifier_args.classifier_dir, classifier_args.classifier_file)
+    if not os.path.exists(classifier_path):
+        hf_hub_download(repo_id=classifier_args.classifier_model,
+                        filename=classifier_args.classifier_file,
+                        cache_dir=classifier_args.classifier_dir,
+                        force_filename=classifier_args.classifier_file,
+                        )
     with open(classifier_path, 'rb') as fp:
         classifier = pickle.load(fp)
 
+    # Vectorizer
     vectorizer_path = os.path.join(
         classifier_args.classifier_dir, classifier_args.vectorizer_file)
+    if not os.path.exists(vectorizer_path):
+        hf_hub_download(repo_id=classifier_args.classifier_model,
+                        filename=classifier_args.vectorizer_file,
+                        cache_dir=classifier_args.classifier_dir,
+                        force_filename=classifier_args.vectorizer_file,
+                        )
     with open(vectorizer_path, 'rb') as fp:
         vectorizer = pickle.load(fp)
 
@@ -79,10 +95,11 @@ def get_classifier_vectorizer(classifier_args):
 @lru_cache(maxsize=None)
 def get_model_tokenizer(model_name_or_path, cache_dir=None):
     if model_name_or_path is None:
-        raise
+        raise ModelLoadError('Invalid model_name_or_path.')
 
     # Load pretrained model and tokenizer
-    model = AutoModelForSeq2SeqLM.from_pretrained(
+    model = AutoModelForSeq2SeqLM.from_pretrained(
+        model_name_or_path, cache_dir=cache_dir)
     model.to(device())
 
     tokenizer = AutoTokenizer.from_pretrained(
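
The new logic only contacts the Hub when the pickled files are missing on disk. Below is a minimal standalone sketch of the same download-if-missing pattern, not the repository's code: the repo id, file names and directory are placeholders, and it relies on hf_hub_download returning the cached file path rather than on the force_filename argument, which newer huggingface_hub releases may not accept.

import os
import pickle

from huggingface_hub import hf_hub_download


def load_pickled_asset(repo_id, filename, local_dir):
    # Reuse a previously downloaded copy if one already exists in local_dir.
    local_path = os.path.join(local_dir, filename)
    if not os.path.exists(local_path):
        # hf_hub_download fetches the file from the Hub (or its local cache)
        # and returns the path it was stored at.
        local_path = hf_hub_download(repo_id=repo_id,
                                     filename=filename,
                                     cache_dir=local_dir)
    with open(local_path, 'rb') as fp:
        return pickle.load(fp)


# classifier = load_pickled_asset('some-user/some-classifier', 'classifier.pickle', 'classifiers')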
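
The diff also imports ClassifierLoadError and ModelLoadError from a local errors module that is not part of this commit. A minimal sketch of what such a module could contain, an assumption rather than the actual file:

# Hypothetical errors.py: custom exception types used by src/model.py.
class ClassifierLoadError(Exception):
    """Raised when the classifier or vectorizer cannot be loaded."""


class ModelLoadError(Exception):
    """Raised when the seq2seq model or tokenizer cannot be loaded."""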
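
Both loaders are memoised with @lru_cache, so their arguments must be hashable and repeated calls reuse the already-loaded objects. A hedged usage sketch follows; the ClassifierArguments fields are placeholders inferred from the attributes the diff accesses, and the actual dataclass lives elsewhere in the repository.

from dataclasses import dataclass


@dataclass(frozen=True)  # frozen=True makes instances hashable, as lru_cache requires
class ClassifierArguments:
    classifier_model: str = 'some-user/some-classifier'  # placeholder repo id
    classifier_dir: str = 'classifiers'
    classifier_file: str = 'classifier.pickle'
    vectorizer_file: str = 'vectorizer.pickle'


# classifier, vectorizer = get_classifier_vectorizer(ClassifierArguments())
# model, tokenizer = get_model_tokenizer('t5-small', cache_dir='models')
# (assumes get_model_tokenizer returns the model/tokenizer pair; the tail of that hunk is truncated above)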