eacortes committed on
Commit
7212fdc
·
verified ·
1 Parent(s): d5c6f63

Upload folder using huggingface_hub

Browse files
README.md CHANGED
@@ -27,7 +27,7 @@ model_name: ms-marco-MiniLM-L12-v2
27
  - **Original Model:** [cross-encoder/ms-marco-MiniLM-L12-v2](https://huggingface.co/cross-encoder/ms-marco-MiniLM-L12-v2)
28
  - **Target Platform:** rk3588
29
  - **rknn-toolkit2 Version:** 2.3.2
30
- - **rk-transformers Version:** 0.1.1
31
 
32
  ### Available Model Files
33
 
@@ -46,40 +46,32 @@ model_name: ms-marco-MiniLM-L12-v2
46
 
47
  ### Installation
48
 
49
- Install `rk-transformers` to use this model:
50
 
51
  ```bash
52
- pip install rk-transformers
53
  ```
54
 
55
- #### RKTransformers API
56
 
57
  ```python
58
- from rktransformers import RKRTModelForSequenceClassification
59
  from transformers import AutoTokenizer
60
 
61
- # Load tokenizer and model
62
  tokenizer = AutoTokenizer.from_pretrained("rk-transformers/ms-marco-MiniLM-L12-v2")
63
- model = RKRTModelForSequenceClassification.from_pretrained(
64
  "rk-transformers/ms-marco-MiniLM-L12-v2",
65
  platform="rk3588",
66
  core_mask="auto",
67
  )
68
 
69
- # Tokenize and run inference
70
- inputs = tokenizer(
71
- ["Sample text for encoding"],
72
- padding="max_length",
73
- max_length=512,
74
- truncation=True,
75
- return_tensors="np"
76
- )
77
-
78
  outputs = model(**inputs)
79
- print(outputs.shape)
 
80
 
81
  # Load specific optimized/quantized model file
82
- model = RKRTModelForSequenceClassification.from_pretrained(
83
  "rk-transformers/ms-marco-MiniLM-L12-v2",
84
  platform="rk3588",
85
  file_name="rknn/model_w8a8.rknn"
@@ -88,10 +80,11 @@ model = RKRTModelForSequenceClassification.from_pretrained(
88
 
89
  ## Configuration
90
 
91
- The full configuration for all exported RKNN models is available in the [rknn.json](./rknn.json) file.
92
 
93
  </details>
94
 
 
95
  # Cross-Encoder for MS Marco
96
 
97
  This model was trained on the [MS Marco Passage Ranking](https://github.com/microsoft/MSMARCO-Passage-Ranking) task.
 
27
  - **Original Model:** [cross-encoder/ms-marco-MiniLM-L12-v2](https://huggingface.co/cross-encoder/ms-marco-MiniLM-L12-v2)
28
  - **Target Platform:** rk3588
29
  - **rknn-toolkit2 Version:** 2.3.2
30
+ - **rk-transformers Version:** 0.3.0
31
 
32
  ### Available Model Files
33
 
 
46
 
47
  ### Installation
48
 
49
+ Install `rk-transformers` with inference dependencies to use this model:
50
 
51
  ```bash
52
+ pip install "rk-transformers[inference]"
53
  ```
54
 
55
+ #### RK-Transformers API
56
 
57
  ```python
58
+ from rktransformers import RKModelForSequenceClassification
59
  from transformers import AutoTokenizer
60
 
 
61
  tokenizer = AutoTokenizer.from_pretrained("rk-transformers/ms-marco-MiniLM-L12-v2")
62
+ model = RKModelForSequenceClassification.from_pretrained(
63
  "rk-transformers/ms-marco-MiniLM-L12-v2",
64
  platform="rk3588",
65
  core_mask="auto",
66
  )
67
 
68
+ inputs = tokenizer("Hello, my dog is cute", return_tensors="np")
 
 
 
 
 
 
 
 
69
  outputs = model(**inputs)
70
+ logits = outputs.logits
71
+ print(logits.shape)
72
 
73
  # Load specific optimized/quantized model file
74
+ model = RKModelForSequenceClassification.from_pretrained(
75
  "rk-transformers/ms-marco-MiniLM-L12-v2",
76
  platform="rk3588",
77
  file_name="rknn/model_w8a8.rknn"
 
80
 
81
  ## Configuration
82
 
83
+ The full configuration for all exported RKNN models is available in the [config.json](./config.json) file.
84
 
85
  </details>
86
 
87
+ ---
88
  # Cross-Encoder for MS Marco
89
 
90
  This model was trained on the [MS Marco Passage Ranking](https://github.com/microsoft/MSMARCO-Passage-Ranking) task.
config.json CHANGED
@@ -23,6 +23,372 @@
23
  "num_hidden_layers": 12,
24
  "pad_token_id": 0,
25
  "position_embedding_type": "absolute",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  "sbert_ce_default_activation_function": "torch.nn.modules.linear.Identity",
27
  "torch_dtype": "float32",
28
  "transformers_version": "4.55.4",
 
23
  "num_hidden_layers": 12,
24
  "pad_token_id": 0,
25
  "position_embedding_type": "absolute",
26
+ "rknn": {
27
+ "model.rknn": {
28
+ "batch_size": 1,
29
+ "custom_string": null,
30
+ "dynamic_input": null,
31
+ "float_dtype": "float16",
32
+ "inputs_yuv_fmt": null,
33
+ "max_seq_length": 512,
34
+ "mean_values": null,
35
+ "model_input_names": [
36
+ "input_ids",
37
+ "attention_mask",
38
+ "token_type_ids"
39
+ ],
40
+ "opset": 19,
41
+ "optimization": {
42
+ "compress_weight": false,
43
+ "enable_flash_attention": true,
44
+ "model_pruning": false,
45
+ "optimization_level": 0,
46
+ "remove_reshape": false,
47
+ "remove_weight": false,
48
+ "sparse_infer": false
49
+ },
50
+ "quantization": {
51
+ "auto_hybrid_cos_thresh": 0.98,
52
+ "auto_hybrid_euc_thresh": null,
53
+ "dataset_columns": null,
54
+ "dataset_name": null,
55
+ "dataset_size": 128,
56
+ "dataset_split": null,
57
+ "dataset_subset": null,
58
+ "do_quantization": false,
59
+ "quant_img_RGB2BGR": false,
60
+ "quantized_algorithm": "normal",
61
+ "quantized_dtype": "w8a8",
62
+ "quantized_hybrid_level": 0,
63
+ "quantized_method": "channel"
64
+ },
65
+ "rktransformers_version": "0.3.0",
66
+ "single_core_mode": false,
67
+ "std_values": null,
68
+ "target_platform": "rk3588",
69
+ "task": "sequence-classification",
70
+ "task_kwargs": null
71
+ },
72
+ "model_b1_s256.rknn": {
73
+ "batch_size": 1,
74
+ "custom_string": null,
75
+ "dynamic_input": null,
76
+ "float_dtype": "float16",
77
+ "inputs_yuv_fmt": null,
78
+ "max_seq_length": 256,
79
+ "mean_values": null,
80
+ "model_input_names": [
81
+ "input_ids",
82
+ "attention_mask",
83
+ "token_type_ids"
84
+ ],
85
+ "opset": 19,
86
+ "optimization": {
87
+ "compress_weight": false,
88
+ "enable_flash_attention": true,
89
+ "model_pruning": false,
90
+ "optimization_level": 0,
91
+ "remove_reshape": false,
92
+ "remove_weight": false,
93
+ "sparse_infer": false
94
+ },
95
+ "quantization": {
96
+ "auto_hybrid_cos_thresh": 0.98,
97
+ "auto_hybrid_euc_thresh": null,
98
+ "dataset_columns": null,
99
+ "dataset_name": null,
100
+ "dataset_size": 128,
101
+ "dataset_split": null,
102
+ "dataset_subset": null,
103
+ "do_quantization": false,
104
+ "quant_img_RGB2BGR": false,
105
+ "quantized_algorithm": "normal",
106
+ "quantized_dtype": "w8a8",
107
+ "quantized_hybrid_level": 0,
108
+ "quantized_method": "channel"
109
+ },
110
+ "rktransformers_version": "0.3.0",
111
+ "single_core_mode": false,
112
+ "std_values": null,
113
+ "target_platform": "rk3588",
114
+ "task": "sequence-classification",
115
+ "task_kwargs": null
116
+ },
117
+ "model_b4_s256.rknn": {
118
+ "batch_size": 4,
119
+ "custom_string": null,
120
+ "dynamic_input": null,
121
+ "float_dtype": "float16",
122
+ "inputs_yuv_fmt": null,
123
+ "max_seq_length": 256,
124
+ "mean_values": null,
125
+ "model_input_names": [
126
+ "input_ids",
127
+ "attention_mask",
128
+ "token_type_ids"
129
+ ],
130
+ "opset": 19,
131
+ "optimization": {
132
+ "compress_weight": false,
133
+ "enable_flash_attention": true,
134
+ "model_pruning": false,
135
+ "optimization_level": 0,
136
+ "remove_reshape": false,
137
+ "remove_weight": false,
138
+ "sparse_infer": false
139
+ },
140
+ "quantization": {
141
+ "auto_hybrid_cos_thresh": 0.98,
142
+ "auto_hybrid_euc_thresh": null,
143
+ "dataset_columns": null,
144
+ "dataset_name": null,
145
+ "dataset_size": 128,
146
+ "dataset_split": null,
147
+ "dataset_subset": null,
148
+ "do_quantization": false,
149
+ "quant_img_RGB2BGR": false,
150
+ "quantized_algorithm": "normal",
151
+ "quantized_dtype": "w8a8",
152
+ "quantized_hybrid_level": 0,
153
+ "quantized_method": "channel"
154
+ },
155
+ "rktransformers_version": "0.3.0",
156
+ "single_core_mode": false,
157
+ "std_values": null,
158
+ "target_platform": "rk3588",
159
+ "task": "sequence-classification",
160
+ "task_kwargs": null
161
+ },
162
+ "model_b4_s512.rknn": {
163
+ "batch_size": 4,
164
+ "custom_string": null,
165
+ "dynamic_input": null,
166
+ "float_dtype": "float16",
167
+ "inputs_yuv_fmt": null,
168
+ "max_seq_length": 512,
169
+ "mean_values": null,
170
+ "model_input_names": [
171
+ "input_ids",
172
+ "attention_mask",
173
+ "token_type_ids"
174
+ ],
175
+ "opset": 19,
176
+ "optimization": {
177
+ "compress_weight": false,
178
+ "enable_flash_attention": true,
179
+ "model_pruning": false,
180
+ "optimization_level": 0,
181
+ "remove_reshape": false,
182
+ "remove_weight": false,
183
+ "sparse_infer": false
184
+ },
185
+ "quantization": {
186
+ "auto_hybrid_cos_thresh": 0.98,
187
+ "auto_hybrid_euc_thresh": null,
188
+ "dataset_columns": null,
189
+ "dataset_name": null,
190
+ "dataset_size": 128,
191
+ "dataset_split": null,
192
+ "dataset_subset": null,
193
+ "do_quantization": false,
194
+ "quant_img_RGB2BGR": false,
195
+ "quantized_algorithm": "normal",
196
+ "quantized_dtype": "w8a8",
197
+ "quantized_hybrid_level": 0,
198
+ "quantized_method": "channel"
199
+ },
200
+ "rktransformers_version": "0.3.0",
201
+ "single_core_mode": false,
202
+ "std_values": null,
203
+ "target_platform": "rk3588",
204
+ "task": "sequence-classification",
205
+ "task_kwargs": null
206
+ },
207
+ "rknn/model_o1.rknn": {
208
+ "batch_size": 1,
209
+ "custom_string": null,
210
+ "dynamic_input": null,
211
+ "float_dtype": "float16",
212
+ "inputs_yuv_fmt": null,
213
+ "max_seq_length": 512,
214
+ "mean_values": null,
215
+ "model_input_names": [
216
+ "input_ids",
217
+ "attention_mask",
218
+ "token_type_ids"
219
+ ],
220
+ "opset": 19,
221
+ "optimization": {
222
+ "compress_weight": false,
223
+ "enable_flash_attention": true,
224
+ "model_pruning": false,
225
+ "optimization_level": 1,
226
+ "remove_reshape": false,
227
+ "remove_weight": false,
228
+ "sparse_infer": false
229
+ },
230
+ "quantization": {
231
+ "auto_hybrid_cos_thresh": 0.98,
232
+ "auto_hybrid_euc_thresh": null,
233
+ "dataset_columns": null,
234
+ "dataset_name": null,
235
+ "dataset_size": 128,
236
+ "dataset_split": null,
237
+ "dataset_subset": null,
238
+ "do_quantization": false,
239
+ "quant_img_RGB2BGR": false,
240
+ "quantized_algorithm": "normal",
241
+ "quantized_dtype": "w8a8",
242
+ "quantized_hybrid_level": 0,
243
+ "quantized_method": "channel"
244
+ },
245
+ "rktransformers_version": "0.3.0",
246
+ "single_core_mode": false,
247
+ "std_values": null,
248
+ "target_platform": "rk3588",
249
+ "task": "sequence-classification",
250
+ "task_kwargs": null
251
+ },
252
+ "rknn/model_o2.rknn": {
253
+ "batch_size": 1,
254
+ "custom_string": null,
255
+ "dynamic_input": null,
256
+ "float_dtype": "float16",
257
+ "inputs_yuv_fmt": null,
258
+ "max_seq_length": 512,
259
+ "mean_values": null,
260
+ "model_input_names": [
261
+ "input_ids",
262
+ "attention_mask",
263
+ "token_type_ids"
264
+ ],
265
+ "opset": 19,
266
+ "optimization": {
267
+ "compress_weight": false,
268
+ "enable_flash_attention": true,
269
+ "model_pruning": false,
270
+ "optimization_level": 2,
271
+ "remove_reshape": false,
272
+ "remove_weight": false,
273
+ "sparse_infer": false
274
+ },
275
+ "quantization": {
276
+ "auto_hybrid_cos_thresh": 0.98,
277
+ "auto_hybrid_euc_thresh": null,
278
+ "dataset_columns": null,
279
+ "dataset_name": null,
280
+ "dataset_size": 128,
281
+ "dataset_split": null,
282
+ "dataset_subset": null,
283
+ "do_quantization": false,
284
+ "quant_img_RGB2BGR": false,
285
+ "quantized_algorithm": "normal",
286
+ "quantized_dtype": "w8a8",
287
+ "quantized_hybrid_level": 0,
288
+ "quantized_method": "channel"
289
+ },
290
+ "rktransformers_version": "0.3.0",
291
+ "single_core_mode": false,
292
+ "std_values": null,
293
+ "target_platform": "rk3588",
294
+ "task": "sequence-classification",
295
+ "task_kwargs": null
296
+ },
297
+ "rknn/model_o3.rknn": {
298
+ "batch_size": 1,
299
+ "custom_string": null,
300
+ "dynamic_input": null,
301
+ "float_dtype": "float16",
302
+ "inputs_yuv_fmt": null,
303
+ "max_seq_length": 512,
304
+ "mean_values": null,
305
+ "model_input_names": [
306
+ "input_ids",
307
+ "attention_mask",
308
+ "token_type_ids"
309
+ ],
310
+ "opset": 19,
311
+ "optimization": {
312
+ "compress_weight": false,
313
+ "enable_flash_attention": true,
314
+ "model_pruning": false,
315
+ "optimization_level": 3,
316
+ "remove_reshape": false,
317
+ "remove_weight": false,
318
+ "sparse_infer": false
319
+ },
320
+ "quantization": {
321
+ "auto_hybrid_cos_thresh": 0.98,
322
+ "auto_hybrid_euc_thresh": null,
323
+ "dataset_columns": null,
324
+ "dataset_name": null,
325
+ "dataset_size": 128,
326
+ "dataset_split": null,
327
+ "dataset_subset": null,
328
+ "do_quantization": false,
329
+ "quant_img_RGB2BGR": false,
330
+ "quantized_algorithm": "normal",
331
+ "quantized_dtype": "w8a8",
332
+ "quantized_hybrid_level": 0,
333
+ "quantized_method": "channel"
334
+ },
335
+ "rktransformers_version": "0.3.0",
336
+ "single_core_mode": false,
337
+ "std_values": null,
338
+ "target_platform": "rk3588",
339
+ "task": "sequence-classification",
340
+ "task_kwargs": null
341
+ },
342
+ "rknn/model_w8a8.rknn": {
343
+ "batch_size": 1,
344
+ "custom_string": null,
345
+ "dynamic_input": null,
346
+ "float_dtype": "float16",
347
+ "inputs_yuv_fmt": null,
348
+ "max_seq_length": 512,
349
+ "mean_values": null,
350
+ "model_input_names": [
351
+ "input_ids",
352
+ "attention_mask",
353
+ "token_type_ids"
354
+ ],
355
+ "opset": 19,
356
+ "optimization": {
357
+ "compress_weight": false,
358
+ "enable_flash_attention": true,
359
+ "model_pruning": false,
360
+ "optimization_level": 0,
361
+ "remove_reshape": false,
362
+ "remove_weight": false,
363
+ "sparse_infer": false
364
+ },
365
+ "quantization": {
366
+ "auto_hybrid_cos_thresh": 0.98,
367
+ "auto_hybrid_euc_thresh": null,
368
+ "dataset_columns": [
369
+ "answer"
370
+ ],
371
+ "dataset_name": "sentence-transformers/natural-questions",
372
+ "dataset_size": 1024,
373
+ "dataset_split": [
374
+ "train"
375
+ ],
376
+ "dataset_subset": null,
377
+ "do_quantization": true,
378
+ "quant_img_RGB2BGR": false,
379
+ "quantized_algorithm": "normal",
380
+ "quantized_dtype": "w8a8",
381
+ "quantized_hybrid_level": 0,
382
+ "quantized_method": "channel"
383
+ },
384
+ "rktransformers_version": "0.3.0",
385
+ "single_core_mode": false,
386
+ "std_values": null,
387
+ "target_platform": "rk3588",
388
+ "task": "sequence-classification",
389
+ "task_kwargs": null
390
+ }
391
+ },
392
  "sbert_ce_default_activation_function": "torch.nn.modules.linear.Identity",
393
  "torch_dtype": "float32",
394
  "transformers_version": "4.55.4",
model.rknn CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4bada98d5ef1f57199733bceeb9b348a061eb17b77e444b68cca1557ef64b52b
3
  size 72099070
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:646a50d03c63c0aa2745c6716cf5a25f79fa3b1ee39bd3d266d7fcf074e5e4d8
3
  size 72099070
model_b4_s256.rknn CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:872b6e2550a0cd9ed4de28fc86d62b3af6227fdae378f84df5fddb32334d5724
3
  size 78763262
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd6cd277b1941fbf89bf4cc6e25eaeb19d5781a8e31ccabdd19d4d2433c81ceb
3
  size 78763262
model_b4_s512.rknn CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c640bce5951ca71d22756d003eaccac10159d40e489f15b170c2c781a88fd916
3
  size 85670846
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd83142741f61ae6c8cde672d694dae638509eec65046a1fb9694a5b5944b779
3
  size 85670846
rknn/model_o1.rknn CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6d90dcbea5b184df2830e4a9a84c0135d335df1b43b4c8b2e36ba26d4f654016
3
  size 72099070
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfc6b8c69a4dc9e31e698b91e3404f0abe6acf746837e938ca43fce4a31841ac
3
  size 72099070
rknn/model_o2.rknn CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3b0813cfb3c5bdbab369477ee781addd58a069246ac34b71b6e8c38255070aeb
3
  size 72099070
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5ca3683f33587cf746c282a72a01778ffa4ec721bc0f996318aadb7957e1deb
3
  size 72099070
rknn/model_o3.rknn CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cd402bb19ef3b0a952eafa820aa6b2c9c369668256334b6ec314e7e7436c86ae
3
  size 72099070
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:100dbcec0e54ff61cff2b3ebfa52aefa352e7dddb4e50fe8a45deb5390e81282
3
  size 72099070
rknn/model_w8a8.rknn CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f39839582190c0b2e6f0c260994f946c8ea973ccc456635111e721f1e6e6843
3
  size 38286411
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1db6b461c724bfecba727a2e268cd1ae637af37aca93adf3def0a4b9e03ee93
3
  size 38286411