edanigoben committed on
Commit
08af74e
·
1 Parent(s): dc1ce4d

Training in progress, epoch 1

Browse files
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "factored/distilbert-fr-explorer-mlm",
3
  "activation": "gelu",
4
  "architectures": [
5
  "DistilBertForMaskedLM"
 
1
  {
2
+ "_name_or_path": "distilbert-base-uncased",
3
  "activation": "gelu",
4
  "architectures": [
5
  "DistilBertForMaskedLM"
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:171874ef4b9e9b9b9e269e9bb62429fe5e4b9b499b147ab4a2caca02c6dcad6e
3
  size 267978033
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a46a44349ddf941c7b0adbb8f5d494fb7523335edbfbc3c78c6996ef83cdc74e
3
  size 267978033
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "clean_up_tokenization_spaces": true,
3
+ "cls_token": "[CLS]",
4
+ "do_lower_case": true,
5
+ "mask_token": "[MASK]",
6
+ "model_max_length": 512,
7
+ "pad_token": "[PAD]",
8
+ "sep_token": "[SEP]",
9
+ "strip_accents": null,
10
+ "tokenize_chinese_chars": true,
11
+ "tokenizer_class": "DistilBertTokenizer",
12
+ "unk_token": "[UNK]"
13
+ }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d3c145f3359d2b186979bc5a05e7481c3332046a3fd906f0982490766a66942a
3
  size 3643
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ade086dc3eef9e31ea99498922e16be35fb3b840da4dd28e39ece098dc9530f
3
  size 3643
vocab.txt ADDED
The diff for this file is too large to render. See raw diff