Upload folder using huggingface_hub

Browse files

Files changed (10) hide show

README.md +20 -14
added_tokens.json +11 -10
chat_template.jinja +8 -1
config.json +34 -26
generation_config.json +4 -4
model.safetensors +2 -2
special_tokens_map.json +2 -2
tokenizer.json +2 -2
tokenizer.model +3 -0
tokenizer_config.json +53 -32

README.md CHANGED Viewed

@@ -1,3 +1,5 @@
 Code for creating the tiny model:
 ```python
@@ -6,30 +8,34 @@ import torch
 torch.set_default_dtype(torch.float32)
 import os
-from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer, set_seed
 # === Settings ===
-model_id = "microsoft/Phi-4-mini-instruct"
-output_dir = "phi-4-mini-tiny-random"
 set_seed(0)
 # === Step 1: Define tiny model config ===
 config = AutoConfig.from_pretrained(model_id)
-# the "originally" values are for Phi-4-mini-instruct
-config.num_hidden_layers = 4    # originally 32
-config.num_attention_heads = 4  # originally 24
-config.num_key_value_heads = 2  # originally 8
-config.hidden_size = 64         # originally 3072, this has the largest influence on model size
-config.intermediate_size = 256  # originally 8192; MLP layer
-config.initializer_range = 0.1  # originally 0.02; without this change, phi-4-mini model outputs collapse with larger inputs
-# Keep 6 RoPE factors (originally 48). Adjust config.hidden_size when adjusting this.
 if config.rope_scaling:
-    config.rope_scaling["short_factor"] = config.rope_scaling["short_factor"][::8]
-    config.rope_scaling["long_factor"] = config.rope_scaling["long_factor"][::8]
 # === Step 2: Create model from config ===
 model = AutoModelForCausalLM.from_config(config)
@@ -45,4 +51,4 @@ tokenizer = AutoTokenizer.from_pretrained(model_id)
 os.makedirs(output_dir, exist_ok=True)
 model.save_pretrained(output_dir)
 tokenizer.save_pretrained(output_dir)
-```

+Tiny random model with Phi3 architecture based on microsoft/Phi-3.5-mini-instruct, including longrope config
 Code for creating the tiny model:
 ```python
 torch.set_default_dtype(torch.float32)
 import os
+from pathlib import Path
+from optimum.intel import OVModelForCausalLM
+from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer, set_seed, Phi3ForCausalLM
 # === Settings ===
+model_id = "microsoft/Phi-3.5-mini-instruct"
+output_dir = "phi-3.5-mini-tiny-random"
+ov_output_dir = output_dir + "-ov"
 set_seed(0)
 # === Step 1: Define tiny model config ===
 config = AutoConfig.from_pretrained(model_id)
+config.num_hidden_layers = 4
+config.num_attention_heads = 4
+config.num_key_value_heads = 2
+config.hidden_size = 96
+config.intermediate_size = 256
+config.initializer_range = 0.1
+# Keep fewer RoPE factors. Adjust config.hidden_size when adjusting this.
 if config.rope_scaling:
+    config.rope_scaling["short_factor"] = config.rope_scaling["short_factor"][::4]
+    config.rope_scaling["long_factor"] = config.rope_scaling["long_factor"][::4]
 # === Step 2: Create model from config ===
 model = AutoModelForCausalLM.from_config(config)
 os.makedirs(output_dir, exist_ok=True)
 model.save_pretrained(output_dir)
 tokenizer.save_pretrained(output_dir)
+```

added_tokens.json CHANGED Viewed

@@ -1,12 +1,13 @@
 {
-  "<|/tool_call|>": 200026,
-  "<|/tool|>": 200024,
-  "<|assistant|>": 200019,
-  "<|end|>": 200020,
-  "<|system|>": 200022,
-  "<|tag|>": 200028,
-  "<|tool_call|>": 200025,
-  "<|tool_response|>": 200027,
-  "<|tool|>": 200023,
-  "<|user|>": 200021
 }

 {
+  "<|assistant|>": 32001,
+  "<|endoftext|>": 32000,
+  "<|end|>": 32007,
+  "<|placeholder1|>": 32002,
+  "<|placeholder2|>": 32003,
+  "<|placeholder3|>": 32004,
+  "<|placeholder4|>": 32005,
+  "<|placeholder5|>": 32008,
+  "<|placeholder6|>": 32009,
+  "<|system|>": 32006,
+  "<|user|>": 32010
 }

chat_template.jinja CHANGED Viewed

@@ -1 +1,8 @@
-{% for message in messages %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% else %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %}

+{% for message in messages %}{% if message['role'] == 'system' and message['content'] %}{{'<|system|>
+' + message['content'] + '<|end|>
+'}}{% elif message['role'] == 'user' %}{{'<|user|>
+' + message['content'] + '<|end|>
+'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>
+' + message['content'] + '<|end|>
+'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>
+' }}{% else %}{{ eos_token }}{% endif %}

config.json CHANGED Viewed

@@ -1,59 +1,67 @@
 {
   "architectures": [
     "Phi3ForCausalLM"
   ],
   "attention_bias": false,
   "attention_dropout": 0.0,
   "auto_map": {
-    "AutoConfig": "configuration_phi3.Phi3Config",
-    "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM",
-    "AutoTokenizer": "Xenova/gpt-4o"
   },
-  "bos_token_id": 199999,
   "embd_pdrop": 0.0,
-  "eos_token_id": 199999,
-  "full_attn_mod": 1,
   "hidden_act": "silu",
-  "hidden_size": 64,
   "initializer_range": 0.1,
   "intermediate_size": 256,
-  "interpolate_factor": 1,
-  "lm_head_bias": false,
   "max_position_embeddings": 131072,
-  "mlp_bias": false,
   "model_type": "phi3",
   "num_attention_heads": 4,
   "num_hidden_layers": 4,
   "num_key_value_heads": 2,
   "original_max_position_embeddings": 4096,
-  "pad_token_id": 199999,
-  "partial_rotary_factor": 0.75,
   "resid_pdrop": 0.0,
   "rms_norm_eps": 1e-05,
   "rope_scaling": {
     "long_factor": [
-      1,
-      2.446418898,
-      5.984965424,
-      14.64173252,
-      32.1,
-      32.71
     ],
     "short_factor": [
       1.0,
-      1.0,
-      1.0,
-      1.0,
-      1.0,
-      1.0
     ],
     "type": "longrope"
   },
   "rope_theta": 10000.0,
   "sliding_window": 262144,
-  "tie_word_embeddings": true,
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.55.4",
   "use_cache": true,
-  "vocab_size": 200064
 }

 {
+  "_name_or_path": "microsoft/Phi-3.5-mini-instruct",
   "architectures": [
     "Phi3ForCausalLM"
   ],
   "attention_bias": false,
   "attention_dropout": 0.0,
   "auto_map": {
+    "AutoConfig": "microsoft/Phi-3.5-mini-instruct--configuration_phi3.Phi3Config",
+    "AutoModelForCausalLM": "microsoft/Phi-3.5-mini-instruct--modeling_phi3.Phi3ForCausalLM"
   },
+  "bos_token_id": 1,
   "embd_pdrop": 0.0,
+  "eos_token_id": 32000,
   "hidden_act": "silu",
+  "hidden_size": 96,
   "initializer_range": 0.1,
   "intermediate_size": 256,
   "max_position_embeddings": 131072,
   "model_type": "phi3",
   "num_attention_heads": 4,
   "num_hidden_layers": 4,
   "num_key_value_heads": 2,
   "original_max_position_embeddings": 4096,
+  "pad_token_id": 32000,
+  "partial_rotary_factor": 1.0,
   "resid_pdrop": 0.0,
   "rms_norm_eps": 1e-05,
   "rope_scaling": {
     "long_factor": [
+      1.0800000429153442,
+      1.5899999141693115,
+      3.2300000190734863,
+      7.700000286102295,
+      24.46000099182129,
+      32.590003967285156,
+      50.340003967285156,
+      58.21000289916992,
+      62.71000289916992,
+      63.93000411987305,
+      64.06999969482422,
+      64.4800033569336
     ],
     "short_factor": [
       1.0,
+      1.0499999523162842,
+      1.0499999523162842,
+      1.1599998474121094,
+      1.339999794960022,
+      1.8499997854232788,
+      1.9899996519088745,
+      2.0199997425079346,
+      2.0299997329711914,
+      2.0299997329711914,
+      2.0799996852874756,
+      2.5899994373321533
     ],
     "type": "longrope"
   },
   "rope_theta": 10000.0,
   "sliding_window": 262144,
+  "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
+  "transformers_version": "4.49.0",
   "use_cache": true,
+  "vocab_size": 32064
 }

generation_config.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "_from_model_config": true,
-  "bos_token_id": 199999,
-  "eos_token_id": 199999,
-  "pad_token_id": 199999,
-  "transformers_version": "4.55.4"
 }

 {
   "_from_model_config": true,
+  "bos_token_id": 1,
+  "eos_token_id": 32000,
+  "pad_token_id": 32000,
+  "transformers_version": "4.49.0"
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dd0f9279935f9954e78ee211b05cf0af55be72ae6ca265315baee0d7b1674ad7
-size 26103712

 version https://git-lfs.github.com/spec/v1
+oid sha256:74cead14dfeca6601724692aa0bd751abe8708cde35894a3d814cc67f83bf98e
+size 13128248

special_tokens_map.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "bos_token": {
-    "content": "<|endoftext|>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
@@ -21,7 +21,7 @@
     "single_word": false
   },
   "unk_token": {
-    "content": "<|endoftext|>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,

 {
   "bos_token": {
+    "content": "<s>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
     "single_word": false
   },
   "unk_token": {
+    "content": "<unk>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,

tokenizer.json CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:382cc235b56c725945e149cc25f191da667c836655efd0857b004320e90e91ea
-size 15524095

 version https://git-lfs.github.com/spec/v1
+oid sha256:3cb815b904d82b82b25dcd90edd00e71a5ee5443472ad611bcb84f1339300647
+size 3620657

tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+size 499723

tokenizer_config.json CHANGED Viewed

@@ -1,25 +1,41 @@
 {
   "add_bos_token": false,
   "add_eos_token": false,
-  "add_prefix_space": false,
   "added_tokens_decoder": {
-    "199999": {
-      "content": "<|endoftext|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
-    "200018": {
-      "content": "<|endofprompt|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
-    "200019": {
       "content": "<|assistant|>",
       "lstrip": false,
       "normalized": false,
@@ -27,72 +43,72 @@
       "single_word": false,
       "special": true
     },
-    "200020": {
-      "content": "<|end|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": true,
       "single_word": false,
       "special": true
     },
-    "200021": {
-      "content": "<|user|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": true,
       "single_word": false,
       "special": true
     },
-    "200022": {
-      "content": "<|system|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": true,
       "single_word": false,
       "special": true
     },
-    "200023": {
-      "content": "<|tool|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": true,
       "single_word": false,
-      "special": false
     },
-    "200024": {
-      "content": "<|/tool|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": true,
       "single_word": false,
-      "special": false
     },
-    "200025": {
-      "content": "<|tool_call|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": true,
       "single_word": false,
-      "special": false
     },
-    "200026": {
-      "content": "<|/tool_call|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": true,
       "single_word": false,
-      "special": false
     },
-    "200027": {
-      "content": "<|tool_response|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": true,
       "single_word": false,
-      "special": false
     },
-    "200028": {
-      "content": "<|tag|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": true,
@@ -100,12 +116,17 @@
       "special": true
     }
   },
-  "bos_token": "<|endoftext|>",
   "clean_up_tokenization_spaces": false,
   "eos_token": "<|endoftext|>",
   "extra_special_tokens": {},
   "model_max_length": 131072,
   "pad_token": "<|endoftext|>",
-  "tokenizer_class": "GPT2Tokenizer",
-  "unk_token": "<|endoftext|>"
 }

 {
   "add_bos_token": false,
   "add_eos_token": false,
+  "add_prefix_space": null,
   "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
+    "1": {
+      "content": "<s>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": false
+    },
+    "32000": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32001": {
       "content": "<|assistant|>",
       "lstrip": false,
       "normalized": false,
       "single_word": false,
       "special": true
     },
+    "32002": {
+      "content": "<|placeholder1|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": true,
       "single_word": false,
       "special": true
     },
+    "32003": {
+      "content": "<|placeholder2|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": true,
       "single_word": false,
       "special": true
     },
+    "32004": {
+      "content": "<|placeholder3|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": true,
       "single_word": false,
       "special": true
     },
+    "32005": {
+      "content": "<|placeholder4|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": true,
       "single_word": false,
+      "special": true
     },
+    "32006": {
+      "content": "<|system|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": true,
       "single_word": false,
+      "special": true
     },
+    "32007": {
+      "content": "<|end|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": true,
       "single_word": false,
+      "special": true
     },
+    "32008": {
+      "content": "<|placeholder5|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": true,
       "single_word": false,
+      "special": true
     },
+    "32009": {
+      "content": "<|placeholder6|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": true,
       "single_word": false,
+      "special": true
     },
+    "32010": {
+      "content": "<|user|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": true,
       "special": true
     }
   },
+  "bos_token": "<s>",
+  "chat_template": "{% for message in messages %}{% if message['role'] == 'system' and message['content'] %}{{'<|system|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'user' %}{{'<|user|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>\n' + message['content'] + '<|end|>\n'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %}",
   "clean_up_tokenization_spaces": false,
   "eos_token": "<|endoftext|>",
   "extra_special_tokens": {},
+  "legacy": false,
   "model_max_length": 131072,
   "pad_token": "<|endoftext|>",
+  "padding_side": "left",
+  "sp_model_kwargs": {},
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
 }