talk-llama : sync llama.cpp
examples/talk-llama/llama.cpp  (+20 -16)
@@ -2190,6 +2190,11 @@ struct llama_model_loader {
     LLM_KV llm_kv = LLM_KV(LLM_ARCH_UNKNOWN);
 
     llama_model_loader(const std::string & fname, bool use_mmap, const struct llama_model_kv_override * param_overrides_p) : file(fname.c_str(), "rb") {
+        int trace = 0;
+        if (getenv("LLAMA_TRACE")) {
+            trace = atoi(getenv("LLAMA_TRACE"));
+        }
+
         struct gguf_init_params params = {
             /*.no_alloc = */ true,
             /*.ctx      = */ &ctx_meta,
@@ -2242,11 +2247,10 @@ struct llama_model_loader {
                 type_max = type;
             }
 
-            // TODO: make runtime configurable
-#if 0
-            struct ggml_tensor * meta = ggml_get_tensor(ctx_meta, gguf_get_tensor_name(ctx_gguf, i));
-            LLAMA_LOG_INFO("%s: - tensor %4d: %32s %-8s [ %s ]\n", __func__, i, ggml_get_name(meta), ggml_type_name(type), llama_format_tensor_shape(meta).c_str());
-#endif
+            if (trace > 0) {
+                struct ggml_tensor * meta = ggml_get_tensor(ctx_meta, gguf_get_tensor_name(ctx_gguf, i));
+                LLAMA_LOG_INFO("%s: - tensor %4d: %32s %-8s [ %s ]\n", __func__, i, ggml_get_name(meta), ggml_type_name(type), llama_format_tensor_shape(meta).c_str());
+            }
         }
 
         switch (type_max) {
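The first two hunks make the loader's per-tensor logging runtime-configurable: the constructor reads the LLAMA_TRACE environment variable into an integer trace level, and the per-tensor metadata line is printed whenever the level is positive (e.g. run with LLAMA_TRACE=1). A minimal self-contained sketch of the same pattern, with illustrative names rather than the actual loader code:

// sketch of the env-var-gated tracing pattern introduced above (illustrative names)
#include <cstdio>
#include <cstdlib>

static int get_trace_level(const char * name) {
    const char * val = std::getenv(name); // nullptr when the variable is unset
    return val ? std::atoi(val) : 0;      // non-numeric values parse to 0, i.e. tracing stays off
}

int main() {
    const int trace = get_trace_level("LLAMA_TRACE");

    for (int i = 0; i < 3; i++) {
        if (trace > 0) {
            // the real loader prints tensor name, type and shape here
            std::fprintf(stderr, "trace: tensor %d\n", i);
        }
    }
    return 0;
}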
@@ -6451,15 +6455,15 @@ static uint8_t llama_token_to_byte(const llama_vocab& vocab, llama_token id) {
 static llama_token llama_byte_to_token(const llama_vocab & vocab, uint8_t ch) {
     static const char * hex = "0123456789ABCDEF";
     switch (llama_vocab_get_type(vocab)) {
-        case LLAMA_VOCAB_TYPE_SPM: {
-            const char buf[7] = { '<', '0', 'x', hex[ch >> 4], hex[ch & 15], '>', 0 };
-            return vocab.token_to_id.at(buf);
-        }
-        case LLAMA_VOCAB_TYPE_BPE: {
-            return vocab.token_to_id.at(bytes_to_unicode_bpe(ch));
-        }
-        default:
-            GGML_ASSERT(false);
+    case LLAMA_VOCAB_TYPE_SPM: {
+        const char buf[7] = { '<', '0', 'x', hex[ch >> 4], hex[ch & 15], '>', 0 };
+        return vocab.token_to_id.at(buf);
+    }
+    case LLAMA_VOCAB_TYPE_BPE: {
+        return vocab.token_to_id.at(bytes_to_unicode_bpe(ch));
+    }
+    default:
+        GGML_ASSERT(false);
     }
 }
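For reference, llama_byte_to_token maps a raw byte back to a token id: an SPM vocab stores every byte as a literal token spelled <0xNN> with uppercase hex digits, while a BPE vocab first converts the byte via bytes_to_unicode_bpe. A small sketch of the SPM spelling, using a stand-in map with a made-up id in place of vocab.token_to_id:

// sketch of the SPM byte-token spelling used in llama_byte_to_token;
// the map and the id 372 are stand-ins, not real vocab data
#include <cstdio>
#include <string>
#include <unordered_map>

int main() {
    static const char * hex = "0123456789ABCDEF";

    // a real SPM vocab maps "<0x00>".."<0xFF>" to token ids
    const std::unordered_map<std::string, int> token_to_id = { { "<0x41>", 372 } };

    const unsigned char ch = 'A'; // byte 0x41
    const char buf[7] = { '<', '0', 'x', hex[ch >> 4], hex[ch & 15], '>', 0 };

    std::printf("%s -> id %d\n", buf, token_to_id.at(buf)); // <0x41> -> id 372
    return 0;
}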
@@ -7095,7 +7099,7 @@ static std::vector<llama_vocab::id> llama_tokenize_internal(const llama_vocab &
                 }
 
 #ifdef PRETOKENIZERDEBUG
-                LLAMA_LOG_WARN(TT: (%ld %ld %ld) '%s'\n", raw_text.length(), fragment.offset, fragment.length, raw_text.c_str());
+                LLAMA_LOG_WARN("TT: (%ld %ld %ld) '%s'\n", raw_text.length(), fragment.offset, fragment.length, raw_text.c_str());
 #endif
                 llm_tokenizer_spm tokenizer(vocab);
                 llama_escape_whitespace(raw_text);
@@ -7116,7 +7120,7 @@ static std::vector<llama_vocab::id> llama_tokenize_internal(const llama_vocab &
                 auto raw_text = fragment.raw_text.substr(fragment.offset, fragment.length);
 
 #ifdef PRETOKENIZERDEBUG
-                LLAMA_LOG_WARN(TT: (%ld %ld %ld) '%s'\n", raw_text.length(), fragment.offset, fragment.length, raw_text.c_str());
+                LLAMA_LOG_WARN("TT: (%ld %ld %ld) '%s'\n", raw_text.length(), fragment.offset, fragment.length, raw_text.c_str());
 #endif
                 llm_tokenizer_bpe tokenizer(vocab);
                 tokenizer.tokenize(raw_text, output);
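The last two hunks are the same one-character fix in both tokenizer paths: the PRETOKENIZERDEBUG format string was missing its opening quote, so the file could not compile whenever that macro was defined. Because the line is normally preprocessed away, the typo went unnoticed in default builds.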