Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions src/llama-context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,14 @@ llama_context::llama_context(
cparams.causal_attn = params.attention_type == LLAMA_ATTENTION_TYPE_CAUSAL;
}

// Non-causal models (e.g. embedding/classification encoders such as BERT) cannot generate text.
// A generation request (signalled here by a non-zero n_outputs_max) would otherwise trip a
// GGML_ASSERT in output_reserve(), so fail early with a clear error message instead of aborting.
// Models that have an encoder are exempt from this check.
if (!cparams.causal_attn && !llama_model_has_encoder(&model) && params.n_outputs_max != 0) {
throw std::runtime_error("this model is non-causal (e.g. an embedding/classification model) "
"and cannot be used for text generation; use the embedding API (e.g. llama-embedding) instead");
}

cparams.flash_attn = params.flash_attn_type != LLAMA_FLASH_ATTN_TYPE_DISABLED;
cparams.auto_fa = params.flash_attn_type == LLAMA_FLASH_ATTN_TYPE_AUTO;

Expand Down
5 changes: 5 additions & 0 deletions tools/server/server-context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1166,6 +1166,11 @@ struct server_context_impl {
return false;
}

if (ctx_tgt == nullptr) {
SRV_ERR("failed to create context with model, '%s'\n", params_base.model.path.c_str());
return false;
}

vocab = llama_model_get_vocab(model_tgt);

n_ctx = llama_n_ctx(ctx_tgt);
Expand Down