llama_ros: llama.cpp for ROS 2
|
#include <llama.hpp>
Public Member Functions | |
Llama (const struct common_params &params, std::string system_prompt="", bool initial_reset=true) | |
virtual | ~Llama () |
std::vector< llama_token > | tokenize (const std::string &text, bool add_bos, bool special=false) |
std::string | detokenize (const std::vector< llama_token > &tokens) |
virtual void | reset () |
void | cancel () |
std::string | format_chat_prompt (std::vector< struct common_chat_msg > chat_msgs, bool add_ass) |
std::vector< struct LoRA > | list_loras () |
void | update_loras (std::vector< struct LoRA > loras) |
std::vector< llama_token > | truncate_tokens (const std::vector< llama_token > &tokens, int limit_size, bool add_eos=true) |
struct EmbeddingsOuput | generate_embeddings (const std::string &input_prompt, int normalization=2) |
struct EmbeddingsOuput | generate_embeddings (const std::vector< llama_token > &tokens, int normalization=2) |
float | rank_document (const std::string &query, const std::string &document) |
std::vector< float > | rank_documents (const std::string &query, const std::vector< std::string > &documents) |
struct ResponseOutput | generate_response (const std::string &input_prompt, struct common_params_sampling sparams, GenerateResponseCallback callbakc=nullptr, std::vector< std::string > stop={}) |
struct ResponseOutput | generate_response (const std::string &input_prompt, GenerateResponseCallback callbakc=nullptr, std::vector< std::string > stop={}) |
const struct llama_context * | get_ctx () |
const struct llama_model * | get_model () |
const struct llama_vocab * | get_vocab () |
int | get_n_ctx () |
int | get_n_ctx_train () |
int | get_n_embd () |
int | get_n_vocab () |
std::string | get_metadata (const std::string &key, size_t size) |
std::string | get_metadata (const std::string &model_name, const std::string &key, size_t size) |
int | get_int_metadata (const std::string &key, size_t size) |
int | get_int_metadata (const std::string &model_name, const std::string &key, size_t size) |
float | get_float_metadata (const std::string &key, size_t size) |
float | get_float_metadata (const std::string &model_name, const std::string &key, size_t size) |
struct Metadata | get_metadata () |
bool | is_embedding () |
bool | is_reranking () |
bool | add_bos_token () |
bool | is_eog () |
llama_token | get_token_eos () |
llama_token | get_token_bos () |
llama_token | get_token_sep () |
Protected Member Functions | |
virtual void | load_prompt (const std::string &input_prompt, bool add_pfx, bool add_sfx) |
StopType | find_stop (std::vector< struct CompletionOutput > completion_result_list, std::vector< std::string > stopping_words) |
StopType | find_stop_word (std::vector< struct CompletionOutput > completion_result_list, std::string stopping_word) |
bool | eval_system_prompt () |
virtual bool | eval_prompt () |
bool | eval_prompt (std::vector< llama_token > prompt_tokens) |
bool | eval_token (llama_token token) |
bool | eval (std::vector< llama_token > tokens) |
virtual bool | eval (struct llama_batch batch) |
std::vector< struct TokenProb > | get_probs () |
struct CompletionOutput | sample () |
Protected Attributes | |
struct common_params | params |
struct common_init_result | llama_init |
struct llama_context * | ctx |
struct llama_model * | model |
std::vector< common_adapter_lora_info > | lora_adapters |
struct common_sampler * | sampler |
struct ggml_threadpool * | threadpool |
struct ggml_threadpool * | threadpool_batch |
std::string | system_prompt |
bool | canceled |
llama_utils::Spinner | spinner |
std::vector< llama_token > | prompt_tokens |
int32_t | n_past |
int32_t | n_consumed |
int32_t | ga_i |
Private Attributes | |
std::recursive_mutex | mutex |
Llama::Llama | ( | const struct common_params & | params, |
std::string | system_prompt = "", | ||
bool | initial_reset = true ) |
|
virtual |
|
inline |
void Llama::cancel | ( | ) |
std::string Llama::detokenize | ( | const std::vector< llama_token > & | tokens | ) |
|
protected |
|
protectedvirtual |
Reimplemented in llava_ros::Llava.
|
protectedvirtual |
Reimplemented in llava_ros::Llava.
|
protected |
|
protected |
|
protected |
|
protected |
|
protected |
std::string Llama::format_chat_prompt | ( | std::vector< struct common_chat_msg > | chat_msgs, |
bool | add_ass ) |
struct EmbeddingsOuput Llama::generate_embeddings | ( | const std::string & | input_prompt, |
int | normalization = 2 ) |
struct EmbeddingsOuput Llama::generate_embeddings | ( | const std::vector< llama_token > & | tokens, |
int | normalization = 2 ) |
struct ResponseOutput Llama::generate_response | ( | const std::string & | input_prompt, |
GenerateResponseCallback | callbakc = nullptr, | ||
std::vector< std::string > | stop = {} ) |
struct ResponseOutput Llama::generate_response | ( | const std::string & | input_prompt, |
struct common_params_sampling | sparams, | ||
GenerateResponseCallback | callbakc = nullptr, | ||
std::vector< std::string > | stop = {} ) |
|
inline |
float Llama::get_float_metadata | ( | const std::string & | key, |
size_t | size ) |
float Llama::get_float_metadata | ( | const std::string & | model_name, |
const std::string & | key, | ||
size_t | size ) |
int Llama::get_int_metadata | ( | const std::string & | key, |
size_t | size ) |
int Llama::get_int_metadata | ( | const std::string & | model_name, |
const std::string & | key, | ||
size_t | size ) |
struct Metadata Llama::get_metadata | ( | ) |
std::string Llama::get_metadata | ( | const std::string & | key, |
size_t | size ) |
std::string Llama::get_metadata | ( | const std::string & | model_name, |
const std::string & | key, | ||
size_t | size ) |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
protected |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
std::vector< struct LoRA > Llama::list_loras | ( | ) |
|
protectedvirtual |
Reimplemented in llava_ros::Llava.
float Llama::rank_document | ( | const std::string & | query, |
const std::string & | document ) |
std::vector< float > Llama::rank_documents | ( | const std::string & | query, |
const std::vector< std::string > & | documents ) |
|
virtual |
Reimplemented in llava_ros::Llava.
|
protected |
std::vector< llama_token > Llama::tokenize | ( | const std::string & | text, |
bool | add_bos, | ||
bool | special = false ) |
std::vector< llama_token > Llama::truncate_tokens | ( | const std::vector< llama_token > & | tokens, |
int | limit_size, | ||
bool | add_eos = true ) |
void Llama::update_loras | ( | std::vector< struct LoRA > | loras | ) |
|
protected |
|
protected |
|
protected |
|
protected |
|
protected |
|
protected |
|
private |
|
protected |
|
protected |
|
protected |
|
protected |
|
protected |
|
protected |
|
protected |
|
protected |
|
protected |