Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 100 additions & 0 deletions csrc/models/backend_plugin_loader.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
#include "backend_plugin_loader.hpp"

#include <cstdlib>
#include <dlfcn.h>
#include <mutex>
#include <sstream>
#include <stdexcept>
#include <string>
#include <unordered_map>

namespace infinilm::models {
namespace {

using PluginInitFn = void (*)();

/// Returns the single mutex shared by every caller of this accessor.
///
/// The mutex is a function-local static, so it is constructed the first time
/// this function runs and the same object is returned on every later call.
std::mutex &loader_mutex() {
    static std::mutex registry_guard;
    return registry_guard;
}

/// Returns the process-wide table keyed by string with opaque `void *` values.
///
/// The table is a function-local static: it starts out empty and the same
/// instance is handed back on every call. The accessor itself does no locking.
std::unordered_map<std::string, void *> &loaded_handles() {
    using HandleTable = std::unordered_map<std::string, void *>;
    static HandleTable table;
    return table;
}

/// Strips leading and trailing blanks from `value`.
///
/// Only space, tab, newline and carriage return count as blanks. Interior
/// blanks are kept. Returns an empty string when `value` contains nothing
/// but blanks (or is empty).
std::string trim(std::string value) {
    constexpr const char *kBlanks = " \t\n\r";

    const std::string::size_type first = value.find_first_not_of(kBlanks);
    if (first == std::string::npos) {
        return std::string();
    }

    // A non-blank character exists, so this search cannot fail.
    const std::string::size_type last = value.find_last_not_of(kBlanks);
    const std::string::size_type length = last - first + 1;
    return value.substr(first, length);
}

/// Splits a comma-separated list into its trimmed, non-empty entries.
///
/// @param env_value NUL-terminated list, or nullptr.
/// @return Entries in their original order; empty when `env_value` is null,
///         empty, or contains only separators and blanks.
std::vector<std::string> split_plugins(const char *env_value) {
    std::vector<std::string> entries;

    const bool has_text = env_value != nullptr && env_value[0] != '\0';
    if (has_text) {
        std::stringstream input(env_value);
        for (std::string token; std::getline(input, token, ',');) {
            const std::string cleaned = trim(token);
            if (cleaned.empty()) {
                continue; // skip ",," and blank-only segments
            }
            entries.push_back(cleaned);
        }
    }

    return entries;
}

} // namespace

/// Loads one plugin shared object and records it so later calls are no-ops.
///
/// The path is trimmed first; an empty result returns immediately. While
/// holding the loader mutex, the function skips paths that are already
/// recorded, otherwise opens the library with `RTLD_NOW | RTLD_GLOBAL`.
/// If the library exports `infinilm_backend_plugin_init`, that function is
/// called once (still under the mutex) before the handle is recorded.
///
/// @param plugin_path Path handed to dlopen after trimming.
/// @throws std::runtime_error if dlopen fails; the message carries the path
///         and the dlerror() text when available.
void load_backend_plugin(const std::string &plugin_path) {
    const std::string normalized = trim(plugin_path);
    if (normalized.empty()) {
        return;
    }

    std::lock_guard<std::mutex> guard(loader_mutex());
    auto &registry = loaded_handles();
    if (registry.count(normalized) != 0) {
        return;
    }

    void *library = dlopen(normalized.c_str(), RTLD_NOW | RTLD_GLOBAL);
    if (library == nullptr) {
        const char *reason = dlerror();
        std::string message = "infinilm::models::load_backend_plugin: failed to load ";
        message += normalized;
        message += ": ";
        message += (reason != nullptr) ? reason : "unknown dlopen error";
        throw std::runtime_error(message);
    }

    // Clear any stale error so the dlerror() after dlsym reflects this lookup only.
    dlerror();
    void *symbol = dlsym(library, "infinilm_backend_plugin_init");
    const bool lookup_succeeded = dlerror() == nullptr;
    if (lookup_succeeded && symbol != nullptr) {
        // The init hook is optional; it is invoked only when the symbol resolves.
        reinterpret_cast<PluginInitFn>(symbol)();
    }

    registry[normalized] = library;
}

/// Loads every plugin listed in the `INFINILM_BACKEND_PLUGINS` environment
/// variable, in the order split_plugins() returns them.
///
/// Any exception raised by load_backend_plugin() propagates to the caller,
/// so entries after a failing one are not attempted.
void load_backend_plugins_from_env() {
    const char *raw_list = std::getenv("INFINILM_BACKEND_PLUGINS");
    const std::vector<std::string> requested = split_plugins(raw_list);
    for (const std::string &entry : requested) {
        load_backend_plugin(entry);
    }
}

/// Returns a copy of the keys currently stored in the loaded-handle table.
///
/// The loader mutex is held while the keys are copied. The order follows the
/// unordered_map iteration order and is therefore unspecified.
std::vector<std::string> loaded_backend_plugins() {
    std::lock_guard<std::mutex> guard(loader_mutex());

    const auto &registry = loaded_handles();
    std::vector<std::string> paths;
    paths.reserve(registry.size());
    for (const auto &entry : registry) {
        paths.push_back(entry.first);
    }
    return paths;
}

} // namespace infinilm::models
30 changes: 30 additions & 0 deletions csrc/models/backend_plugin_loader.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#pragma once

#include <string>
#include <vector>

namespace infinilm::models {

/**
 * Load one out-of-tree backend plugin shared object.
 *
 * The plugin may either rely on static initializers that call
 * `register_causal_lm_model` / `register_model_config`, or export an optional
 * `extern "C" void infinilm_backend_plugin_init()` function, which is called
 * once after the library has been opened. Loading is idempotent for each
 * path: a path that has already been loaded is skipped.
 *
 * Leading and trailing whitespace in `plugin_path` is ignored, and a path
 * that is empty after trimming is a no-op. Calls are serialized by an
 * internal mutex.
 *
 * @param plugin_path Path of the shared object to open.
 * @throws std::runtime_error if the shared object cannot be opened.
 */
void load_backend_plugin(const std::string &plugin_path);

/**
 * Load backend plugins from `INFINILM_BACKEND_PLUGINS`.
 *
 * The environment variable accepts comma-separated shared object paths.
 * Whitespace around each entry is ignored and empty entries are skipped;
 * an unset or empty variable loads nothing. Errors from
 * `load_backend_plugin` propagate to the caller.
 */
void load_backend_plugins_from_env();

/**
 * Return plugin paths that have already been loaded.
 *
 * The result is a snapshot taken under the loader's internal mutex; the
 * order of the returned paths is unspecified.
 */
std::vector<std::string> loaded_backend_plugins();

} // namespace infinilm::models
7 changes: 7 additions & 0 deletions csrc/models/infinilm_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,13 @@ void InfinilmModel::reset_cache(const cache::CacheConfig *cache_config) {
kv_cache_vec = std::move(default_allocate_kv_cache_tensors(cache_config, model_config_, attention_backend));
}

/// Passes every (name, tensor) entry of `params` to load_parameter().
///
/// Entries are visited in the map's iteration order, which is unspecified.
/// NOTE(review): the "no_sync" suffix suggests the caller is responsible for
/// any device synchronization afterwards — confirm against call sites.
void InfinilmModel::load_parameters_no_sync(
    const std::unordered_map<std::string, infinicore::Tensor> &params) {
    for (auto entry = params.begin(); entry != params.end(); ++entry) {
        load_parameter(entry->first, entry->second);
    }
}

std::vector<infinicore::Tensor> InfinilmModel::default_allocate_kv_cache_tensors(
const cache::CacheConfig *cache_config,
const std::shared_ptr<infinilm::config::ModelConfig> &text_config,
Expand Down
3 changes: 3 additions & 0 deletions csrc/models/infinilm_model.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "infinicore/tensor.hpp"

#include <optional>
#include <unordered_map>
#include <vector>

namespace infinilm {
Expand Down Expand Up @@ -57,6 +58,8 @@ class InfinilmModel : public infinicore::nn::Module {
return cache_config_.get();
}

void load_parameters_no_sync(
const std::unordered_map<std::string, infinicore::Tensor> &params);
void process_weights_after_loading();
void reset_runtime_state() const;

Expand Down
8 changes: 8 additions & 0 deletions csrc/pybind11/bindings.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include "cache/cache.hpp"
#include "engine/engine.hpp"
#include "../models/backend_plugin_loader.hpp"

namespace py = pybind11;

Expand All @@ -12,4 +13,11 @@ PYBIND11_MODULE(_infinilm, m) {
infinilm::engine::bind_hook_registry(m);
infinilm::engine::distributed::bind_dist_config(m);
infinilm::engine::bind_infer_engine(m);

m.def("load_backend_plugin", &infinilm::models::load_backend_plugin,
"Load one InfiniLM C++ backend plugin shared object.");
m.def("load_backend_plugins_from_env", &infinilm::models::load_backend_plugins_from_env,
"Load InfiniLM C++ backend plugins from INFINILM_BACKEND_PLUGINS.");
m.def("loaded_backend_plugins", &infinilm::models::loaded_backend_plugins,
"Return paths of loaded InfiniLM C++ backend plugins.");
}
54 changes: 42 additions & 12 deletions python/infinilm/__init__.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,41 @@
from .models import AutoLlamaModel
from . import distributed
from . import cache
from . import llm
from . import base_config

from .llm import (
LLM,
AsyncLLMEngine,
SamplingParams,
RequestOutput,
TokenOutput,
from importlib import import_module

from .plugins import (
ModelSpec,
load_plugin,
load_plugins,
register_model,
registered_model_types,
)


# Public attribute name -> (absolute module path, attribute name inside it).
# The module-level __getattr__ imports the module and fetches the attribute
# the first time the name is accessed on this package.
_LAZY_ATTRS = {
"AutoLlamaModel": ("infinilm.models", "AutoLlamaModel"),
"LLM": ("infinilm.llm", "LLM"),
"AsyncLLMEngine": ("infinilm.llm", "AsyncLLMEngine"),
"SamplingParams": ("infinilm.llm", "SamplingParams"),
"RequestOutput": ("infinilm.llm", "RequestOutput"),
"TokenOutput": ("infinilm.llm", "TokenOutput"),
}

# Submodule names that the module-level __getattr__ imports relative to this
# package on first access.
_LAZY_MODULES = {"distributed", "cache", "llm", "base_config"}


def __getattr__(name):
    """Resolve lazily exported names on first attribute access.

    Names in ``_LAZY_MODULES`` are imported as submodules of this package;
    names in ``_LAZY_ATTRS`` are fetched from the module they map to. The
    resolved object is stored in the module globals so later lookups find it
    directly.

    Raises:
        AttributeError: if ``name`` is not a known lazy module or attribute.
    """
    if name in _LAZY_MODULES:
        resolved = import_module(f".{name}", __name__)
    else:
        target = _LAZY_ATTRS.get(name)
        if target is None:
            raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
        module_name, attr_name = target
        resolved = getattr(import_module(module_name), attr_name)

    # Cache the result so this hook is not consulted again for the same name.
    globals()[name] = resolved
    return resolved

__all__ = [
"AutoLlamaModel",
"distributed",
Expand All @@ -24,4 +48,10 @@
"SamplingParams",
"RequestOutput",
"TokenOutput",
# Out-of-tree model plugins
"ModelSpec",
"load_plugin",
"load_plugins",
"register_model",
"registered_model_types",
]
53 changes: 53 additions & 0 deletions python/infinilm/backend_plugins.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
from __future__ import annotations

import os
from collections.abc import Sequence


def _split_plugin_list(value: str | None) -> list[str]:
if not value:
return []
return [item.strip() for item in value.split(",") if item.strip()]


def _backend_module():
    """Return the ``_infinilm`` object imported from ``infinilm.lib``.

    The import is performed inside the function, so it runs when this helper
    is called, not when the enclosing module is imported.
    """
    from infinilm.lib import _infinilm as backend

    return backend


def load_backend_plugin(plugin: str | os.PathLike[str]) -> None:
    """Load a single InfiniLM C++ backend plugin shared object.

    ``plugin`` is converted with ``os.fspath`` and handed to the backend
    module's ``load_backend_plugin``.
    """
    # Resolve the backend function first, then convert the path, matching the
    # original evaluation order.
    native_loader = _backend_module().load_backend_plugin
    native_loader(os.fspath(plugin))


def load_backend_plugins(plugins: Sequence[str | os.PathLike[str]] | str | None = None) -> tuple[str, ...]:
    """Load the requested C++ backend plugins and report what is loaded.

    Args:
        plugins: ``None`` or an empty value loads nothing. A single ``str`` or
            path-like is treated as a comma-separated list. Any other truthy
            value is iterated and each item is converted with ``os.fspath``.

    Returns:
        The value of ``loaded_backend_plugins()`` after loading.
    """
    if isinstance(plugins, (str, os.PathLike)):
        requested = _split_plugin_list(os.fspath(plugins))
    elif plugins:
        # Convert every item up front so a bad entry fails before any load.
        requested = [os.fspath(item) for item in plugins]
    else:
        requested = []

    for path in requested:
        load_backend_plugin(path)

    return loaded_backend_plugins()


def load_backend_plugins_from_env() -> tuple[str, ...]:
    """Load the backend plugins named in ``INFINILM_BACKEND_PLUGINS``.

    The variable's value (or ``None`` when unset) is passed to
    ``load_backend_plugins``, whose result is returned. This helper is meant
    to be called explicitly by command-line or embedding workflows; nothing
    in this function is triggered implicitly.
    """
    env_value = os.environ.get("INFINILM_BACKEND_PLUGINS")
    return load_backend_plugins(env_value)


def loaded_backend_plugins() -> tuple[str, ...]:
    """Return, as a tuple, the plugin paths the backend module reports as loaded."""
    reported = _backend_module().loaded_backend_plugins()
    return tuple(reported)
10 changes: 8 additions & 2 deletions python/infinilm/infer_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@

import infinicore

from infinilm.cache import PagedKVCacheConfig
from infinilm.cache import StaticKVCacheConfig, PagedKVCacheConfig
from infinilm.backend_plugins import load_backend_plugins
from infinilm.distributed import DistConfig
from infinilm.lib import _infinilm
from infinilm.plugins import adapt_config, load_plugins

from .modeling_utils import parse_dtype
from .exception_utils import handle_oom_and_exit
Expand Down Expand Up @@ -67,10 +69,14 @@ def __init__(
enable_graph_compiling=False,
attention_backend="default",
kv_cache_dtype=None,
backend_plugins=None,
use_mla=False,
):
self.hf_config = read_hf_config(model_path)
load_plugins()
self.hf_config = adapt_config(read_hf_config(model_path))
self.hf_generation_config = read_hf_generation_config(model_path)
load_backend_plugins(self.hf_config.get("_infinilm_backend_plugins"))
load_backend_plugins(backend_plugins)

if device is None:
device = infinicore.device()
Expand Down
3 changes: 3 additions & 0 deletions python/infinilm/lib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
import os
from pathlib import Path

# Register shared pybind11 types used by the InfiniLM extension.
import infinicore # noqa: F401

# Ensure the directory containing this __init__.py is on sys.path
# This allows importing the .so file from the same directory
_lib_dir = Path(__file__).parent
Expand Down
Loading