Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Xmake cache
.xmake/
build/
python/infinilm/lib/*.so

# MacOS Cache
.DS_Store
Expand All @@ -10,12 +11,13 @@ build/

# Python
__pycache__/
*.egg-info/

# Log
*.log

# Cache
cache/
.cache/

# JSON
*.json
Expand Down
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[submodule "third_party/spdlog"]
path = third_party/spdlog
url = https://github.com/gabime/spdlog.git
116 changes: 116 additions & 0 deletions csrc/cache/kv_cache.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
#pragma once

#include "infinicore/tensor.hpp"
#include "infinicore/device.hpp"
#include <algorithm>
#include <utility>
#include <memory>

namespace infinilm::cache {

/**
 * @brief Simple KV cache structure for incremental decoding
 *
 * Stores key and value caches with shape [n_kv_head, capacity, head_dim].
 * Similar to DynamicLayer in Python cache_utils.py.
 *
 * This is a common component that can be used by any model architecture
 * that needs KV caching for attention mechanisms.
 *
 * NOTE(review): no internal synchronization — callers must serialize access
 * to a given KVCache instance.
 */
struct KVCache {
    infinicore::Tensor k_cache; // [n_kv_head, capacity, head_dim]
    infinicore::Tensor v_cache; // [n_kv_head, capacity, head_dim]
    size_t cache_position;      // Number of tokens currently stored in the cache
    size_t max_capacity;        // Allocated capacity (in tokens) of k_cache/v_cache
    bool initialized;           // Whether the real cache tensors have been allocated

    // Capacity used for the first real allocation; the cache then grows
    // geometrically (at least doubling) as needed.
    static constexpr size_t kInitialCapacity = 4096;

    // Mem-initializers are written in member declaration order (k_cache,
    // v_cache, then the scalars) because members are always initialized in
    // declaration order anyway; listing them out of order triggers -Wreorder
    // and misleads readers. The 1x1x1 CPU tensors are placeholders that are
    // replaced on first use by ensure_capacity().
    KVCache()
        : k_cache(infinicore::Tensor::empty({1, 1, 1}, infinicore::DataType::F32,
                                            infinicore::Device(infinicore::Device::Type::CPU, 0))),
          v_cache(infinicore::Tensor::empty({1, 1, 1}, infinicore::DataType::F32,
                                            infinicore::Device(infinicore::Device::Type::CPU, 0))),
          cache_position(0),
          max_capacity(0),
          initialized(false) {}

    /**
     * @brief Initialize the cache, or grow it so `seq_len` more tokens fit
     * @param num_kv_heads Number of key-value heads
     * @param head_dim Head dimension
     * @param seq_len Sequence length of the new tokens about to be appended
     * @param dtype Data type of the cache tensors
     * @param device Device the cache tensors live on
     *
     * On first call the cache is lazily allocated with capacity at least
     * kInitialCapacity. On later calls, if cache_position + seq_len exceeds
     * max_capacity, new tensors are allocated (at least doubling capacity)
     * and the already-cached prefix is copied over.
     */
    void ensure_capacity(size_t num_kv_heads, size_t head_dim, size_t seq_len,
                         infinicore::DataType dtype, const infinicore::Device &device) {
        size_t required_capacity = cache_position + seq_len;

        // Lazy initialization
        if (!initialized) {
            max_capacity = std::max(required_capacity, kInitialCapacity);
            k_cache = infinicore::Tensor::empty({num_kv_heads, max_capacity, head_dim},
                                                dtype, device);
            v_cache = infinicore::Tensor::empty({num_kv_heads, max_capacity, head_dim},
                                                dtype, device);
            cache_position = 0;
            initialized = true;
        }
        // Grow cache if needed (similar to DynamicLayer in Python)
        else if (required_capacity > max_capacity) {
            size_t new_capacity = std::max(max_capacity * 2, required_capacity);
            auto k_new = infinicore::Tensor::empty({num_kv_heads, new_capacity, head_dim},
                                                   dtype, device);
            auto v_new = infinicore::Tensor::empty({num_kv_heads, new_capacity, head_dim},
                                                   dtype, device);

            // Copy the already-cached prefix into the new tensors
            // (dim 1 is the sequence axis).
            if (cache_position > 0) {
                auto k_slice = k_cache->narrow({{1, 0, cache_position}});
                auto v_slice = v_cache->narrow({{1, 0, cache_position}});
                k_new->narrow({{1, 0, cache_position}})->copy_from(k_slice);
                v_new->narrow({{1, 0, cache_position}})->copy_from(v_slice);
            }

            k_cache = k_new;
            v_cache = v_new;
            max_capacity = new_capacity;
        }
    }

    /**
     * @brief Update cache with new key and value states
     * @param k_new New key states [n_kv_head, seq_len, head_dim]
     * @param v_new New value states [n_kv_head, seq_len, head_dim]
     * @return Pair of (k_total, v_total), each [n_kv_head, cache_position, head_dim],
     *         made contiguous before being returned
     *
     * Note: This method writes to the cache. If using with attention op, the attention op
     * also writes to the cache, so this should be called AFTER attention, not before.
     */
    std::pair<infinicore::Tensor, infinicore::Tensor> update(
        const infinicore::Tensor &k_new,
        const infinicore::Tensor &v_new) {
        size_t seq_len = k_new->shape()[1];
        size_t num_kv_heads = k_new->shape()[0];
        size_t head_dim = k_new->shape()[2];

        // Ensure capacity before writing
        ensure_capacity(num_kv_heads, head_dim, seq_len,
                        k_new->dtype(), k_new->device());

        // Copy new k/v into cache at current position (dim 1 is the sequence axis)
        auto k_dst = k_cache->narrow({{1, cache_position, seq_len}});
        auto v_dst = v_cache->narrow({{1, cache_position, seq_len}});
        k_dst->copy_from(k_new);
        v_dst->copy_from(v_new);

        // Update position
        cache_position += seq_len;

        // Return the total cache up to current position
        auto k_total = k_cache->narrow({{1, 0, cache_position}});
        auto v_total = v_cache->narrow({{1, 0, cache_position}});

        return std::make_pair(k_total->contiguous(), v_total->contiguous());
    }
};

} // namespace infinilm::cache
44 changes: 44 additions & 0 deletions csrc/models/debug_utils/hooks.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#include "hooks.hpp"
#include <spdlog/spdlog.h>

namespace infinilm::models::debug_utils {

// Register (or replace) a hook callback under `name`.
// The callback parameter is taken by value, so move it into the map instead
// of copying: std::function copies can be expensive for capture-heavy lambdas.
void HookRegistry::register_hook(const std::string &name, HookCallback callback) {
    hooks_[name] = std::move(callback);
    SPDLOG_DEBUG("HookRegistry: Registered hook '{}'", name);
}

// Invoke the callback registered for `name`, if any. Exact-name matches take
// priority; otherwise the first registered pattern ending in '*' whose prefix
// matches `name` is invoked. At most one callback runs per call. Exceptions
// thrown by callbacks are logged and swallowed so debug hooks cannot crash
// the forward pass.
void HookRegistry::call_hook(const std::string &name, const infinicore::Tensor &tensor, int layer_idx) const {
    // Try exact match first
    auto it = hooks_.find(name);
    if (it != hooks_.end()) {
        try {
            it->second(name, tensor, layer_idx);
        } catch (const std::exception &e) {
            SPDLOG_ERROR("HookRegistry: Error calling hook '{}': {}", name, e.what());
        }
        return;
    }

    // Try pattern matching (e.g., "layer0_*" matches "layer0_q_after_proj")
    for (const auto &[pattern, callback] : hooks_) {
        // Guard against an empty registered name: std::string::back() on an
        // empty string is undefined behavior.
        if (pattern.empty() || pattern.back() != '*') {
            continue;
        }
        const size_t prefix_len = pattern.size() - 1;
        // compare() tests the prefix in place, avoiding the temporary string
        // that substr() would allocate on every iteration.
        if (name.size() >= prefix_len && name.compare(0, prefix_len, pattern, 0, prefix_len) == 0) {
            try {
                callback(name, tensor, layer_idx);
            } catch (const std::exception &e) {
                SPDLOG_ERROR("HookRegistry: Error calling hook pattern '{}' for '{}': {}", pattern, name, e.what());
            }
            return;
        }
    }
}

// Remove every registered hook; afterwards has_hooks() returns false.
void HookRegistry::clear() {
    hooks_.clear();
    SPDLOG_DEBUG("HookRegistry: Cleared all hooks");
}

} // namespace infinilm::models::debug_utils
186 changes: 186 additions & 0 deletions csrc/models/debug_utils/hooks.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
#pragma once
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This could live in infinicore as common infrastructure for nn::module.


#include "infinicore/tensor.hpp"
#include <functional>
#include <string>
#include <memory>
#include <unordered_map>

namespace infinilm::models::debug_utils {

// TODO: move to InfiniCore as common utils in future work

/**
 * @brief Hook callback type for capturing intermediate values (DEBUG ONLY)
 *
 * Hook functions are called with:
 * - name: Identifier for the intermediate value (e.g., "layer0_q_after_proj")
 * - tensor: The intermediate tensor value
 * - layer_idx: Layer index (for layer-specific hooks, -1 if not applicable)
 *
 * NOTE: This is a debug utility. Do not use in production code.
 */
using HookCallback = std::function<void(const std::string &name, const infinicore::Tensor &tensor, int layer_idx)>;

/**
 * @brief Hook registry for managing hooks (DEBUG ONLY)
 *
 * Lookup is by exact name first; a registered name ending in '*' acts as a
 * prefix pattern (e.g., "layer0_*" matches "layer0_q_after_proj").
 * NOTE(review): no internal synchronization is visible here — confirm a
 * registry is not shared across threads before relying on it.
 *
 * NOTE: This is a debug utility for capturing intermediate tensor values
 * during model execution. Do not use in production code.
 */
class HookRegistry {
public:
    /**
     * @brief Register a hook callback
     *
     * Registering an existing name replaces the previous callback.
     *
     * @param name Hook name (can be pattern like "layer0_*" or specific name)
     * @param callback Hook callback function
     */
    void register_hook(const std::string &name, HookCallback callback);

    /**
     * @brief Call hook if registered
     *
     * Exact-name matches take priority over pattern matches; at most one
     * callback is invoked per call.
     *
     * @param name Full hook name
     * @param tensor Tensor to pass to hook
     * @param layer_idx Layer index (-1 if not applicable)
     */
    void call_hook(const std::string &name, const infinicore::Tensor &tensor, int layer_idx = -1) const;

    /**
     * @brief Clear all hooks
     */
    void clear();

    /**
     * @brief Check if any hooks are registered
     */
    bool has_hooks() const { return !hooks_.empty(); }

private:
    // Registered callbacks keyed by exact hook name or '*'-suffixed prefix pattern.
    std::unordered_map<std::string, HookCallback> hooks_;
};

/**
 * @brief Macro to simplify hook registration (DEBUG ONLY)
 *
 * Usage: REGISTER_HOOK(registry, "hook_name", callback)
 */
#define REGISTER_HOOK(registry, name, callback) \
    (registry)->register_hook(name, callback)

/**
 * @brief Macro to simplify hook calls with automatic null and has_hooks checks (DEBUG ONLY)
 *
 * Usage: CALL_HOOK(registry, "hook_name", tensor)
 * Note: layer_idx defaults to -1
 */
#define CALL_HOOK(registry, name, tensor) \
    do { \
        if ((registry) && (registry)->has_hooks()) { \
            (registry)->call_hook(name, tensor, -1); \
        } \
    } while (0)

/**
 * @brief Macro to simplify hook calls with explicit layer index (DEBUG ONLY)
 *
 * Usage: CALL_HOOK_LAYER(registry, "hook_name", tensor, layer_idx)
 */
#define CALL_HOOK_LAYER(registry, name, tensor, layer_idx) \
    do { \
        if ((registry) && (registry)->has_hooks()) { \
            (registry)->call_hook(name, tensor, layer_idx); \
        } \
    } while (0)

/**
 * @brief Macros to simplify hook_registry and hook_prefix management in model classes
 */

// Declare hook_registry and hook_prefix member variables
#define HOOK_REGISTRY_MEMBER() \
    std::shared_ptr<debug_utils::HookRegistry> hook_registry_; \
    std::string hook_prefix_;

// Set hook_registry and hook_prefix (no forwarding to submodules)
#define SET_HOOK_REGISTRY_SIMPLE() \
    void set_hook_registry(const std::shared_ptr<debug_utils::HookRegistry> &hook_registry, const std::string &hook_prefix = "") { \
        hook_registry_ = hook_registry; \
        hook_prefix_ = hook_prefix; \
    }

// Helper macro to build incremental hook prefix: "name" when the parent prefix
// is empty, otherwise "prefix_name".
#define BUILD_HOOK_PREFIX(prefix, name) \
    (prefix.empty() ? std::string(name) : prefix + "_" + std::string(name))

// Set hook_registry and hook_prefix and forward to one or more submodules
// Usage: SET_HOOK_REGISTRY(submodule1) or SET_HOOK_REGISTRY(submodule1, submodule2)
// The hook_prefix will be incremented for each submodule (e.g., "layer0" -> "layer0_attention")
// Note: Currently supports up to 2 submodules. For more, extend the pattern below.
#define SET_HOOK_REGISTRY(...) \
    SET_HOOK_REGISTRY_IMPL(__VA_ARGS__)

// Dispatch on the number of variadic arguments (classic argument-count trick):
// the caller's arguments are followed by the candidate implementations, and
// SET_HOOK_REGISTRY_GET_NTH picks whichever lands in the 3rd slot.
#define SET_HOOK_REGISTRY_IMPL(...) \
    SET_HOOK_REGISTRY_GET_NTH(__VA_ARGS__, SET_HOOK_REGISTRY_2, SET_HOOK_REGISTRY_1, SET_HOOK_REGISTRY_0,)(__VA_ARGS__)

// Expansion of the 3rd slot for each call shape:
//   1 arg : (arg,      _2, _1, _0,)  -> slot 3 = SET_HOOK_REGISTRY_1  (correct)
//   2 args: (a1, a2,   _2, _1, _0,)  -> slot 3 = SET_HOOK_REGISTRY_2  (correct)
// NOTE(review): with 0 args the 3rd slot is SET_HOOK_REGISTRY_1, NOT
// SET_HOOK_REGISTRY_0 as the name below suggests — and an empty __VA_ARGS__
// is non-standard before C++20 anyway. Callers must pass at least one
// submodule; use SET_HOOK_REGISTRY_SIMPLE() for the no-submodule case.

// Selector: always expands to the macro's 3rd argument.
#define SET_HOOK_REGISTRY_GET_NTH(_1, _2, _3, N, ...) _3

// Implementation for 0 args (not reachable through the dispatcher above —
// kept only so the name referenced in SET_HOOK_REGISTRY_IMPL exists)
#define SET_HOOK_REGISTRY_0() \
    void set_hook_registry(const std::shared_ptr<debug_utils::HookRegistry> &hook_registry, const std::string &hook_prefix = "") { \
        hook_registry_ = hook_registry; \
        hook_prefix_ = hook_prefix; \
    }

// Implementation for 1 arg: forwards to `submodule`_ with prefix "prefix_submodule"
#define SET_HOOK_REGISTRY_1(submodule) \
    void set_hook_registry(const std::shared_ptr<debug_utils::HookRegistry> &hook_registry, const std::string &hook_prefix = "") { \
        hook_registry_ = hook_registry; \
        hook_prefix_ = hook_prefix; \
        if (submodule##_) { \
            std::string submodule_prefix = BUILD_HOOK_PREFIX(hook_prefix, #submodule); \
            submodule##_->set_hook_registry(hook_registry, submodule_prefix); \
        } \
    }

// Implementation for 2 args: forwards to both submodules with incremented prefixes
#define SET_HOOK_REGISTRY_2(submodule1, submodule2) \
    void set_hook_registry(const std::shared_ptr<debug_utils::HookRegistry> &hook_registry, const std::string &hook_prefix = "") { \
        hook_registry_ = hook_registry; \
        hook_prefix_ = hook_prefix; \
        if (submodule1##_) { \
            std::string submodule1_prefix = BUILD_HOOK_PREFIX(hook_prefix, #submodule1); \
            submodule1##_->set_hook_registry(hook_registry, submodule1_prefix); \
        } \
        if (submodule2##_) { \
            std::string submodule2_prefix = BUILD_HOOK_PREFIX(hook_prefix, #submodule2); \
            submodule2##_->set_hook_registry(hook_registry, submodule2_prefix); \
        } \
    }

// Set hook_registry and hook_prefix for a vector of submodules
// For vectors, the prefix is incremented with an index (e.g., "layer0", "layer1", ...)
// If parent has a prefix, it becomes "parent_layer0", "parent_layer1", etc.
#define SET_HOOK_REGISTRY_VEC(vec_name) \
    void set_hook_registry(const std::shared_ptr<debug_utils::HookRegistry> &hook_registry, const std::string &hook_prefix = "") { \
        hook_registry_ = hook_registry; \
        hook_prefix_ = hook_prefix; \
        for (size_t i = 0; i < vec_name##_.size(); ++i) { \
            if (vec_name##_[i]) { \
                std::string layer_name = "layer" + std::to_string(i); \
                std::string item_prefix = BUILD_HOOK_PREFIX(hook_prefix, layer_name); \
                vec_name##_[i]->set_hook_registry(hook_registry, item_prefix); \
            } \
        } \
    }

} // namespace infinilm::models::debug_utils
Loading