Skip to content

Commit 338a471

Browse files
authored
Merge pull request #11 from paulapistrila3/main
UD32 OS Release
2 parents 40a0842 + a799bdd commit 338a471

File tree

161 files changed

+13510
-10006
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

161 files changed

+13510
-10006
lines changed

CMakeLists.txt

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,13 @@ set(CMAKE_CXX_STANDARD 17 CACHE STRING "C++ standard to conform to")
1414
# set the project name
1515
project(VPUNN)
1616

17+
# Enable /bigobj for all MSVC builds (the Debug-only condition below is commented out) - remove after vpunn headers are cleaned up
18+
#if (MSVC AND CMAKE_BUILD_TYPE STREQUAL "Debug")
19+
if (MSVC)
20+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /bigobj")
21+
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /bigobj")
22+
endif()
23+
1724
# impacts the blas/blasStatic library , if ON and linux will enable vector instructions by -msse[N]
1825
# on win(MSVC) will define USE_SIMD
1926
# if target architecture does not support it, deactivate it and enable manually the required optimizations (for vector instructions if runtime is important) at least for blas
@@ -26,6 +33,14 @@ option(VPUNN_BUILD_TESTS "build tests" ON)
2633
option(VPUNN_ENABLE_LOGGING "enable logging" OFF)
2734
option(ENABLE_PYTHON_BINDING "Build the python bindings" OFF)
2835
option(GENERATE_PYTHON_BINDING "Generate the python bindings code" OFF)
36+
option(VPUNN_BUILD_HTTP_CLIENT "Build support for cost provider http service" OFF)
37+
if (VPUNN_BUILD_HTTP_CLIENT)
38+
add_compile_definitions(VPUNN_BUILD_HTTP_CLIENT)
39+
message(STATUS "-- Enable HTTP service cost provider ")
40+
set (VPUNN_HTTP_CLIENT_LIB vpunn_http_client)
41+
else()
42+
set (VPUNN_HTTP_CLIENT_LIB "")
43+
endif()
2944

3045
option(VPUNN_OPT_LEGACY_ZTILING "Use legacy ZTiling mechanism" ON)
3146
if(VPUNN_OPT_LEGACY_ZTILING)

README.md

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -257,4 +257,18 @@ For most updated list of operators and their details see also the unit tests: Te
257257

258258
For information about the profiled operators and extra parameters you can consult this [document](src/shave/Readme.md#shave-current-operators)
259259

260-
260+
## Cost providers
261+
262+
The cost model is designed to be extensible. The cost providers are the classes that implement the cost model for a specific device. The cost providers are selected at runtime based on the device type. The following cost providers are available:
263+
- NN based cost provider - is a learned performance model.
264+
- Theoretical cost provider - is a simple mathematical model.
265+
- "Oracle" cost provider - a LUT of measured performance for specific workloads.
266+
- Profiled cost provider - an HTTP service that can be queried to get the measured performance of a specific workload.
267+
- Currently it supports only DPU costs, and it can be configured using the following environment variables:
268+
- `ENABLE_VPUNN_PROFILING_SERVICE` -- `TRUE` to enable the profiling service
269+
- `VPUNN_PROFILING_SERVICE_BACKEND` -- `silicon` to use the RVP for profiling, `vpuem` to use VPUEM as a cost provider.
270+
- `VPUNN_PROFILING_SERVICE_HOST` -- address of the profiling service host, default is `irlccggpu04.ir.intel.com`
271+
- `VPUNN_PROFILING_SERVICE_PORT` -- port of the profiling service, default is `5000`
272+
273+
To see a list of all queried workloads and which cost provider was used for each, set the environment variable `ENABLE_VPUNN_DATA_SERIALIZATION` to `TRUE`.
274+
This will generate a couple of `csv` files in the directory where vpunn is used.

include/core/cache.h

Lines changed: 171 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -12,33 +12,70 @@
1212

1313
#include <list>
1414
#include <map>
15+
#include <optional>
1516
#include <stdexcept>
1617
#include <vector>
18+
#include <thread>
19+
#include <filesystem>
20+
#include <shared_mutex>
21+
22+
#include <cassert>
1723

18-
#include "core/utils.h"
1924
#include "core/persistent_cache.h"
25+
#include "core/utils.h"
2026

2127
namespace VPUNN {
2228

23-
/**
24-
* @brief a workload cache using LRU (least recent used) replacement policy
25-
* @tparam K is the Key type
26-
* @tparam V is the Value type
27-
*/
2829
template <typename K, typename V>
29-
class LRUCache {
30-
private:
31-
typedef std::list<std::pair<K, V>> List;
32-
typedef typename List::const_iterator List_Iter_cnst;
30+
class FixedCacheAddON {
31+
protected:
32+
// FixedCacheAddON(): FixedCacheAddON("", "") {
33+
// }
34+
FixedCacheAddON(const std::string& filename, const std::string& prio2_loadIfPairedCacheExists)
35+
: deserialized_table{[&]() {
36+
auto env_override = check_if_env_path_override();
37+
if (!env_override.empty()) {
38+
return FixedCache(env_override);
39+
}
40+
return FixedCache(decideCacheFilename(filename, prio2_loadIfPairedCacheExists));
41+
}()} {
42+
}
3343

34-
typedef std::map<K, List_Iter_cnst> Map;
35-
typedef typename Map::const_iterator Map_Iter_cnst;
44+
FixedCacheAddON(const char* file_data, size_t file_data_length)
45+
: deserialized_table{[&]() {
46+
auto env_override = check_if_env_path_override();
47+
if (!env_override.empty()) {
48+
return FixedCache(env_override);
49+
}
50+
return FixedCache(file_data, file_data_length);
51+
}()} {
52+
}
3653

37-
List workloads; ///< list with first being the most recently used key
38-
Map m_table; ///< table for fast searching of keys (contains pointers to list objects)
39-
const size_t max_size;
40-
size_t size{0};
54+
protected:
55+
bool contains(const K& wl) const {
56+
if constexpr (has_hash_v<K>) {
57+
if (deserialized_table.contains(wl.hash()))
58+
return true;
59+
} else {
60+
if (deserialized_table.contains(NNDescriptor<float>(wl).hash()))
61+
return true;
62+
}
63+
return false;
64+
}
65+
66+
std::optional<V> get(const K& wl) const {
67+
// Check if the workload is in the deserialized table
68+
uint32_t wlhash{0};
69+
if constexpr (has_hash_v<K>) {
70+
wlhash = wl.hash();
71+
} else {
72+
wlhash = NNDescriptor<float>(wl).hash();
73+
}
4174

75+
return deserialized_table.get(wlhash);
76+
}
77+
78+
private:
4279
/// loaded from file, must be loaded from a file with the same descriptor signature
4380
/// @note this is a draft implementation
4481
/// This datatype knows it is a float Value and uint32 key. this beats the K, V template
@@ -62,6 +99,41 @@ class LRUCache {
6299
return selected_filename;
63100
}
64101

102+
static std::string check_if_env_path_override() {
103+
auto env_cache_path = get_env_vars({"VPUNN_CACHE_PATH"}).at("VPUNN_CACHE_PATH");
104+
if (!env_cache_path.empty() && std::filesystem::exists(env_cache_path)) {
105+
return env_cache_path;
106+
}
107+
return {};
108+
}
109+
110+
public:
111+
const AccessCounter& getPreloadedCacheCounter() const {
112+
return deserialized_table.getCounter();
113+
}
114+
};
115+
116+
/**
117+
* @brief a workload cache using LRU (least recent used) replacement policy
118+
* @tparam K is the Key type
119+
* @tparam V is the Value type
120+
*/
121+
template <typename K, typename V>
122+
class LRUCache : public FixedCacheAddON<K, V> {
123+
private:
124+
typedef std::list<std::pair<K, V>> List;
125+
typedef typename List::const_iterator List_Iter_cnst;
126+
127+
typedef std::map<K, List_Iter_cnst> Map;
128+
typedef typename Map::const_iterator Map_Iter_cnst;
129+
130+
mutable List workloads; ///< list with first being the most recently used key.
131+
Map m_table; ///< table for fast searching of keys (contains pointers to list objects, as iterators)
132+
133+
const size_t max_size;
134+
135+
mutable std::shared_mutex mtx; ///< Mutex to protect shared resources.
136+
65137
public:
66138
/**
67139
* @brief Construct a new LRUCache object
@@ -70,16 +142,17 @@ class LRUCache {
70142
*/
71143
explicit LRUCache(size_t max_size, const std::string& filename = "",
72144
const std::string& prio2_loadIfPairedCacheExists = "")
73-
: max_size(max_size), deserialized_table{decideCacheFilename(filename, prio2_loadIfPairedCacheExists)} {
145+
: FixedCacheAddON<K, V>(filename, prio2_loadIfPairedCacheExists), max_size(max_size) {
74146
}
75147

76148
// const char* model_data, size_t model_data_length, bool copy_model_data
77-
explicit LRUCache(size_t max_size, const char* file_data = nullptr, size_t file_data_length = 0)
78-
: max_size(max_size), deserialized_table{file_data, file_data_length} {
149+
explicit LRUCache(size_t max_size, const char* file_data, size_t file_data_length)
150+
: FixedCacheAddON<K, V>(file_data, file_data_length), max_size(max_size) {
79151
}
80152

81153
/**
82-
* @brief Add a new workload descriptor to the cache
154+
* @brief Add a new workload descriptor to the cache. If the key exists it does NOT replace the old value with the
155+
* new one
83156
*
84157
* @param wl the workload descriptor (key)
85158
* @param value the workload value
@@ -89,26 +162,62 @@ class LRUCache {
89162
if (max_size == 0)
90163
return;
91164

165+
std::unique_lock<std::shared_mutex> lock(mtx); // Exclusive lock for write
166+
92167
// Check if the workload is already in the deserialized table
168+
if (FixedCacheAddON<K, V>::contains(wl))
169+
return;
170+
171+
const Map_Iter_cnst& map_it{m_table.find(wl)};
172+
if (map_it == m_table.cend()) {
173+
// Insert items in the list and map
174+
workloads.push_front({wl, value}); // adds a new element
175+
m_table.insert({wl, workloads.cbegin()}); // would not add a new element if wl is already inside
176+
177+
clean_up_excess_elements(); // if size is exceeded
178+
} else {
179+
// wl already in table, keep old value, move to first position
180+
mark_as_most_recently_used(map_it);
181+
}
182+
183+
if (!check_consistency()) {
184+
throw std::runtime_error("Cache consistency check failed after adding workload");
185+
}
186+
}
187+
188+
public:
189+
/**
190+
* @brief Get a value from the cache.
191+
*
192+
* @param wl the workload(key) descriptor
193+
* @return std::optional<V> the value stored in the cache, or nothing if not available
194+
*/
195+
std::optional<V> get(const K& wl) const {
196+
// Check if the workload is in the deserialized table
93197
{
94-
if constexpr (has_hash_v<K>) {
95-
if (deserialized_table.contains(wl.hash()))
96-
return;
97-
} else {
98-
if (deserialized_table.contains(NNDescriptor<float>(wl).hash()))
99-
return;
198+
const std::optional<V> found{FixedCacheAddON<K, V>::get(wl)};
199+
if (found) {
200+
return found;
100201
}
101202
}
102203

103-
// Insert items in the list and map
104-
workloads.push_front({wl, value});
105-
m_table.insert({wl, workloads.cbegin()});
106-
size++;
204+
// First, try to find the key with a shared lock
205+
{
206+
std::shared_lock<std::shared_mutex> lock(mtx);
207+
auto map_it = m_table.find(wl);
208+
if (map_it == m_table.cend()) {
209+
return std::nullopt;
210+
}
211+
}
107212

108-
// delete the oldest ones that occupy more space than allowed
109-
while (size > max_size) {
110-
const auto& last_item{workloads.back()};
111-
remove(last_item.first); // key is first in pair
213+
// If found, acquire a unique lock and do the mutation
214+
std::unique_lock<std::shared_mutex> lock(mtx);
215+
auto map_it = m_table.find(wl);
216+
if (map_it != m_table.cend()) {
217+
mark_as_most_recently_used(map_it);
218+
return (map_it->second->second);
219+
} else {
220+
return std::nullopt;
112221
}
113222
}
114223

@@ -119,57 +228,49 @@ class LRUCache {
119228
* @param wl the workload descriptor
120229
*/
121230
void remove(const K& wl) {
122-
Map_Iter_cnst it = m_table.find(wl);
231+
const Map_Iter_cnst& it{m_table.find(wl)};
123232

124233
if (it != m_table.cend()) {
125234
m_table.erase(it->first); // key is first
126235
} else {
127-
throw std::out_of_range("VPUNN Cache out of range");
236+
throw std::out_of_range("VPUNN Cache out of range, an element was not in table");
128237
}
129238

130239
workloads.pop_back(); // Remove the last element from the list
131-
size--; // Update the size
132-
}
133-
134-
public:
135-
/**
136-
* @brief Get a workload from the cache.
137-
*
138-
* @param wl the workload descriptor
139-
* @return T* a pointer to the workload value stored in the cache, or nullptr if not available
140-
*/
141-
const V* get(const K& wl) {
142-
// Check if the workload is in the deserialized table
143-
{
144-
uint32_t wlhash{0};
145-
if constexpr (has_hash_v<K>) {
146-
wlhash = wl.hash();
147-
} else {
148-
wlhash = NNDescriptor<float>(wl).hash();
149-
}
150240

151-
const V* elementInPreloadedCache{deserialized_table.get_pointer(wlhash)};
152-
if (elementInPreloadedCache) {
153-
return elementInPreloadedCache; // ret the pointer to the element in the preloaded cache
154-
}
241+
if (!check_consistency()) {
242+
throw std::runtime_error("Cache consistency check failed after removing workload");
155243
}
244+
}
156245

157-
// Check if the workload is in the main table
158-
Map_Iter_cnst it = m_table.find(wl);
159-
if (it != m_table.cend()) {
160-
// Move the workload to the beginning of the list
161-
workloads.splice(workloads.cbegin(), workloads, it->second);
162-
return &(it->second->second); // second is the list iterator
163-
} else {
164-
return nullptr;
246+
/// deletes what exceeds the size
247+
void clean_up_excess_elements() {
248+
// delete the oldest ones that occupy more space than allowed
249+
while (m_table.size() > max_size) {
250+
const auto& oldest_item{workloads.back()}; // last
251+
remove(oldest_item.first); // key is first in pair
252+
}
253+
}
254+
255+
void mark_as_most_recently_used(const Map_Iter_cnst& map_it) const {
256+
if (map_it != m_table.cend()) { // Move the workload to the beginning of the list
257+
const List_Iter_cnst& list_it = map_it->second; // second is the list iterator
258+
workloads.splice(workloads.cbegin(), workloads, list_it);
165259
}
166260
}
167261

168-
const AccessCounter& getPreloadedCacheCounter() const {
169-
return deserialized_table.getCounter();
262+
protected:
263+
264+
/// @brief Check if the cache is consistent, i.e., the number of workloads matches the size of the table
265+
bool check_consistency() const {
266+
if (workloads.size() != m_table.size()) {
267+
return false;
268+
}
269+
270+
return true;
170271
}
171272
};
172273

173274
} // namespace VPUNN
174275

175-
#endif // VPUNN_CACHE
276+
#endif // VPUNN_CACHE

0 commit comments

Comments
 (0)