intel
diff --git a/‎CMakeLists.txt‎
Lines changed: 15 additions & 0 deletions b/‎CMakeLists.txt‎
Lines changed: 15 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 15 additions & 1 deletion b/‎README.md‎
Lines changed: 15 additions & 1 deletion
diff --git a/‎include/core/cache.h‎
Lines changed: 171 additions & 70 deletions b/‎include/core/cache.h‎
Lines changed: 171 additions & 70 deletions
@@ -14,6 +14,13 @@ set(CMAKE_CXX_STANDARD 17 CACHE STRING "C++ standard to conform to")
 # set the project name
 project(VPUNN)
 
+# Enable /bigobj for all MSVC builds (the Debug-only condition is kept commented out below) - remove after vpunn headers are cleaned up
+#if (MSVC AND CMAKE_BUILD_TYPE STREQUAL "Debug")
+if (MSVC)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /bigobj")
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /bigobj")
+endif()
+
 # impacts the blas/blasStatic library , if ON and linux will enable vector instructions by -msse[N]
 # on win(MSVC) will define USE_SIMD
 # if target architecture does not support it, deactivate it and enable manually the required optimizations (for vector instructions if runtime is important) at least for blas
@@ -26,6 +33,14 @@ option(VPUNN_BUILD_TESTS "build tests" ON)
 option(VPUNN_ENABLE_LOGGING "enable logging" OFF)
 option(ENABLE_PYTHON_BINDING "Build the python bindings" OFF)
 option(GENERATE_PYTHON_BINDING "Generate the python bindings code" OFF)
+option(VPUNN_BUILD_HTTP_CLIENT "Build support for cost provider http service" OFF)
+# When the option is ON: add the VPUNN_BUILD_HTTP_CLIENT compile definition and set
+# VPUNN_HTTP_CLIENT_LIB to vpunn_http_client. Otherwise VPUNN_HTTP_CLIENT_LIB is set to an empty string.
+if (VPUNN_BUILD_HTTP_CLIENT)
+    add_compile_definitions(VPUNN_BUILD_HTTP_CLIENT)
+    # message(STATUS ...) already prefixes its output with "-- ", so the text carries no prefix of its own
+    message(STATUS "Enable HTTP service cost provider")
+    set (VPUNN_HTTP_CLIENT_LIB vpunn_http_client)
+else()
+    set (VPUNN_HTTP_CLIENT_LIB "")
+endif()
 
 option(VPUNN_OPT_LEGACY_ZTILING "Use legacy ZTiling mechanism" ON)
 if(VPUNN_OPT_LEGACY_ZTILING)
 
@@ -257,4 +257,18 @@ For most updated list of operators and their details see also the unit tests: Te
 
 For information about the profiled operators and extraparameters you can consult this [document](src/shave/Readme.md#shave-current-operators)
 
-
+## Cost providers
+
+The cost model is designed to be extensible. The cost providers are the classes that implement the cost model for a specific device. The cost providers are selected at runtime based on the device type. The following cost providers are available:
+- NN based cost provider - is a learned performance model.
+- Theoretical cost provider - is a simple mathematical model.
+- "Oracle" cost provider - a LUT of measured performance for specific workloads.
+- Profiled cost provider - an HTTP service that can be queried to get the measured performance of a specific workload.
+    - Currently it supports only DPU costs and can be configured using the following environment variables:
+        - `ENABLE_VPUNN_PROFILING_SERVICE` -- `TRUE` to enable the profiling service
+        - `VPUNN_PROFILING_SERVICE_BACKEND` -- `silicon` to use the RVP for profiling, `vpuem` to use VPUEM as a cost provider.
+        - `VPUNN_PROFILING_SERVICE_HOST` -- address of the profiling service host, default is `irlccggpu04.ir.intel.com`
+        - `VPUNN_PROFILING_SERVICE_PORT` -- port of the profiling service, default is `5000`
+
+To see a list of all queried workloads and which cost provider was used for each, set the environment variable `ENABLE_VPUNN_DATA_SERIALIZATION` to `TRUE`.
+This will generate a couple of `csv` files in the directory where vpunn is used.
@@ -12,33 +12,70 @@
 
 #include <list>
 #include <map>
+#include <optional>
 #include <stdexcept>
 #include <vector>
+#include <thread>
+#include <filesystem>
+#include <shared_mutex>
+
+#include <cassert>
 
-#include "core/utils.h"
 #include "core/persistent_cache.h"
+#include "core/utils.h"
 
 namespace VPUNN {
 
-/**
- * @brief a workload cache using LRU (least recent used) replacement policy
- * @tparam K is the Key type
- * @tparam V is the Value type
- */
 template <typename K, typename V>
-class LRUCache {
-private:
-    typedef std::list<std::pair<K, V>> List;
-    typedef typename List::const_iterator List_Iter_cnst;
+class FixedCacheAddON {
+protected:
+    // FixedCacheAddON(): FixedCacheAddON("", "") {
+    // }
+    /// @brief Builds the preloaded table from a cache file.
+    /// A non-empty result of check_if_env_path_override() takes priority; otherwise the file name
+    /// is the one chosen by decideCacheFilename(filename, prio2_loadIfPairedCacheExists).
+    /// @param filename forwarded to decideCacheFilename()
+    /// @param prio2_loadIfPairedCacheExists forwarded to decideCacheFilename()
+    FixedCacheAddON(const std::string& filename, const std::string& prio2_loadIfPairedCacheExists)
+            : deserialized_table{[&]() {
+                  auto path_from_env = check_if_env_path_override();
+                  if (path_from_env.empty()) {
+                      // no usable override: fall back to the file selected from the arguments
+                      return FixedCache(decideCacheFilename(filename, prio2_loadIfPairedCacheExists));
+                  }
+                  return FixedCache(path_from_env);
+              }()} {
+    }
 
-    typedef std::map<K, List_Iter_cnst> Map;
-    typedef typename Map::const_iterator Map_Iter_cnst;
+    /// @brief Builds the preloaded table from an in-memory buffer.
+    /// A non-empty result of check_if_env_path_override() takes priority and is used as the source
+    /// instead of the buffer.
+    /// @param file_data forwarded to the FixedCache(file_data, file_data_length) constructor
+    /// @param file_data_length forwarded to the FixedCache(file_data, file_data_length) constructor
+    FixedCacheAddON(const char* file_data, size_t file_data_length)
+            : deserialized_table{[&]() {
+                  auto path_from_env = check_if_env_path_override();
+                  if (path_from_env.empty()) {
+                      // no usable override: build the table from the provided buffer
+                      return FixedCache(file_data, file_data_length);
+                  }
+                  return FixedCache(path_from_env);
+              }()} {
+    }
 
-    List workloads;  ///< list with first being the most recently used key
-    Map m_table;     ///< table for fast searching of keys  (contains pointers to list objects)
-    const size_t max_size;
-    size_t size{0};
+protected:
+    /// @brief Tells whether the preloaded (deserialized) table has an entry for this workload.
+    /// The lookup key is the workload's own hash() when has_hash_v<K> holds, otherwise the hash()
+    /// of an NNDescriptor<float> built from the workload.
+    /// @param wl the workload (key) to look for
+    /// @return true if the preloaded table contains the computed hash, false otherwise
+    bool contains(const K& wl) const {
+        if constexpr (has_hash_v<K>) {
+            return deserialized_table.contains(wl.hash());
+        } else {
+            return deserialized_table.contains(NNDescriptor<float>(wl).hash());
+        }
+    }
+
+    /// @brief Looks the workload up in the preloaded (deserialized) table.
+    /// The lookup key is the workload's own hash() when has_hash_v<K> holds, otherwise the hash()
+    /// of an NNDescriptor<float> built from the workload.
+    /// @param wl the workload (key) to look for
+    /// @return the result of deserialized_table.get() for the computed hash
+    std::optional<V> get(const K& wl) const {
+        uint32_t key_hash{0};
+        if constexpr (!has_hash_v<K>) {
+            key_hash = NNDescriptor<float>(wl).hash();
+        } else {
+            key_hash = wl.hash();
+        }
+
+        return deserialized_table.get(key_hash);
+    }
+
+private:
     /// loaded from file, must be loaded from a file with the same descriptor signature
     /// @note this is a draft implementation
     /// This datatype knows it is a float Value and uint32 key. this beats the K, V template
@@ -62,6 +99,41 @@ class LRUCache {
         return selected_filename;
     }
 
+    /// @brief Looks for a cache file path supplied through the VPUNN_CACHE_PATH environment variable.
+    /// @return the value of VPUNN_CACHE_PATH when it is non-empty and names an existing filesystem
+    /// entry, otherwise an empty string. A filesystem error while probing the path is treated the
+    /// same as a path that does not exist.
+    static std::string check_if_env_path_override() {
+        auto env_cache_path = get_env_vars({"VPUNN_CACHE_PATH"}).at("VPUNN_CACHE_PATH");
+        if (env_cache_path.empty()) {
+            return {};
+        }
+
+        // Non-throwing overload: this helper runs while the cache object is being constructed, so a
+        // path that cannot be probed must not raise std::filesystem::filesystem_error from there.
+        std::error_code probe_error;
+        if (std::filesystem::exists(env_cache_path, probe_error)) {
+            return env_cache_path;
+        }
+        return {};
+    }
+
+public:
+    /// @brief Gives read-only access to the access counter of the preloaded (deserialized) table.
+    /// @return the AccessCounter reported by deserialized_table.getCounter()
+    const AccessCounter& getPreloadedCacheCounter() const {
+        return this->deserialized_table.getCounter();
+    }
+};
+
+/**
+ * @brief a workload cache using LRU (least recent used) replacement policy
+ * @tparam K is the Key type
+ * @tparam V is the Value type
+ */
+template <typename K, typename V>
+class LRUCache : public FixedCacheAddON<K, V> {
+private:
+    typedef std::list<std::pair<K, V>> List;
+    typedef typename List::const_iterator List_Iter_cnst;
+
+    typedef std::map<K, List_Iter_cnst> Map;
+    typedef typename Map::const_iterator Map_Iter_cnst;
+
+    mutable List workloads;  ///< list with first being the most recently used key.
+    Map m_table;             ///< table for fast searching of keys  (contains pointers to list objects, as iterators)
+
+    const size_t max_size;
+
+    mutable std::shared_mutex mtx;  ///< Mutex to protect shared resources.
+
 public:
     /**
      * @brief Construct a new LRUCache object
@@ -70,16 +142,17 @@ class LRUCache {
      */
     explicit LRUCache(size_t max_size, const std::string& filename = "",
                       const std::string& prio2_loadIfPairedCacheExists = "")
-            : max_size(max_size), deserialized_table{decideCacheFilename(filename, prio2_loadIfPairedCacheExists)} {
+            : FixedCacheAddON<K, V>(filename, prio2_loadIfPairedCacheExists), max_size(max_size) {
     }
 
     // const char* model_data, size_t model_data_length, bool copy_model_data
-    explicit LRUCache(size_t max_size, const char* file_data = nullptr, size_t file_data_length = 0)
-            : max_size(max_size), deserialized_table{file_data, file_data_length} {
+    explicit LRUCache(size_t max_size, const char* file_data, size_t file_data_length)
+            : FixedCacheAddON<K, V>(file_data, file_data_length), max_size(max_size) {
     }
 
     /**
-     * @brief Add a new workload descriptor to the cache
+     * @brief Add a new workload descriptor to the cache. If the key already exists, it does NOT replace the old
+     * value with the new one
      *
      * @param wl the workload descriptor (key)
      * @param value the workload value
@@ -89,26 +162,62 @@ class LRUCache {
         if (max_size == 0)
             return;
 
+        std::unique_lock<std::shared_mutex> lock(mtx);  // Exclusive lock for write
+
         // Check if the workload is already in the deserialized table
+        if (FixedCacheAddON<K, V>::contains(wl))
+            return;
+
+        const Map_Iter_cnst& map_it{m_table.find(wl)};
+        if (map_it == m_table.cend()) {
+            // Insert items in the list and map
+            workloads.push_front({wl, value});         // adds a new element
+            m_table.insert({wl, workloads.cbegin()});  // would not add a new element if wl is already inside
+
+            clean_up_excess_elements();  // if size is exceeded
+        } else {
+            // wl already in table, keep old value, move to first position
+            mark_as_most_recently_used(map_it);
+        }
+
+        if (!check_consistency()) {
+            throw std::runtime_error("Cache consistency check failed after adding workload");
+        }
+    }
+
+public:
+    /**
+     * @brief Get a value from the cache.
+     *
+     * @param wl the workload(key) descriptor
+     * @return std::optional<V> the value stored in the cache, or nothing if not available
+     */
+    std::optional<V> get(const K& wl) const {
+        // Check if the workload is in the deserialized table
         {
-            if constexpr (has_hash_v<K>) {
-                if (deserialized_table.contains(wl.hash()))
-                    return;
-            } else {
-                if (deserialized_table.contains(NNDescriptor<float>(wl).hash()))
-                    return;
+            const std::optional<V> found{FixedCacheAddON<K, V>::get(wl)};
+            if (found) {
+                return found;
             }
         }
 
-        // Insert items in the list and map
-        workloads.push_front({wl, value});
-        m_table.insert({wl, workloads.cbegin()});
-        size++;
+        // First, try to find the key with a shared lock
+        {
+            std::shared_lock<std::shared_mutex> lock(mtx);
+            auto map_it = m_table.find(wl);
+            if (map_it == m_table.cend()) {
+                return std::nullopt;
+            }
+        }
 
-        // delete the oldest ones that occupy more space than allowed
-        while (size > max_size) {
-            const auto& last_item{workloads.back()};
-            remove(last_item.first);  // key is first in pair
+        // If found, acquire a unique lock and do the mutation
+        std::unique_lock<std::shared_mutex> lock(mtx);
+        auto map_it = m_table.find(wl);
+        if (map_it != m_table.cend()) {
+            mark_as_most_recently_used(map_it);
+            return (map_it->second->second);
+        } else {
+            return std::nullopt;
         }
     }
 
@@ -119,57 +228,49 @@ class LRUCache {
      * @param wl the workload descriptor
      */
     void remove(const K& wl) {
-        Map_Iter_cnst it = m_table.find(wl);
+        const Map_Iter_cnst& it{m_table.find(wl)};
 
         if (it != m_table.cend()) {
             m_table.erase(it->first);  // key is first
         } else {
-            throw std::out_of_range("VPUNN Cache out of range");
+            throw std::out_of_range("VPUNN Cache out of range, an element was not in table");
         }
 
         workloads.pop_back();  // Remove the last element from the list
-        size--;                // Update the size
-    }
-
-public:
-    /**
-     * @brief Get a workload from the cache.
-     *
-     * @param wl the workload descriptor
-     * @return T* a pointer to the workload value stored in the cache, or nullptr if not available
-     */
-    const V* get(const K& wl) {
-        // Check if the workload is in the deserialized table
-        {
-            uint32_t wlhash{0};
-            if constexpr (has_hash_v<K>) {
-                wlhash = wl.hash();
-            } else {
-                wlhash = NNDescriptor<float>(wl).hash();
-            }
 
-            const V* elementInPreloadedCache{deserialized_table.get_pointer(wlhash)};
-            if (elementInPreloadedCache) {
-                return elementInPreloadedCache;  // ret the pointer to the element in the preloaded cache
-            }
+        if (!check_consistency()) {
+            throw std::runtime_error("Cache consistency check failed after removing workload");
         }
+    }
 
-        // Check if the workload is in the main table
-        Map_Iter_cnst it = m_table.find(wl);
-        if (it != m_table.cend()) {
-            // Move the workload to the beginning of the list
-            workloads.splice(workloads.cbegin(), workloads, it->second);
-            return &(it->second->second);  // second is the list iterator
-        } else {
-            return nullptr;
+    /// @brief Evicts entries until the lookup table holds at most max_size elements.
+    /// Each iteration passes the key stored at the back of the workloads list to remove().
+    void clean_up_excess_elements() {
+        while (m_table.size() > max_size) {
+            // list elements are (key, value) pairs; the back one is the eviction candidate
+            remove(workloads.back().first);
+        }
+    }
+    
+    /// @brief Moves the list node referenced by a map entry to the front of the workloads list.
+    /// Does nothing when map_it is the end iterator of m_table.
+    /// @param map_it iterator into m_table; its mapped value is the list iterator of the entry
+    void mark_as_most_recently_used(const Map_Iter_cnst& map_it) const {
+        if (map_it == m_table.cend()) {
+            return;  // no entry to promote
+        }
+        // splice within the same list relinks the node at the front position
+        workloads.splice(workloads.cbegin(), workloads, map_it->second);
+    }
 
-    const AccessCounter& getPreloadedCacheCounter() const {
-        return deserialized_table.getCounter();
+protected:
+
+    /// @brief Consistency check of the cache: the workloads list and the lookup table must hold the
+    /// same number of elements.
+    /// @return true when both sizes are equal, false otherwise
+    bool check_consistency() const {
+        return workloads.size() == m_table.size();
+    }
 };
 
 }  // namespace VPUNN
 
-#endif  // VPUNN_CACHE
+#endif  // VPUNN_CACHE