Skip to content

Commit f870ed1

Browse files
committed
implement event-based retry strategy
1 parent fd0562a commit f870ed1

File tree

9 files changed

+339
-37
lines changed

9 files changed

+339
-37
lines changed

orchagent/cbf/cbfnhgorch.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,7 @@ void CbfNhgOrch::doTask(Consumer& consumer)
177177
if (success)
178178
{
179179
m_syncdNextHopGroups.erase(cbf_nhg_it);
180+
notifyRetry(gRouteOrch, APP_ROUTE_TABLE_NAME, make_constraint(RETRY_CST_ECMP));
180181
}
181182
}
182183
}

orchagent/nhgbase.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -440,6 +440,13 @@ class NhgOrchCommon : public Orch
440440
--nhg_entry.ref_count;
441441
}
442442

443+
unsigned getNhgRefCount(const string& index)
444+
{
445+
SWSS_LOG_ENTER();
446+
auto& nhg_entry = m_syncdNextHopGroups.at(index);
447+
return nhg_entry.ref_count;
448+
}
449+
443450
/* Getters / Setters. */
444451
static inline unsigned getSyncedNhgCount() { return NhgBase::getSyncedCount(); }
445452

orchagent/nhgorch.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ extern sai_next_hop_api_t* sai_next_hop_api;
2323
NhgOrch::NhgOrch(DBConnector *db, string tableName) : NhgOrchCommon(db, tableName)
2424
{
2525
SWSS_LOG_ENTER();
26+
createRetryCache(tableName);
2627
}
2728

2829
/*
@@ -266,6 +267,7 @@ void NhgOrch::doTask(Consumer& consumer)
266267
if (nhg->sync())
267268
{
268269
m_syncdNextHopGroups.emplace(index, NhgEntry<NextHopGroup>(std::move(nhg)));
270+
notifyRetry(gRouteOrch, APP_ROUTE_TABLE_NAME, make_constraint(RETRY_CST_NHG, index));
269271
}
270272
else
271273
{
@@ -300,6 +302,7 @@ void NhgOrch::doTask(Consumer& consumer)
300302
success = false;
301303
}
302304
m_syncdNextHopGroups.emplace(index, NhgEntry<NextHopGroup>(std::move(nhg)));
305+
notifyRetry(gRouteOrch, APP_ROUTE_TABLE_NAME, make_constraint(RETRY_CST_NHG, index));
303306
}
304307
}
305308
}
@@ -413,7 +416,9 @@ void NhgOrch::doTask(Consumer& consumer)
413416
/* If the group does exist, but it's still referenced, skip. */
414417
else if (nhg_it->second.ref_count > 0)
415418
{
416-
SWSS_LOG_INFO("Unable to remove group %s which is referenced", index.c_str());
419+
SWSS_LOG_INFO("Unable to remove group %s which is referenced, move task entry to RetryCache", index.c_str());
420+
consumer.addToRetry(std::move(it->second), make_constraint(RETRY_CST_NHG_REF, index));
421+
success = true;
417422
}
418423
/* Else, if the group is no more referenced, remove it. */
419424
else
@@ -425,6 +430,7 @@ void NhgOrch::doTask(Consumer& consumer)
425430
if (success)
426431
{
427432
m_syncdNextHopGroups.erase(nhg_it);
433+
notifyRetry(gRouteOrch, APP_ROUTE_TABLE_NAME, make_constraint(RETRY_CST_ECMP));
428434
}
429435
}
430436
}

orchagent/orch.cpp

Lines changed: 83 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,69 @@ vector<Selectable *> Orch::getSelectables()
147147
return selectables;
148148
}
149149

150-
void ConsumerBase::addToSync(const KeyOpFieldsValuesTuple &entry)
150+
void Orch::createRetryCache(const std::string &executorName) {
151+
if (m_retryCaches.find(executorName) == m_retryCaches.end())
152+
m_retryCaches[executorName] = std::make_shared<RetryCache>(executorName);
153+
}
154+
155+
/*
 * Look up the retry cache registered for the executor named `executorName`.
 *
 * Returns a non-owning pointer to the cache, or nullptr when no cache was
 * created for that executor (see createRetryCache). Callers must check the
 * result before dereferencing it.
 */
RetryCache *Orch::getRetryCache(const std::string &executorName)
{
    /* Single lookup: reuse the iterator instead of find() followed by operator[]. */
    const auto cacheIt = m_retryCaches.find(executorName);
    if (cacheIt == m_retryCaches.end())
    {
        return nullptr;
    }

    return cacheIt->second.get();
}
162+
163+
/*
 * Look up the executor named `executorName` and return it as a ConsumerBase.
 *
 * Returns nullptr when no executor is registered under that name, or when
 * the registered executor is not a ConsumerBase (the dynamic_cast fails).
 */
ConsumerBase* Orch::getConsumerBase(const std::string &executorName)
{
    /* Single lookup: reuse the iterator instead of find() followed by operator[]. */
    const auto executorIt = m_consumerMap.find(executorName);
    if (executorIt == m_consumerMap.end())
    {
        return nullptr;
    }

    return dynamic_cast<ConsumerBase*>(executorIt->second.get());
}
169+
170+
void ConsumerBase::addToRetry(const Task &task, const Constraint &cst) {
171+
getOrch()->getRetryCache(getName())->cache_failed_task(task, cst);
172+
}
173+
174+
void Orch::addToRetry(const std::string &executorName, const Task &task, const Constraint &cst) {
175+
getRetryCache(executorName)->cache_failed_task(task, cst);
176+
}
177+
178+
size_t Orch::retryToSync(const std::string &executorName, size_t threshold)
179+
{
180+
auto retryCache = getRetryCache(executorName);
181+
182+
if (!retryCache || threshold <= 0)
183+
return 0;
184+
185+
std::unordered_set<Constraint>& constraints = retryCache->getResolvedConstraints();
186+
187+
size_t count = 0;
188+
189+
for (auto it = constraints.begin(); it != constraints.end() && count < threshold;)
190+
{
191+
auto cst = *it++;
192+
193+
auto tasks = retryCache->resolve(cst, threshold - count);
194+
195+
count += tasks->size();
196+
197+
getConsumerBase(executorName)->addToSync(tasks, true);
198+
199+
}
200+
return count;
201+
}
202+
203+
/*
 * Tell another orch that the constraint `cst` has been resolved, so tasks
 * waiting on it in that orch's retry cache become eligible for replay.
 *
 * @param retryOrch    the orch to be notified
 * @param executorName name of the executor (within retryOrch) to be notified
 * @param cst          the constraint that has been resolved
 *
 * The notification is dropped when retryOrch is null or when the target
 * executor has no retry cache: in both cases there is nothing that could be
 * waiting on the constraint, and the previous code dereferenced a null
 * pointer.
 */
void Orch::notifyRetry(Orch *retryOrch, const std::string &executorName, const Constraint &cst)
{
    if (retryOrch == nullptr)
    {
        return;
    }

    RetryCache *retryCache = retryOrch->getRetryCache(executorName);
    if (retryCache == nullptr)
    {
        return;
    }

    retryCache->add_resolution(cst);
}
207+
208+
/*
 * Convenience overload: add every entry of a shared deque to m_toSync.
 *
 * @param entries shared pointer to the entries to add; an empty (null)
 *                pointer is treated as "no entries"
 * @param onRetry forwarded unchanged to the deque overload
 * @return the number of entries added (0 for a null pointer)
 */
size_t ConsumerBase::addToSync(std::shared_ptr<std::deque<swss::KeyOpFieldsValuesTuple>> entries, bool onRetry)
{
    if (!entries)
    {
        return 0;
    }

    return addToSync(*entries, onRetry);
}
211+
212+
void ConsumerBase::addToSync(const KeyOpFieldsValuesTuple &entry, bool onRetry)
151213
{
152214
SWSS_LOG_ENTER();
153215

@@ -157,6 +219,24 @@ void ConsumerBase::addToSync(const KeyOpFieldsValuesTuple &entry)
157219
/* Record incoming tasks */
158220
Recorder::Instance().swss.record(dumpTuple(entry));
159221

222+
auto retryCache = getOrch()->getRetryCache(getName());
223+
224+
if (retryCache)
225+
{
226+
auto it = retryCache->getRetryMap().find(key);
227+
if (it != retryCache->getRetryMap().end()) // key exists
228+
{
229+
if (it->second.second == entry) // skip duplicate task
230+
return;
231+
232+
auto cache = retryCache->erase_stale_cache(key);
233+
Recorder::Instance().retry.record(dumpTuple(*cache).append(DECACHE));
234+
235+
if (op == SET_COMMAND)
236+
m_toSync.emplace(key, std::move(*cache));
237+
}
238+
}
239+
160240
/*
161241
* m_toSync is a multimap which will allow one key with multiple values,
162242
* Also, the order of the key-value pairs whose keys compare equivalent
@@ -230,22 +310,18 @@ void ConsumerBase::addToSync(const KeyOpFieldsValuesTuple &entry)
230310

231311
}
232312

233-
size_t ConsumerBase::addToSync(const std::deque<KeyOpFieldsValuesTuple> &entries)
313+
size_t ConsumerBase::addToSync(const std::deque<KeyOpFieldsValuesTuple> &entries, bool onRetry)
234314
{
235315
SWSS_LOG_ENTER();
236316

237317
for (auto& entry: entries)
238318
{
239-
addToSync(entry);
319+
addToSync(entry, onRetry);
240320
}
241321

242322
return entries.size();
243323
}
244324

245-
size_t ConsumerBase::addToSync(std::shared_ptr<std::deque<swss::KeyOpFieldsValuesTuple>> entries) {
246-
return addToSync(*entries);
247-
}
248-
249325
// TODO: Table should be const
250326
size_t ConsumerBase::refillToSync(Table* table)
251327
{

orchagent/orch.h

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ extern "C" {
2626
#include "response_publisher.h"
2727
#include "recorder.h"
2828
#include "schema.h"
29+
#include "retrycache.h"
2930

3031
const char delimiter = ':';
3132
const char list_item_delimiter = ',';
@@ -174,11 +175,18 @@ class ConsumerBase : public Executor {
174175
/* record the tuple */
175176
void recordTuple(const swss::KeyOpFieldsValuesTuple &tuple);
176177

177-
void addToSync(const swss::KeyOpFieldsValuesTuple &entry);
178+
void addToSync(const swss::KeyOpFieldsValuesTuple &entry, bool onRetry=false);
178179

179180
// Returns: the number of entries added to m_toSync
180-
size_t addToSync(const std::deque<swss::KeyOpFieldsValuesTuple> &entries);
181-
size_t addToSync(std::shared_ptr<std::deque<swss::KeyOpFieldsValuesTuple>> entries);
181+
size_t addToSync(const std::deque<swss::KeyOpFieldsValuesTuple> &entries, bool onRetry=false);
182+
size_t addToSync(std::shared_ptr<std::deque<swss::KeyOpFieldsValuesTuple>> entries, bool onRetry=false);
183+
184+
/**
185+
* Move a task to retry cache for future processing
186+
* @param task a task tuple
187+
* @param cst the constraint for the task
188+
*/
189+
void addToRetry(const Task &task, const Constraint &cst);
182190

183191
size_t refillToSync();
184192
size_t refillToSync(swss::Table* table);
@@ -264,6 +272,7 @@ typedef enum
264272
typedef std::pair<swss::DBConnector *, std::string> TableConnector;
265273
typedef std::pair<swss::DBConnector *, std::vector<std::string>> TablesConnector;
266274

275+
267276
class Orch
268277
{
269278
public:
@@ -296,13 +305,33 @@ class Orch
296305
virtual void doTask(swss::SelectableTimer &timer) { }
297306

298307
void dumpPendingTasks(std::vector<std::string> &ts);
308+
309+
void createRetryCache(const std::string &executorName);
310+
RetryCache* getRetryCache(const std::string &executorName);
311+
ConsumerBase* getConsumerBase(const std::string &executorName);
312+
313+
// Add a task and its constraint to the retry cache
314+
void addToRetry(const std::string &executorName, const Task &task, const Constraint &cst);
315+
316+
/** Remove tasks whose constraints have been resolved from this executor's retry cache, then add them back to its m_toSync.
317+
* @param executorName name of the executor (actually a ConsumerBase instance)
318+
* @param threshold upper bound on the number of tasks moved back to m_toSync in one call **/
319+
virtual size_t retryToSync(const std::string &executorName, size_t threshold=30000);
320+
321+
/** Notify the executor that the constraint is already resolved
322+
* @param retryOrch the orch to be notified
323+
* @param executorName name of the executor to be notified
324+
* @param cst the constraint that can be resolved
325+
* **/
326+
virtual void notifyRetry(Orch *retryOrch, const std::string &executorName, const Constraint &cst);
299327

300328
/**
301329
* @brief Flush pending responses
302330
*/
303331
void flushResponses();
304332
protected:
305333
ConsumerMap m_consumerMap;
334+
RetryCacheMap m_retryCaches;
306335

307336
Orch();
308337
ref_resolve_status resolveFieldRefValue(type_map&, const std::string&, const std::string&, swss::KeyOpFieldsValuesTuple&, sai_object_id_t&, std::string&);

0 commit comments

Comments
 (0)