1 file changed: +2 −2 lines changed
@@ -133,7 +133,7 @@ def update(self, batch: TransitionBatch) -> Dict:
133133 self .log_alpha .exp ().detach () if self .auto_alpha else self .alpha
134134 )
135135
136- # FIX: Sample fresh actions for each policy update iteration
136+ # Sample fresh actions for each policy update iteration
137137 # This ensures stochasticity across iterations
138138 a , z , mean , log_std = self .model (states )
139139 logp = self .model .policy_log_prob (z , mean , log_std )
@@ -150,7 +150,7 @@ def update(self, batch: TransitionBatch) -> Dict:
150150
151151 # --- Entropy coefficient (alpha) update ---
152152 if self .auto_alpha :
153- # CRITICAL FIX: Get fresh sample for alpha update
153+ # Get fresh sample for alpha update
154154 with torch .no_grad ():
155155 _ , z_alpha , mean_alpha , log_std_alpha = self .model (states )
156156 logp_alpha = self .model .policy_log_prob (z_alpha , mean_alpha , log_std_alpha )
You can’t perform that action at this time.
0 commit comments