Fixed newHand() bug!
Before, the eligibility traces would carry over across hands, but I don't think that's good behavior. Now, newHand() calls zeroEligibility(), which clears the agent's eligibility signals.
landoskape committed Aug 8, 2023
1 parent 0c05396 commit d7c5efe
Showing 2 changed files with 15 additions and 2 deletions.
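
For context on the pattern in the diffs below: valueAgent is a scaffold class, so newHand() delegates to a zeroEligibility() method that each concrete agent implements by rebuilding its trace tensors as zeros. A minimal sketch of that behavior, using a hypothetical SimpleValueAgent with a single linear network (class names and sizes here are illustrative, not from the repository):

import torch
import torch.nn as nn

class ValueAgent:
    # Scaffold: concrete agents must override zeroEligibility().
    def newHand(self):
        # Reset traces at each hand boundary so credit from the previous
        # hand cannot leak into the next one.
        self.zeroEligibility()

    def zeroEligibility(self):
        raise ValueError("ValueAgent is a scaffold; instantiate a complete agent.")

class SimpleValueAgent(ValueAgent):
    # Hypothetical concrete agent: one eligibility trace per parameter tensor.
    def __init__(self):
        self.network = nn.Linear(8, 1)
        self.zeroEligibility()

    def zeroEligibility(self):
        self.eligibility = [torch.zeros_like(p) for p in self.network.parameters()]

agent = SimpleValueAgent()
agent.newHand()  # traces are now guaranteed to be zero for the new hand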
dominoes.ipynb: 1 addition & 1 deletion
@@ -83,7 +83,7 @@
 "name": "stderr",
 "output_type": "stream",
 "text": [
-" 58%|█████████████████████████████████████████████                                 | 289/500 [3:30:21<40:53, 11.63s/it]"
+" 64%|█████████████████████████████████████████████████▉                               | 320/500 [3:36:20<37:33, 12.52s/it]"
 ]
 }
],
dominoesAgents.py: 14 additions & 1 deletion
@@ -335,8 +335,14 @@ def specializedInit(self,**kwargs):
         self.extraParameters = []
         self.extraEligibility = []
 
+    def newHand(self):
+        self.zeroEligibility()
+
     def prepareNetwork(self):
         raise ValueError("It looks like you instantiated an object of the valueAgent class directly. This class is only used to provide a scaffold for complete valueAgents, see possible agents in this script!")
 
+    def zeroEligibility(self):
+        raise ValueError("It looks like you instantiated an object of the valueAgent class directly. This class is only used to provide a scaffold for complete valueAgents, see possible agents in this script!")
+
     def setLearning(self, learningState):
         self.learning = learningState
@@ -503,6 +509,9 @@ def prepareNetwork(self):
         # Prepare Training Functions & Optimizers
         self.finalScoreEligibility = [[torch.zeros(prms.shape).to(self.device) for prms in self.finalScoreNetwork.parameters()] for _ in range(self.finalScoreOutputDimension)]
 
+    def zeroEligibility(self):
+        self.finalScoreEligibility = [[torch.zeros(prms.shape).to(self.device) for prms in self.finalScoreNetwork.parameters()] for _ in range(self.finalScoreOutputDimension)]
+
     def prepareValueInputs(self, updateObject=True):
         self.valueNetworkInput = self.generateValueInput().to(self.device)
 
@@ -548,6 +557,7 @@ def specializedInit(self,**kwargs):
         self.playValue = np.sum(self.dominoes, axis=1)
 
     def newHand(self):
+        super().newHand()
         self.needsLineUpdate = True
 
     def linePlayedOn(self):
@@ -573,7 +583,10 @@ def prepareNetwork(self):
 
         # Prepare Training Functions & Optimizers
         self.finalScoreEligibility = [[torch.zeros(prms.shape).to(self.device) for prms in self.finalScoreNetwork.parameters()] for _ in range(self.finalScoreOutputDimension)]
-
+
+    def zeroEligibility(self):
+        self.finalScoreEligibility = [[torch.zeros(prms.shape).to(self.device) for prms in self.finalScoreNetwork.parameters()] for _ in range(self.finalScoreOutputDimension)]
+
     def prepareValueInputs(self):
         # First, get all possible lines that can be made on agent's own line.
         if self.needsLineUpdate: self.lineSequence, self.lineDirection = df.constructLineRecursive(self.dominoes, self.myHand, self.available[0], maxLineLength=self.maxLineLength)
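
Why the reset matters: eligibility traces accumulate gradient information move by move within a hand, so a trace carried across hands would credit the new hand's outcome to moves from the old one. A rough TD(lambda)-style trace update, continuing the hypothetical SimpleValueAgent sketch above (the function name updateTraces, the decay factor lam, and the decay-and-accumulate rule are assumptions, not taken from this repository):

def updateTraces(agent, value, lam=0.9):
    # value: scalar output of the agent's value network for the current state
    agent.network.zero_grad()
    value.backward()  # populate p.grad for every parameter
    for trace, p in zip(agent.eligibility, agent.network.parameters()):
        trace.mul_(lam).add_(p.grad)  # decay old credit, accumulate new gradient

With a loop like this running during a hand, calling newHand() between hands restores every trace to zeros, which is exactly what this commit adds.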