diff --git a/dominoes.ipynb b/dominoes.ipynb
index 72ab24f..974e1a8 100644
--- a/dominoes.ipynb
+++ b/dominoes.ipynb
@@ -83,7 +83,7 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      " 58%|█████████████████████████████████████████████ | 289/500 [3:30:21<40:53, 11.63s/it]"
+      " 64%|█████████████████████████████████████████████████▉ | 320/500 [3:36:20<37:33, 12.52s/it]"
      ]
     }
    ],
diff --git a/dominoesAgents.py b/dominoesAgents.py
index 8cbff23..bee7060 100644
--- a/dominoesAgents.py
+++ b/dominoesAgents.py
@@ -335,8 +335,14 @@ def specializedInit(self,**kwargs):
         self.extraParameters = []
         self.extraEligibility = []
 
+    def newHand(self):
+        self.zeroEligibility()
+
     def prepareNetwork(self):
         raise ValueError("It looks like you instantiated an object of the valueAgent class directly. This class is only used to provide a scaffold for complete valueAgents, see possible agents in this script!")
+
+    def zeroEligibility(self):
+        raise ValueError("It looks like you instantiated an object of the valueAgent class directly. This class is only used to provide a scaffold for complete valueAgents, see possible agents in this script!")
 
     def setLearning(self, learningState):
         self.learning = learningState
@@ -503,6 +509,9 @@ def prepareNetwork(self):
         # Prepare Training Functions & Optimizers
         self.finalScoreEligibility = [[torch.zeros(prms.shape).to(self.device) for prms in self.finalScoreNetwork.parameters()] for _ in range(self.finalScoreOutputDimension)]
 
+    def zeroEligibility(self):
+        self.finalScoreEligibility = [[torch.zeros(prms.shape).to(self.device) for prms in self.finalScoreNetwork.parameters()] for _ in range(self.finalScoreOutputDimension)]
+
     def prepareValueInputs(self, updateObject=True):
         self.valueNetworkInput = self.generateValueInput().to(self.device)
 
@@ -548,6 +557,7 @@ def specializedInit(self,**kwargs):
         self.playValue = np.sum(self.dominoes, axis=1)
 
     def newHand(self):
+        super().newHand()
         self.needsLineUpdate = True
 
     def linePlayedOn(self):
@@ -573,7 +583,10 @@ def prepareNetwork(self):
         # Prepare Training Functions & Optimizers
         self.finalScoreEligibility = [[torch.zeros(prms.shape).to(self.device) for prms in self.finalScoreNetwork.parameters()] for _ in range(self.finalScoreOutputDimension)]
-
+
+    def zeroEligibility(self):
+        self.finalScoreEligibility = [[torch.zeros(prms.shape).to(self.device) for prms in self.finalScoreNetwork.parameters()] for _ in range(self.finalScoreOutputDimension)]
+
     def prepareValueInputs(self):
         # First, get all possible lines that can be made on agent's own line.
         if self.needsLineUpdate: self.lineSequence, self.lineDirection = df.constructLineRecursive(self.dominoes, self.myHand, self.available[0], maxLineLength=self.maxLineLength)
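
The dominoesAgents.py hunks all follow one pattern: the valueAgent base class gains a newHand() hook that delegates to an abstract zeroEligibility(), and each concrete agent implements zeroEligibility() by rebuilding its per-parameter eligibility traces as zero tensors, so traces no longer leak across hands. Below is a minimal, self-contained sketch of that pattern, not the repository code; class names like ValueAgentScaffold and ConcreteAgent, and the eligibility attribute, are illustrative stand-ins for valueAgent and finalScoreEligibility.

import torch


class ValueAgentScaffold:
    def newHand(self):
        # Called once at the start of each hand; the subclass supplies the reset.
        self.zeroEligibility()

    def zeroEligibility(self):
        raise ValueError("Scaffold class; instantiate a concrete agent instead.")


class ConcreteAgent(ValueAgentScaffold):
    def __init__(self, network, output_dim, device="cpu"):
        self.network = network
        self.output_dim = output_dim
        self.device = device
        self.zeroEligibility()

    def zeroEligibility(self):
        # One zeroed trace per parameter tensor, per output dimension,
        # mirroring the structure of finalScoreEligibility in the diff.
        self.eligibility = [
            [torch.zeros(p.shape).to(self.device) for p in self.network.parameters()]
            for _ in range(self.output_dim)
        ]

    def newHand(self):
        super().newHand()            # zero the traces first,
        self.needsLineUpdate = True  # then do subclass-specific bookkeeping


agent = ConcreteAgent(torch.nn.Linear(4, 2), output_dim=2)
agent.newHand()
assert all(t.sum() == 0 for row in agent.eligibility for t in row)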