Fixed newHand() bug!
Before, the eligibility traces would carry over across hands, but I don't think that's good behavior. Now, newHand() calls zeroEligibility(), which clears the agent's eligibility signals.
landoskape committed Aug 8, 2023
1 parent 0c05396 commit d7c5efe
Showing 2 changed files with 15 additions and 2 deletions.
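
For context on the pattern in the diffs below: valueAgent is a scaffold class, so newHand() delegates to a zeroEligibility() method that each concrete agent implements by rebuilding its trace tensors as zeros. A minimal sketch of that behavior, using a hypothetical SimpleValueAgent with a single linear network (class names and sizes here are illustrative, not from the repository):

import torch
import torch.nn as nn

class ValueAgent:
    # Scaffold: concrete agents must override zeroEligibility().
    def newHand(self):
        # Reset traces at each hand boundary so credit from the previous
        # hand cannot leak into the next one.
        self.zeroEligibility()

    def zeroEligibility(self):
        raise ValueError("ValueAgent is a scaffold; instantiate a complete agent.")

class SimpleValueAgent(ValueAgent):
    # Hypothetical concrete agent: one eligibility trace per parameter tensor.
    def __init__(self):
        self.network = nn.Linear(8, 1)
        self.zeroEligibility()

    def zeroEligibility(self):
        self.eligibility = [torch.zeros_like(p) for p in self.network.parameters()]

agent = SimpleValueAgent()
agent.newHand()  # traces are now guaranteed to be zero for the new hand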
dominoes.ipynb: 1 addition & 1 deletion
@@ -83,7 +83,7 @@
 "name": "stderr",
 "output_type": "stream",
 "text": [
-" 58%|█████████████████████████████████████████████                                 | 289/500 [3:30:21<40:53, 11.63s/it]"
+" 64%|█████████████████████████████████████████████████▉                               | 320/500 [3:36:20<37:33, 12.52s/it]"
 ]
 }
],
dominoesAgents.py: 14 additions & 1 deletion
@@ -335,8 +335,14 @@ def specializedInit(self,**kwargs):
         self.extraParameters = []
         self.extraEligibility = []
 
+    def newHand(self):
+        self.zeroEligibility()
+
     def prepareNetwork(self):
         raise ValueError("It looks like you instantiated an object of the valueAgent class directly. This class is only used to provide a scaffold for complete valueAgents, see possible agents in this script!")
 
+    def zeroEligibility(self):
+        raise ValueError("It looks like you instantiated an object of the valueAgent class directly. This class is only used to provide a scaffold for complete valueAgents, see possible agents in this script!")
+
     def setLearning(self, learningState):
         self.learning = learningState
@@ -503,6 +509,9 @@ def prepareNetwork(self):
         # Prepare Training Functions & Optimizers
         self.finalScoreEligibility = [[torch.zeros(prms.shape).to(self.device) for prms in self.finalScoreNetwork.parameters()] for _ in range(self.finalScoreOutputDimension)]
 
+    def zeroEligibility(self):
+        self.finalScoreEligibility = [[torch.zeros(prms.shape).to(self.device) for prms in self.finalScoreNetwork.parameters()] for _ in range(self.finalScoreOutputDimension)]
+
     def prepareValueInputs(self, updateObject=True):
         self.valueNetworkInput = self.generateValueInput().to(self.device)
 
@@ -548,6 +557,7 @@ def specializedInit(self,**kwargs):
         self.playValue = np.sum(self.dominoes, axis=1)
 
     def newHand(self):
+        super().newHand()
         self.needsLineUpdate = True
 
     def linePlayedOn(self):
@@ -573,7 +583,10 @@ def prepareNetwork(self):
 
         # Prepare Training Functions & Optimizers
         self.finalScoreEligibility = [[torch.zeros(prms.shape).to(self.device) for prms in self.finalScoreNetwork.parameters()] for _ in range(self.finalScoreOutputDimension)]
-
+
+    def zeroEligibility(self):
+        self.finalScoreEligibility = [[torch.zeros(prms.shape).to(self.device) for prms in self.finalScoreNetwork.parameters()] for _ in range(self.finalScoreOutputDimension)]
+
     def prepareValueInputs(self):
         # First, get all possible lines that can be made on agent's own line.
         if self.needsLineUpdate: self.lineSequence, self.lineDirection = df.constructLineRecursive(self.dominoes, self.myHand, self.available[0], maxLineLength=self.maxLineLength)
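
Why the reset matters: eligibility traces accumulate gradient information move by move within a hand, so a trace carried across hands would credit the new hand's outcome to moves from the old one. A rough TD(lambda)-style trace update, continuing the hypothetical SimpleValueAgent sketch above (the function name updateTraces, the decay factor lam, and the decay-and-accumulate rule are assumptions, not taken from this repository):

def updateTraces(agent, value, lam=0.9):
    # value: scalar output of the agent's value network for the current state
    agent.network.zero_grad()
    value.backward()  # populate p.grad for every parameter
    for trace, p in zip(agent.eligibility, agent.network.parameters()):
        trace.mul_(lam).add_(p.grad)  # decay old credit, accumulate new gradient

With a loop like this running during a hand, calling newHand() between hands restores every trace to zeros, which is exactly what this commit adds.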