Module rlmodels.models.grad_utils
Expand source code
import torch
import torch.optim as optim
import numpy as np
class SumTree:
    """Efficient memory data structure class (fast retrievals and updates).

    The tree is stored as a flat array of ``2 * capacity - 1`` nodes. The last
    *capacity* entries are the leaves, each holding one weight; every other
    node holds the sum of its two children, so the root holds the total
    weight. Data items live in a separate array and are written in a ring:
    once *capacity* items have been added, the oldest slot is overwritten.

    Source of the SumTree class code:
    https://github.com/jaromiru/AI-blog/blob/master/SumTree.py

    **Parameters**:

    *capacity* (*int*): number of tree leaves (must be at least 1)
    """

    # Class-level defaults are kept for backward compatibility; every instance
    # gets its own counters in __init__.
    write = 0
    current_size = 0

    def __init__(self, capacity):
        if capacity < 1:
            raise ValueError("capacity must be a positive integer")
        self.capacity = capacity
        self.tree = np.zeros(2 * capacity - 1)
        # Unused data slots hold the integer 0 until they are written.
        self.data = np.zeros(capacity, dtype=object)
        self.write = 0          # next data slot to be written
        self.current_size = 0   # number of filled slots, capped at capacity

    def _propagate(self, idx, change):
        # Add `change` to every ancestor of node `idx`, root included.
        # Written as a loop so that calling it on the root is a no-op.
        while idx != 0:
            idx = (idx - 1) // 2
            self.tree[idx] += change

    def _retrieve(self, idx, s):
        # Walk down from node `idx` to a leaf: go left while `s` fits into the
        # left subtree's weight, otherwise discount that weight and go right.
        node_count = len(self.tree)
        while True:
            left = 2 * idx + 1
            if left >= node_count:
                # No children: `idx` is a leaf.
                return idx
            left_weight = self.tree[left]
            if s <= left_weight:
                idx = left
            else:
                # Rebind instead of `-=` so a caller-owned array is never mutated.
                s = s - left_weight
                idx = left + 1

    def get_current_size(self):
        """returns the number of stored items (never more than *capacity*)
        """
        return self.current_size

    def total(self):
        """returns the sum of leaf weights
        """
        return self.tree[0]

    def add(self, p, data):
        """adds data to tree, potentially overwriting older data

        **Parameters**:

        *p* (*float*): leaf weight

        *data*: leaf data
        """
        idx = self.write + self.capacity - 1
        self.data[self.write] = data
        self.update(idx, p)

        # Advance the ring-buffer cursor, wrapping to the first slot.
        self.write += 1
        if self.write >= self.capacity:
            self.write = 0
        self.current_size = min(self.current_size + 1, self.capacity)

    def update(self, idx, p):
        """updates leaf weight

        **Parameters**:

        *idx* (*int*): index of the leaf in the tree array (the first element
        of the triple returned by `get`)

        *p* (*float*): new weight
        """
        change = p - self.tree[idx]
        self.tree[idx] = p
        # With a single leaf the leaf is the root and nothing is propagated.
        self._propagate(idx, change)

    def get(self, s):
        """get leaf corresponding to numeric value

        The search starts at the root; at each node it descends left when *s*
        is at most the left child's weight, otherwise it subtracts that weight
        from *s* and descends right. *s* is not validated against `total()`.

        **Parameters**:

        *s* (*float*): numeric value

        **Returns**:

        triplet with the leaf's index in the tree array (*int*), the leaf
        weight (*float*) and the leaf data
        """
        idx = self._retrieve(0, s)
        dataIdx = idx - self.capacity + 1
        return (idx, self.tree[idx], self.data[dataIdx])
class Agent(object):
    """Wrapper that keeps a network together with its optimiser.

    An optional scheduler can be attached through the `scheduler` attribute;
    it is unset on construction.

    **Parameters**:

    *model* (*torch.nn.Module*): Pytorch neural network model

    *opt* (*torch.optim*): Pytorch optimizer object
    """

    def __init__(self, model, opt=None):
        # No scheduler by default; `_step` does nothing until one is assigned.
        self.scheduler = None
        self.optim = opt
        self.model = model

    def forward(self, x):
        """Run the wrapped *model*'s *forward* method on the input.

        **Parameters**:

        *x* (*torch.Tensor* or *numpy.ndarray*): state tensor; a NumPy array
        is first converted to a float tensor
        """
        inputs = torch.from_numpy(x).float() if isinstance(x, np.ndarray) else x
        return self.model.forward(inputs)

    def _step(self):
        # Advance the attached scheduler by one step, if there is one.
        scheduler = self.scheduler
        if scheduler is None:
            return
        scheduler.step()
# class FormattedActionEnv(object):
# """environment wrapper that formats a model output action to gym environment's required format
# Parameters:
# *env* : environment with the same interface as in gym library
# *action_map* (*function*): mapper that takes a model output and formats it to the environment's standard input type
# """
# def __init__(self,env,action_map):
# self.env = env
# self.action_space = self.env.action_space
# self.observation_space = self.env.observation_space
# self.action_map = action_map
# def step(self,a):
# a = self.action_map(a)
# s,r,done,info = self.env.step(a)
# return s,r,done,info
# def render(self):
# self.env.render()
# def close(self):
# self.env.close()
# def reset(self):
# self.env.reset()
# def seed(self,seed):
# self.env.seed(seed)
Classes
class Agent (model, opt=None)
-
neural network gradient optimisation wrapper
Parameters:
model (torch.nn.Module): Pytorch neural network model
opt (torch.optim): Pytorch optimizer object
Expand source code
class Agent(object):
    """Wrapper that keeps a network together with its optimiser.

    An optional scheduler can be attached through the `scheduler` attribute;
    it is unset on construction.

    **Parameters**:

    *model* (*torch.nn.Module*): Pytorch neural network model

    *opt* (*torch.optim*): Pytorch optimizer object
    """

    def __init__(self, model, opt=None):
        # No scheduler by default; `_step` does nothing until one is assigned.
        self.scheduler = None
        self.optim = opt
        self.model = model

    def forward(self, x):
        """Run the wrapped *model*'s *forward* method on the input.

        **Parameters**:

        *x* (*torch.Tensor* or *numpy.ndarray*): state tensor; a NumPy array
        is first converted to a float tensor
        """
        inputs = torch.from_numpy(x).float() if isinstance(x, np.ndarray) else x
        return self.model.forward(inputs)

    def _step(self):
        # Advance the attached scheduler by one step, if there is one.
        scheduler = self.scheduler
        if scheduler is None:
            return
        scheduler.step()
Methods
def forward(self, x)
-
Apply model's forward method to input
Parameters:
x (torch.Tensor or numpy.ndarray): state tensor; a NumPy array is converted to a float tensor before the model is applied
Expand source code
def forward(self, x):
    """Run the wrapped *model*'s *forward* method on the input.

    **Parameters**:

    *x* (*torch.Tensor* or *numpy.ndarray*): state tensor; a NumPy array is
    first converted to a float tensor
    """
    inputs = torch.from_numpy(x).float() if isinstance(x, np.ndarray) else x
    return self.model.forward(inputs)
class SumTree (capacity)
-
efficient memory data structure class (fast retrievals and updates).
source of the SumTree class code : https://github.com/jaromiru/AI-blog/blob/master/SumTree.py
Parameters:
capacity (int): number of tree leaves
Expand source code
class SumTree:
    """Efficient memory data structure class (fast retrievals and updates).

    The tree is stored as a flat array of ``2 * capacity - 1`` nodes. The last
    *capacity* entries are the leaves, each holding one weight; every other
    node holds the sum of its two children, so the root holds the total
    weight. Data items live in a separate array and are written in a ring:
    once *capacity* items have been added, the oldest slot is overwritten.

    Source of the SumTree class code:
    https://github.com/jaromiru/AI-blog/blob/master/SumTree.py

    **Parameters**:

    *capacity* (*int*): number of tree leaves (must be at least 1)
    """

    # Class-level defaults are kept for backward compatibility; every instance
    # gets its own counters in __init__.
    write = 0
    current_size = 0

    def __init__(self, capacity):
        if capacity < 1:
            raise ValueError("capacity must be a positive integer")
        self.capacity = capacity
        self.tree = np.zeros(2 * capacity - 1)
        # Unused data slots hold the integer 0 until they are written.
        self.data = np.zeros(capacity, dtype=object)
        self.write = 0          # next data slot to be written
        self.current_size = 0   # number of filled slots, capped at capacity

    def _propagate(self, idx, change):
        # Add `change` to every ancestor of node `idx`, root included.
        # Written as a loop so that calling it on the root is a no-op.
        while idx != 0:
            idx = (idx - 1) // 2
            self.tree[idx] += change

    def _retrieve(self, idx, s):
        # Walk down from node `idx` to a leaf: go left while `s` fits into the
        # left subtree's weight, otherwise discount that weight and go right.
        node_count = len(self.tree)
        while True:
            left = 2 * idx + 1
            if left >= node_count:
                # No children: `idx` is a leaf.
                return idx
            left_weight = self.tree[left]
            if s <= left_weight:
                idx = left
            else:
                # Rebind instead of `-=` so a caller-owned array is never mutated.
                s = s - left_weight
                idx = left + 1

    def get_current_size(self):
        """returns the number of stored items (never more than *capacity*)
        """
        return self.current_size

    def total(self):
        """returns the sum of leaf weights
        """
        return self.tree[0]

    def add(self, p, data):
        """adds data to tree, potentially overwriting older data

        **Parameters**:

        *p* (*float*): leaf weight

        *data*: leaf data
        """
        idx = self.write + self.capacity - 1
        self.data[self.write] = data
        self.update(idx, p)

        # Advance the ring-buffer cursor, wrapping to the first slot.
        self.write += 1
        if self.write >= self.capacity:
            self.write = 0
        self.current_size = min(self.current_size + 1, self.capacity)

    def update(self, idx, p):
        """updates leaf weight

        **Parameters**:

        *idx* (*int*): index of the leaf in the tree array (the first element
        of the triple returned by `get`)

        *p* (*float*): new weight
        """
        change = p - self.tree[idx]
        self.tree[idx] = p
        # With a single leaf the leaf is the root and nothing is propagated.
        self._propagate(idx, change)

    def get(self, s):
        """get leaf corresponding to numeric value

        The search starts at the root; at each node it descends left when *s*
        is at most the left child's weight, otherwise it subtracts that weight
        from *s* and descends right. *s* is not validated against `total()`.

        **Parameters**:

        *s* (*float*): numeric value

        **Returns**:

        triplet with the leaf's index in the tree array (*int*), the leaf
        weight (*float*) and the leaf data
        """
        idx = self._retrieve(0, s)
        dataIdx = idx - self.capacity + 1
        return (idx, self.tree[idx], self.data[dataIdx])
Class variables
var current_size
var write
Methods
def add(self, p, data)
-
adds data to tree, potentially overwriting older data
Parameters:
p (float): leaf weight
data: leaf data
Expand source code
def add(self, p, data):
    """Store *data* with weight *p* in the next slot, potentially overwriting
    older data.

    **Parameters**:

    *p* (*float*): leaf weight

    *data*: leaf data
    """
    slot = self.write
    # Leaves occupy the last `capacity` positions of the tree array.
    leaf = slot + self.capacity - 1
    self.data[slot] = data
    self.update(leaf, p)

    # Move the write cursor on, wrapping back to the first slot when full.
    following = slot + 1
    self.write = 0 if following >= self.capacity else following
    self.current_size = min(self.current_size + 1, self.capacity)
def get(self, s)
-
get leaf corresponding to numeric value
Parameters:
s (float): numeric value
Returns:
triplet with the leaf's index in the tree array (int), the leaf weight (float) and the leaf data
Expand source code
def get(self, s):
    """Look up the leaf that the numeric value *s* falls into.

    **Parameters**:

    *s* (*float*): numeric value

    **Returns**:

    triplet with the leaf's index in the tree array (*int*), the weight stored
    at that leaf and the leaf data
    """
    leaf = self._retrieve(0, s)
    # Convert the tree-array position into a position in the data array.
    slot = leaf + 1 - self.capacity
    weight = self.tree[leaf]
    item = self.data[slot]
    return (leaf, weight, item)
def get_current_size(self)
-
Expand source code
def get_current_size(self):
    """returns the value of the `current_size` counter
    """
    stored = self.current_size
    return stored
def total(self)
-
returns the sum of leaf weights
Expand source code
def total(self):
    """returns the sum of leaf weights, read from the first tree entry
    """
    root_weight = self.tree[0]
    return root_weight
def update(self, idx, p)
-
updates leaf weight
Parameters:
idx (int): index of the leaf in the tree array (the first element of the triplet returned by get)
p (float): new weight
Expand source code
def update(self, idx, p):
    """Set the weight stored at tree position *idx* and pass the difference
    up the tree.

    **Parameters**:

    *idx* (*int*): leaf index (position in the tree array)

    *p* (*float*): new weight
    """
    delta = p - self.tree[idx]
    self.tree[idx] = p
    # A single-leaf tree is skipped: nothing is propagated.
    if self.capacity <= 1:
        return
    self._propagate(idx, delta)