ml-double-q-learning
Library implementing the Double Q-learning algorithm (Hado van Hasselt, NIPS 2010).

Paper: https://papers.nips.cc/paper/3964-double-q-learning.pdf
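Double Q-learning keeps two Q tables and, at each update, uses one table to pick the greedy next action and the other to evaluate it, which counters the overestimation bias of standard Q-learning. The sketch below is a generic, standalone illustration of that update rule, not the library's internal code; all names in it are made up for the example.

```ts
type QTable = Map<string, number[]>; // state -> estimated value per action index

function values(table: QTable, state: string, nActions: number): number[] {
  if (!table.has(state)) table.set(state, new Array<number>(nActions).fill(0));
  return table.get(state)!;
}

function argmax(xs: number[]): number {
  return xs.indexOf(Math.max(...xs));
}

// One Double Q-learning update for a transition (s, a, r, sNext).
function doubleQUpdate(
  QA: QTable,
  QB: QTable,
  s: string,
  a: number,
  r: number,
  sNext: string,
  nActions: number,
  alpha = 0.1,   // learning rate
  gamma = 0.99,  // discount factor
): void {
  // Flip a coin: update one table, but evaluate the greedy next action with the other.
  const [toUpdate, evaluator] = Math.random() < 0.5 ? [QA, QB] : [QB, QA];
  const qs = values(toUpdate, s, nActions);
  const aStar = argmax(values(toUpdate, sNext, nActions));              // greedy action per the updated table
  const target = r + gamma * values(evaluator, sNext, nActions)[aStar]; // value per the other table
  qs[a] += alpha * (target - qs[a]);
}
```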
Install

```bash
npm install ml-double-q-learning
```
DoubleQLearningAgent

```ts
export class DoubleQLearningAgent<TAction = any> implements IQLearningAgent {
  public replayMemory: [string, number, number][] = [];
  public episode: number = 0;
  public trained = false;

  constructor(
    public actions: TAction[],
    private pickActionStrategy: (actionsStats: number[], episode: number) => Promise<number> = greedyPickAction,
    public memory: IMemoryAdapter = new MapInMemory(),
    public learningRate = 0.1,
    public discountFactor = 0.99,
  ) {}

  // Pick an action for the given state (via pickActionStrategy) and record the step.
  public async play(state: IState): Promise<IStep<TAction>> { /* ... */ }

  // Attach an observed reward to a previously played step.
  public reward(step: IStep<TAction>, reward: number): void { /* ... */ }

  // Apply the steps recorded in replayMemory to the stored Q values.
  public async learn(): Promise<void> { /* ... */ }
}
```
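A minimal usage sketch of this API, assuming a hypothetical environment with `encodeState`, `applyAction`, and `observeReward` helpers (not part of the library), and assuming the step object returned by `play()` carries the chosen action and is what `reward()` expects back:

```ts
import { DoubleQLearningAgent } from 'ml-double-q-learning';

// Hypothetical environment glue for illustration only; not part of the library.
declare function encodeState(): any;                 // build the IState for the current position
declare function applyAction(action: string): void;  // advance the environment
declare function observeReward(): number;            // reward observed after the action

type Action = 'up' | 'down' | 'left' | 'right';
const agent = new DoubleQLearningAgent<Action>(['up', 'down', 'left', 'right']);

async function runEpisode(maxSteps = 100): Promise<void> {
  for (let i = 0; i < maxSteps; i++) {
    const step = await agent.play(encodeState()); // pick an action via the configured strategy
    applyAction(step.action);                     // assumes the step object exposes the chosen action
    agent.reward(step, observeReward());          // attach the observed reward to that step
  }
  await agent.learn();                            // fold the recorded steps into the Q values
}
```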
Memory (from ml-q-learning)

- MapInMemory (the default)

Pick action strategy (from ml-q-learning)

- randomPickAction
- greedyPickAction (the default)
- epsilonGreedyPickAction
- decayingEpsilonGreedyPickAction
- softmaxPickAction
- epsilonSoftmaxGreedyPickAction
- decayingEpsilonSoftmaxGreedyPickAction

A custom strategy can also be passed to the constructor; see the sketch below.
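A strategy is just a function with the `(actionsStats, episode) => Promise<number>` signature shown in the constructor, so you can roll your own. A sketch, assuming `MapInMemory` is importable from ml-q-learning (the import path is an assumption):

```ts
import { DoubleQLearningAgent } from 'ml-double-q-learning';
import { MapInMemory } from 'ml-q-learning'; // import path assumed; MapInMemory is the default memory

// Hand-rolled ε-greedy strategy matching the documented
// (actionsStats: number[], episode: number) => Promise<number> signature.
const epsilon = 0.1;
const epsilonGreedy = async (actionsStats: number[], _episode: number): Promise<number> => {
  if (Math.random() < epsilon) {
    return Math.floor(Math.random() * actionsStats.length); // explore: random action index
  }
  return actionsStats.indexOf(Math.max(...actionsStats));   // exploit: best-valued action index
};

const agent = new DoubleQLearningAgent(
  ['up', 'down', 'left', 'right'], // actions
  epsilonGreedy,                   // pickActionStrategy
  new MapInMemory(),               // memory
  0.1,                             // learningRate
  0.99,                            // discountFactor
);
```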
Example use

Maze escape

- `P` - Player
- `#` - Wall
- `.` - Nothing
- `X` - Trap, reward -200
- `R` - Treasure, reward 200
- `F` - Finish, reward 1000

The tile rewards map directly to numbers, as sketched below.
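A reward lookup like the legend above could be encoded as follows (a sketch; the actual example code in the repository may differ, and the 0 values for empty cells and walls are an assumption):

```ts
// Reward per tile type, taken from the legend above.
const TILE_REWARD: Record<string, number> = {
  X: -200, // trap
  R: 200,  // treasure
  F: 1000, // finish
  '.': 0,  // empty cell (assumed)
  '#': 0,  // wall, not enterable (assumed)
};
```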
Start maze

```
[ [ 'P', '.', '.', '#', '.', '.', '.', '#', 'R' ],
  [ '.', '#', '.', '#', '.', '.', '.', '#', '.' ],
  [ '.', '#', '.', '#', '.', '#', '.', '#', '.' ],
  [ '.', '#', 'X', '#', '.', '#', '.', '.', '.' ],
  [ '.', '#', '#', '#', 'F', '#', '.', '.', '.' ],
  [ '.', '#', '.', '#', '#', '#', '.', '#', 'X' ],
  [ '.', '.', 'X', '.', '.', '.', '.', '#', '.' ],
  [ '.', '.', '.', '.', '#', '.', '.', '#', 'R' ] ]
```
...many plays...

```
-------------------------------
numberOfPlay: 35702
score: 1168
episode: 3322672
memorySize: 968
-------------------------------
```
End maze

```
[ [ '.', '.', '.', '#', '.', '.', '.', '#', '.' ],
  [ '.', '#', '.', '#', '.', '.', '.', '#', '.' ],
  [ '.', '#', '.', '#', '.', '#', '.', '#', '.' ],
  [ '.', '#', 'X', '#', '.', '#', '.', '.', '.' ],
  [ '.', '#', '#', '#', 'P', '#', '.', '.', '.' ],
  [ '.', '#', '.', '#', '#', '#', '.', '#', 'X' ],
  [ '.', '.', 'X', '.', '.', '.', '.', '#', '.' ],
  [ '.', '.', '.', '.', '#', '.', '.', '#', 'R' ] ]
```
Sources

- Hado van Hasselt, "Double Q-learning", NIPS 2010: https://papers.nips.cc/paper/3964-double-q-learning.pdf
- "Double Q-Learning, the Easy Way" (Towards Data Science): https://towardsdatascience.com/double-q-learning-the-easy-way-a924c4085ec3