diff --git a/policy/FeudalGainPolicy.py b/policy/FeudalGainPolicy.py index 5ea4b20fc4f741e665315ca0b858e4e3b8a59984..71236cacbb4f06fe9c2ac23f5b79d6b8a65fe189 100644 --- a/policy/FeudalGainPolicy.py +++ b/policy/FeudalGainPolicy.py @@ -21,13 +21,17 @@ ############################################################################### -""" -Implementation of FeudalGain - -Paper: Arxiv reference +''' +FeudalGainPolicy.py - What Does The User Want? Information Gain for Hierarchical Dialogue Policy Optimisation +================================================== Author: Christian Geishauser -""" + +The implementation of the FeudalGain algorithm that incorporates information gain as an intrinsic reward in order to update a Feudal policy. +The details can be found here: https://arxiv.org/abs/2109.07129 + +''' + import numpy as np import random