Commit a719c81e authored by Christian

updated readmes

parent d7872038
@@ -35,24 +35,19 @@ For creating evaluation plots and running evaluation dialogues, please have a lo
 ## References
 ```
-@inproceedings{devlin2019bert,
-  title={BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding},
-  author={Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina},
-  booktitle={Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)},
-  pages={4171--4186},
-  year={2019}
-}
-
-@inproceedings{zhu-etal-2020-convlab,
-  title = "{C}onv{L}ab-2: An Open-Source Toolkit for Building, Evaluating, and Diagnosing Dialogue Systems",
-  author = "Zhu, Qi and Zhang, Zheng and Fang, Yan and Li, Xiang and Takanobu, Ryuichi and Li, Jinchao and Peng, Baolin and Gao, Jianfeng and Zhu, Xiaoyan and Huang, Minlie",
-  booktitle = "Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics: System Demonstrations",
-  month = jul,
-  year = "2020",
-  address = "Online",
-  publisher = "Association for Computational Linguistics",
-  url = "https://aclanthology.org/2020.acl-demos.19",
-  doi = "10.18653/v1/2020.acl-demos.19",
-  pages = "142--149"
-}
+@inproceedings{takanobu-etal-2019-guided,
+  title = "Guided Dialog Policy Learning: Reward Estimation for Multi-Domain Task-Oriented Dialog",
+  author = "Takanobu, Ryuichi and
+    Zhu, Hanlin and
+    Huang, Minlie",
+  booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)",
+  month = nov,
+  year = "2019",
+  address = "Hong Kong, China",
+  publisher = "Association for Computational Linguistics",
+  url = "https://aclanthology.org/D19-1010",
+  doi = "10.18653/v1/D19-1010",
+  pages = "100--110",
+  abstract = "Dialog policy decides what and how a task-oriented dialog system will respond, and plays a vital role in delivering effective conversations. Many studies apply Reinforcement Learning to learn a dialog policy with the reward function which requires elaborate design and pre-specified user goals. With the growing needs to handle complex goals across multiple domains, such manually designed reward functions are not affordable to deal with the complexity of real-world tasks. To this end, we propose Guided Dialog Policy Learning, a novel algorithm based on Adversarial Inverse Reinforcement Learning for joint reward estimation and policy optimization in multi-domain task-oriented dialog. The proposed approach estimates the reward signal and infers the user goal in the dialog sessions. The reward estimator evaluates the state-action pairs so that it can guide the dialog policy at each dialog turn. Extensive experiments on a multi-domain dialog dataset show that the dialog policy guided by the learned reward function achieves remarkably higher task success than state-of-the-art baselines.",
+}
 ```
\ No newline at end of file
@@ -35,24 +35,15 @@ For creating evaluation plots and running evaluation dialogues, please have a lo
 ## References
 ```
-@inproceedings{devlin2019bert,
-  title={BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding},
-  author={Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina},
-  booktitle={Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)},
-  pages={4171--4186},
-  year={2019}
-}
-
-@inproceedings{zhu-etal-2020-convlab,
-  title = "{C}onv{L}ab-2: An Open-Source Toolkit for Building, Evaluating, and Diagnosing Dialogue Systems",
-  author = "Zhu, Qi and Zhang, Zheng and Fang, Yan and Li, Xiang and Takanobu, Ryuichi and Li, Jinchao and Peng, Baolin and Gao, Jianfeng and Zhu, Xiaoyan and Huang, Minlie",
-  booktitle = "Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics: System Demonstrations",
-  month = jul,
-  year = "2020",
-  address = "Online",
-  publisher = "Association for Computational Linguistics",
-  url = "https://aclanthology.org/2020.acl-demos.19",
-  doi = "10.18653/v1/2020.acl-demos.19",
-  pages = "142--149"
-}
+@inproceedings{NIPS1999_464d828b,
+  author = {Sutton, Richard S and McAllester, David and Singh, Satinder and Mansour, Yishay},
+  booktitle = {Advances in Neural Information Processing Systems},
+  editor = {S. Solla and T. Leen and K. M\"{u}ller},
+  pages = {},
+  publisher = {MIT Press},
+  title = {Policy Gradient Methods for Reinforcement Learning with Function Approximation},
+  url = {https://proceedings.neurips.cc/paper/1999/file/464d828b85b0bed98e80ade0a5c43b0f-Paper.pdf},
+  volume = {12},
+  year = {1999}
+}
 ```
\ No newline at end of file
@@ -35,24 +35,21 @@ For creating evaluation plots and running evaluation dialogues, please have a lo
 ## References
 ```
-@inproceedings{devlin2019bert,
-  title={BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding},
-  author={Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina},
-  booktitle={Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)},
-  pages={4171--4186},
-  year={2019}
-}
-
-@inproceedings{zhu-etal-2020-convlab,
-  title = "{C}onv{L}ab-2: An Open-Source Toolkit for Building, Evaluating, and Diagnosing Dialogue Systems",
-  author = "Zhu, Qi and Zhang, Zheng and Fang, Yan and Li, Xiang and Takanobu, Ryuichi and Li, Jinchao and Peng, Baolin and Gao, Jianfeng and Zhu, Xiaoyan and Huang, Minlie",
-  booktitle = "Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics: System Demonstrations",
-  month = jul,
-  year = "2020",
-  address = "Online",
-  publisher = "Association for Computational Linguistics",
-  url = "https://aclanthology.org/2020.acl-demos.19",
-  doi = "10.18653/v1/2020.acl-demos.19",
-  pages = "142--149"
-}
+@article{DBLP:journals/corr/SchulmanWDRK17,
+  author = {John Schulman and
+    Filip Wolski and
+    Prafulla Dhariwal and
+    Alec Radford and
+    Oleg Klimov},
+  title = {Proximal Policy Optimization Algorithms},
+  journal = {CoRR},
+  volume = {abs/1707.06347},
+  year = {2017},
+  url = {http://arxiv.org/abs/1707.06347},
+  eprinttype = {arXiv},
+  eprint = {1707.06347},
+  timestamp = {Mon, 13 Aug 2018 16:47:34 +0200},
+  biburl = {https://dblp.org/rec/journals/corr/SchulmanWDRK17.bib},
+  bibsource = {dblp computer science bibliography, https://dblp.org}
+}
 ```
\ No newline at end of file
@@ -51,6 +51,25 @@ For creating evaluation plots and running evaluation dialogues, please have a lo
 ## References
 ```
+@inproceedings{geishauser-etal-2022-dynamic,
+  title = "Dynamic Dialogue Policy for Continual Reinforcement Learning",
+  author = "Geishauser, Christian and
+    van Niekerk, Carel and
+    Lin, Hsien-chin and
+    Lubis, Nurul and
+    Heck, Michael and
+    Feng, Shutong and
+    Ga{\v{s}}i{\'c}, Milica",
+  booktitle = "Proceedings of the 29th International Conference on Computational Linguistics",
+  month = oct,
+  year = "2022",
+  address = "Gyeongju, Republic of Korea",
+  publisher = "International Committee on Computational Linguistics",
+  url = "https://aclanthology.org/2022.coling-1.21",
+  pages = "266--284",
+  abstract = "Continual learning is one of the key components of human learning and a necessary requirement of artificial intelligence. As dialogue can potentially span infinitely many topics and tasks, a task-oriented dialogue system must have the capability to continually learn, dynamically adapting to new challenges while preserving the knowledge it already acquired. Despite the importance, continual reinforcement learning of the dialogue policy has remained largely unaddressed. The lack of a framework with training protocols, baseline models and suitable metrics, has so far hindered research in this direction. In this work we fill precisely this gap, enabling research in dialogue policy optimisation to go from static to dynamic learning. We provide a continual learning algorithm, baseline architectures and metrics for assessing continual learning models. Moreover, we propose the dynamic dialogue policy transformer (DDPT), a novel dynamic architecture that can integrate new knowledge seamlessly, is capable of handling large state spaces and obtains significant zero-shot performance when being exposed to unseen domains, without any growth in network parameter size. We validate the strengths of DDPT in simulation with two user simulators as well as with humans.",
+}
 @inproceedings{NEURIPS2019_fa7cdfad,
   author = {Rolnick, David and Ahuja, Arun and Schwarz, Jonathan and Lillicrap, Timothy and Wayne, Gregory},
   booktitle = {Advances in Neural Information Processing Systems},
@@ -62,4 +81,21 @@ For creating evaluation plots and running evaluation dialogues, please have a lo
   volume = {32},
   year = {2019}
 }
+@InProceedings{pmlr-v80-espeholt18a,
+  title = {{IMPALA}: Scalable Distributed Deep-{RL} with Importance Weighted Actor-Learner Architectures},
+  author = {Espeholt, Lasse and Soyer, Hubert and Munos, Remi and Simonyan, Karen and Mnih, Vlad and Ward, Tom and Doron, Yotam and Firoiu, Vlad and Harley, Tim and Dunning, Iain and Legg, Shane and Kavukcuoglu, Koray},
+  booktitle = {Proceedings of the 35th International Conference on Machine Learning},
+  pages = {1407--1416},
+  year = {2018},
+  editor = {Dy, Jennifer and Krause, Andreas},
+  volume = {80},
+  series = {Proceedings of Machine Learning Research},
+  month = {10--15 Jul},
+  publisher = {PMLR},
+  pdf = {http://proceedings.mlr.press/v80/espeholt18a/espeholt18a.pdf},
+  url = {https://proceedings.mlr.press/v80/espeholt18a.html},
+  abstract = {In this work we aim to solve a large collection of tasks using a single reinforcement learning agent with a single set of parameters. A key challenge is to handle the increased amount of data and extended training time. We have developed a new distributed agent IMPALA (Importance Weighted Actor-Learner Architecture) that not only uses resources more efficiently in single-machine training but also scales to thousands of machines without sacrificing data efficiency or resource utilisation. We achieve stable learning at high throughput by combining decoupled acting and learning with a novel off-policy correction method called V-trace. We demonstrate the effectiveness of IMPALA for multi-task reinforcement learning on DMLab-30 (a set of 30 tasks from the DeepMind Lab environment (Beattie et al., 2016)) and Atari57 (all available Atari games in Arcade Learning Environment (Bellemare et al., 2013a)). Our results show that IMPALA is able to achieve better performance than previous agents with less data, and crucially exhibits positive transfer between tasks as a result of its multi-task approach.}
+}
 ```
\ No newline at end of file