Commit a719c81e authored by Christian

updated readmes

parent d7872038
@@ -35,24 +35,19 @@ For creating evaluation plots and running evaluation dialogues, please have a lo
 ## References
 ```
-@inproceedings{devlin2019bert,
-  title={BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding},
-  author={Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina},
-  booktitle={Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)},
-  pages={4171--4186},
-  year={2019}
-}
-
-@inproceedings{zhu-etal-2020-convlab,
-  title = "{C}onv{L}ab-2: An Open-Source Toolkit for Building, Evaluating, and Diagnosing Dialogue Systems",
-  author = "Zhu, Qi and Zhang, Zheng and Fang, Yan and Li, Xiang and Takanobu, Ryuichi and Li, Jinchao and Peng, Baolin and Gao, Jianfeng and Zhu, Xiaoyan and Huang, Minlie",
-  booktitle = "Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics: System Demonstrations",
-  month = jul,
-  year = "2020",
-  address = "Online",
-  publisher = "Association for Computational Linguistics",
-  url = "https://aclanthology.org/2020.acl-demos.19",
-  doi = "10.18653/v1/2020.acl-demos.19",
-  pages = "142--149"
-}
+@inproceedings{takanobu-etal-2019-guided,
+  title = "Guided Dialog Policy Learning: Reward Estimation for Multi-Domain Task-Oriented Dialog",
+  author = "Takanobu, Ryuichi and
+    Zhu, Hanlin and
+    Huang, Minlie",
+  booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)",
+  month = nov,
+  year = "2019",
+  address = "Hong Kong, China",
+  publisher = "Association for Computational Linguistics",
+  url = "https://aclanthology.org/D19-1010",
+  doi = "10.18653/v1/D19-1010",
+  pages = "100--110",
+  abstract = "Dialog policy decides what and how a task-oriented dialog system will respond, and plays a vital role in delivering effective conversations. Many studies apply Reinforcement Learning to learn a dialog policy with the reward function which requires elaborate design and pre-specified user goals. With the growing needs to handle complex goals across multiple domains, such manually designed reward functions are not affordable to deal with the complexity of real-world tasks. To this end, we propose Guided Dialog Policy Learning, a novel algorithm based on Adversarial Inverse Reinforcement Learning for joint reward estimation and policy optimization in multi-domain task-oriented dialog. The proposed approach estimates the reward signal and infers the user goal in the dialog sessions. The reward estimator evaluates the state-action pairs so that it can guide the dialog policy at each dialog turn. Extensive experiments on a multi-domain dialog dataset show that the dialog policy guided by the learned reward function achieves remarkably higher task success than state-of-the-art baselines.",
+}
 ```
\ No newline at end of file
@@ -35,24 +35,15 @@ For creating evaluation plots and running evaluation dialogues, please have a lo
 ## References
 ```
-@inproceedings{devlin2019bert,
-  title={BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding},
-  author={Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina},
-  booktitle={Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)},
-  pages={4171--4186},
-  year={2019}
-}
-
-@inproceedings{zhu-etal-2020-convlab,
-  title = "{C}onv{L}ab-2: An Open-Source Toolkit for Building, Evaluating, and Diagnosing Dialogue Systems",
-  author = "Zhu, Qi and Zhang, Zheng and Fang, Yan and Li, Xiang and Takanobu, Ryuichi and Li, Jinchao and Peng, Baolin and Gao, Jianfeng and Zhu, Xiaoyan and Huang, Minlie",
-  booktitle = "Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics: System Demonstrations",
-  month = jul,
-  year = "2020",
-  address = "Online",
-  publisher = "Association for Computational Linguistics",
-  url = "https://aclanthology.org/2020.acl-demos.19",
-  doi = "10.18653/v1/2020.acl-demos.19",
-  pages = "142--149"
-}
+@inproceedings{NIPS1999_464d828b,
+  author = {Sutton, Richard S and McAllester, David and Singh, Satinder and Mansour, Yishay},
+  booktitle = {Advances in Neural Information Processing Systems},
+  editor = {S. Solla and T. Leen and K. M\"{u}ller},
+  pages = {},
+  publisher = {MIT Press},
+  title = {Policy Gradient Methods for Reinforcement Learning with Function Approximation},
+  url = {https://proceedings.neurips.cc/paper/1999/file/464d828b85b0bed98e80ade0a5c43b0f-Paper.pdf},
+  volume = {12},
+  year = {1999}
+}
 ```
\ No newline at end of file
@@ -35,24 +35,21 @@ For creating evaluation plots and running evaluation dialogues, please have a lo
 ## References
 ```
-@inproceedings{devlin2019bert,
-  title={BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding},
-  author={Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina},
-  booktitle={Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)},
-  pages={4171--4186},
-  year={2019}
-}
-
-@inproceedings{zhu-etal-2020-convlab,
-  title = "{C}onv{L}ab-2: An Open-Source Toolkit for Building, Evaluating, and Diagnosing Dialogue Systems",
-  author = "Zhu, Qi and Zhang, Zheng and Fang, Yan and Li, Xiang and Takanobu, Ryuichi and Li, Jinchao and Peng, Baolin and Gao, Jianfeng and Zhu, Xiaoyan and Huang, Minlie",
-  booktitle = "Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics: System Demonstrations",
-  month = jul,
-  year = "2020",
-  address = "Online",
-  publisher = "Association for Computational Linguistics",
-  url = "https://aclanthology.org/2020.acl-demos.19",
-  doi = "10.18653/v1/2020.acl-demos.19",
-  pages = "142--149"
-}
+@article{DBLP:journals/corr/SchulmanWDRK17,
+  author = {John Schulman and
+    Filip Wolski and
+    Prafulla Dhariwal and
+    Alec Radford and
+    Oleg Klimov},
+  title = {Proximal Policy Optimization Algorithms},
+  journal = {CoRR},
+  volume = {abs/1707.06347},
+  year = {2017},
+  url = {http://arxiv.org/abs/1707.06347},
+  eprinttype = {arXiv},
+  eprint = {1707.06347},
+  timestamp = {Mon, 13 Aug 2018 16:47:34 +0200},
+  biburl = {https://dblp.org/rec/journals/corr/SchulmanWDRK17.bib},
+  bibsource = {dblp computer science bibliography, https://dblp.org}
+}
 ```
\ No newline at end of file
@@ -51,6 +51,25 @@ For creating evaluation plots and running evaluation dialogues, please have a lo
 ## References
 ```
+@inproceedings{geishauser-etal-2022-dynamic,
+  title = "Dynamic Dialogue Policy for Continual Reinforcement Learning",
+  author = "Geishauser, Christian and
+    van Niekerk, Carel and
+    Lin, Hsien-chin and
+    Lubis, Nurul and
+    Heck, Michael and
+    Feng, Shutong and
+    Ga{\v{s}}i{\'c}, Milica",
+  booktitle = "Proceedings of the 29th International Conference on Computational Linguistics",
+  month = oct,
+  year = "2022",
+  address = "Gyeongju, Republic of Korea",
+  publisher = "International Committee on Computational Linguistics",
+  url = "https://aclanthology.org/2022.coling-1.21",
+  pages = "266--284",
+  abstract = "Continual learning is one of the key components of human learning and a necessary requirement of artificial intelligence. As dialogue can potentially span infinitely many topics and tasks, a task-oriented dialogue system must have the capability to continually learn, dynamically adapting to new challenges while preserving the knowledge it already acquired. Despite the importance, continual reinforcement learning of the dialogue policy has remained largely unaddressed. The lack of a framework with training protocols, baseline models and suitable metrics, has so far hindered research in this direction. In this work we fill precisely this gap, enabling research in dialogue policy optimisation to go from static to dynamic learning. We provide a continual learning algorithm, baseline architectures and metrics for assessing continual learning models. Moreover, we propose the dynamic dialogue policy transformer (DDPT), a novel dynamic architecture that can integrate new knowledge seamlessly, is capable of handling large state spaces and obtains significant zero-shot performance when being exposed to unseen domains, without any growth in network parameter size. We validate the strengths of DDPT in simulation with two user simulators as well as with humans.",
+}
 @inproceedings{NEURIPS2019_fa7cdfad,
   author = {Rolnick, David and Ahuja, Arun and Schwarz, Jonathan and Lillicrap, Timothy and Wayne, Gregory},
   booktitle = {Advances in Neural Information Processing Systems},
@@ -62,4 +81,21 @@ For creating evaluation plots and running evaluation dialogues, please have a lo
   volume = {32},
   year = {2019}
 }
+@InProceedings{pmlr-v80-espeholt18a,
+  title = {{IMPALA}: Scalable Distributed Deep-{RL} with Importance Weighted Actor-Learner Architectures},
+  author = {Espeholt, Lasse and Soyer, Hubert and Munos, Remi and Simonyan, Karen and Mnih, Vlad and Ward, Tom and Doron, Yotam and Firoiu, Vlad and Harley, Tim and Dunning, Iain and Legg, Shane and Kavukcuoglu, Koray},
+  booktitle = {Proceedings of the 35th International Conference on Machine Learning},
+  pages = {1407--1416},
+  year = {2018},
+  editor = {Dy, Jennifer and Krause, Andreas},
+  volume = {80},
+  series = {Proceedings of Machine Learning Research},
+  month = {10--15 Jul},
+  publisher = {PMLR},
+  pdf = {http://proceedings.mlr.press/v80/espeholt18a/espeholt18a.pdf},
+  url = {https://proceedings.mlr.press/v80/espeholt18a.html},
+  abstract = {In this work we aim to solve a large collection of tasks using a single reinforcement learning agent with a single set of parameters. A key challenge is to handle the increased amount of data and extended training time. We have developed a new distributed agent IMPALA (Importance Weighted Actor-Learner Architecture) that not only uses resources more efficiently in single-machine training but also scales to thousands of machines without sacrificing data efficiency or resource utilisation. We achieve stable learning at high throughput by combining decoupled acting and learning with a novel off-policy correction method called V-trace. We demonstrate the effectiveness of IMPALA for multi-task reinforcement learning on DMLab-30 (a set of 30 tasks from the DeepMind Lab environment (Beattie et al., 2016)) and Atari57 (all available Atari games in Arcade Learning Environment (Bellemare et al., 2013a)). Our results show that IMPALA is able to achieve better performance than previous agents with less data, and crucially exhibits positive transfer between tasks as a result of its multi-task approach.}
+}
 ```
\ No newline at end of file