PyDial3 - Public · Commit 8f6a7ef6

Bug Fixes

Authored 5 years ago by Carel van Niekerk
Parent: ca5d4f6e
Pipeline #40263 passed (stage: deploy), 5 years ago
Showing 4 changed files with 371 additions and 2 deletions:

.gitignore (+139, −1)
config/pydial_benchmarks/env1-dqn-CR.cfg (+228, −0)
policy/DQNPolicy.py (+1, −1)
pydial.py (+3, −0)
.gitignore (+139, −1)
...
@@ -4,3 +4,141 @@ public
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Pydial logs and models
_*/
*.log
*.json
*.dct
*.prm
*.pyc
*.model
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# PyCharm stuff
.idea/
.xml
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
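Beyond the standard Python template, the PyDial-specific patterns (`_*/`, `*.log`, `*.model`, and so on) keep generated artefacts such as the `_benchmarkpolicies` and `_benchmarklogs` directories named in the new config out of the repository. If a path is unexpectedly ignored, `git check-ignore -v` reports which pattern matched; the file name below is hypothetical, and the line number in the output depends on the final file:

git check-ignore -v _benchmarklogs/env1-dqn-CR.log
# example output: .gitignore:10:_*/	_benchmarklogs/env1-dqn-CR.log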
config/pydial_benchmarks/env1-dqn-CR.cfg (new file, +228, −0)
# Error model: 0% error rate, additive confscorer, uniform nbestgenerator
# User model: standard sampled params, sampled patience
# Masks: on

###### General parameters ######
[GENERAL]
domains = CamRestaurants
singledomain = True
tracedialog = 0
seed = 07051991

[exec_config]
configdir = _benchmarkpolicies
logfiledir = _benchmarklogs
numtrainbatches = 4
traindialogsperbatch = 1000
numbatchtestdialogs = 500
trainsourceiteration = 0
numtestdialogs = 500
trainerrorrate = 0
testerrorrate = 0
testeverybatch = True
#deleteprevpolicy = True

[logging]
usecolor = False
screen_level = results
file_level = results
file = auto

###### Environment parameters ######
[agent]
maxturns = 25

[usermodel]
usenewgoalscenarios = True
oldstylepatience = False
patience = 4,6
configfile = config/sampledUM.cfg

[errormodel]
nbestsize = 1
confusionmodel = RandomConfusions
nbestgeneratormodel = SampledNBestGenerator
confscorer = additive

[summaryacts]
maxinformslots = 5
informmask = True
requestmask = True
informcountaccepted = 4
byemask = True
###### Dialogue Manager parameters ######
## Comment the following lines if using any other policy (this uses handcrafted policy)##
# [policy]
# policydir = _benchmarkpolicies
# belieftype = focus
# useconfreq = False
# learning = True
# policytype = hdc
# startwithhello = False
# inpolicyfile = auto
# outpolicyfile = auto
## Uncomment for GP policy ##
#[policy]
#policydir = _benchmarkpolicies
#belieftype = focus
#useconfreq = False
#learning = True
#policytype = gp
#startwithhello = False
#inpolicyfile = auto
#outpolicyfile = auto
#
#[gppolicy]
#kernel = polysort
#
#[gpsarsa]
#random = False
#scale = 3
## Uncomment for DQN policy ##
[policy]
policydir = _benchmarkpolicies
belieftype = focus
useconfreq = False
learning = True
policytype = dqn
startwithhello = False
inpolicyfile = auto
outpolicyfile = auto

[dqnpolicy]
maxiter = 4000
gamma = 0.99
learning_rate = 0.001
tau = 0.02
replay_type = vanilla
minibatch_size = 64
capacity = 6000
exploration_type = e-greedy
episodeNum = 0.0
epsilon_start = 0.3
epsilon_end = 0.0
n_in = 268
features = ["discourseAct", "method", "requested", "full", "lastActionInformNone", "offerHappened", "inform_info"]
max_k = 5
learning_algorithm = dqn
architecture = vanilla
h1_size = 300
h2_size = 100
training_frequency = 2
n_samples = 1
stddev_var_mu = 0.01
stddev_var_logsigma = 0.01
mean_log_sigma = 0.000001
sigma_prior = 1.5
alpha = 0.85
alpha_divergence = False
sigma_eps = 0.01
delta = 1.0
beta = 0.95
is_threshold = 5.0
train_iters_per_episode = 1
## Uncomment for A2C policy ##
#[policy]
#policydir = _benchmarkpolicies
#belieftype = focus
#useconfreq = False
#learning = True
#policytype = a2c
#startwithhello = False
#inpolicyfile = auto
#outpolicyfile = auto
#[dqnpolicy]
#maxiter = 4000
#gamma = 0.99
#learning_rate = 0.001
#tau = 0.02
#replay_type = vanilla
#minibatch_size = 64
#capacity = 1000
#exploration_type = e-greedy
#episodeNum = 0.0
#epsilon_start = 0.5
#epsilon_end = 0.0
#n_in = 268
#features = ["discourseAct", "method", "requested", "full", "lastActionInformNone", "offerHappened", "inform_info"]
#max_k = 5
#learning_algorithm = dqn
#architecture = vanilla
#h1_size = 200
#h2_size = 75
#training_frequency = 2
#n_samples = 1
#stddev_var_mu = 0.01
#stddev_var_logsigma = 0.01
#mean_log_sigma = 0.000001
#sigma_prior = 1.5
#alpha = 0.85
#alpha_divergence = False
#sigma_eps = 0.01
#delta = 1.0
#beta = 0.95
#is_threshold = 5.0
#train_iters_per_episode = 1
## Uncomment for eNAC policy ##
#[policy]
#policydir = _benchmarkpolicies
#belieftype = focus
#useconfreq = False
#learning = True
#policytype = enac
#startwithhello = False
#inpolicyfile = auto
#outpolicyfile = auto
#[dqnpolicy]
#maxiter = 4000
#gamma = 0.99
#learning_rate = 0.001
#tau = 0.02
#replay_type = vanilla
#minibatch_size = 64
#capacity = 1000
#exploration_type = e-greedy
#episodeNum = 0.0
#epsilon_start = 0.3
#epsilon_end = 0.0
#n_in = 268
#features = ["discourseAct", "method", "requested", "full", "lastActionInformNone", "offerHappened", "inform_info"]
#max_k = 5
#learning_algorithm = dqn
#architecture = vanilla
#h1_size = 130
#h2_size = 50
#training_frequency = 2
#n_samples = 1
#stddev_var_mu = 0.01
#stddev_var_logsigma = 0.01
#mean_log_sigma = 0.000001
#sigma_prior = 1.5
#alpha = 0.85
#alpha_divergence = False
#sigma_eps = 0.01
#delta = 1.0
#beta = 0.95
#is_threshold = 5.0
#train_iters_per_episode = 1
###### Evaluation parameters ######
[eval]
rewardvenuerecommended = 0
penaliseallturns = True
wrongvenuepenalty = 0
notmentionedvaluepenalty = 0
successmeasure = objective
successreward = 20
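Since this is a standard INI-style file, its values can be sanity-checked with Python's configparser before a run; the snippet below is a minimal illustrative sketch only, as PyDial itself parses the file through its own settings machinery, and the benchmark is normally launched with something like `python pydial.py train config/pydial_benchmarks/env1-dqn-CR.cfg`.

# Minimal sketch: inspect a few values from the new benchmark config.
# Illustrative only; PyDial reads the file via its own utilities.
import configparser

cfg = configparser.ConfigParser()
cfg.read("config/pydial_benchmarks/env1-dqn-CR.cfg")

print(cfg.get("GENERAL", "domains"))               # CamRestaurants
print(cfg.getint("dqnpolicy", "maxiter"))          # 4000
print(cfg.getfloat("dqnpolicy", "epsilon_start"))  # 0.3
print(cfg.getboolean("policy", "learning"))        # True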
policy/DQNPolicy.py (+1, −1)
...
@@ -768,7 +768,7 @@ class DQNPolicy(Policy.Policy):
         curiosity_loss = self.curiosityFunctions.training(s2_batch, s_batch, a_batch_one_hot)
         # self.curiositypred_loss.append(curiosity_loss)  # for plotting
-        predicted_q_value, currentLoss = self.dqn.train(s_batch, a_batch_one_hot, reshaped_yi)
+        predicted_q_value, _, currentLoss = self.dqn.train(s_batch, a_batch_one_hot, reshaped_yi)
         if self.episodecount % 1 == 0:
             # Update target networks
...
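The fix widens the tuple unpacking: dqn.train() evidently returns three values at this call site (presumably the predicted Q-values, a training op, and the loss; the middle element's meaning is an assumption), so binding only two names would raise a ValueError. A minimal standalone illustration:

# Stand-in for dqn.train(); the three return values are hypothetical.
def train_stub():
    return 1.0, None, 0.5  # e.g. (predicted_q, train_op, loss)

predicted_q_value, _, currentLoss = train_stub()  # new unpacking: works
# predicted_q_value, currentLoss = train_stub()   # old unpacking raises
#                                                 # ValueError: too many values to unpack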
pydial.py (+3, −0)
...
@@ -931,6 +931,9 @@ def test_command(configfile, iteration, seed=None, testerrorrate=None, trainerro
             policyname = '-'.join(ps[:-1] + ['seed{}'.format(orig_seed)] + [ps[-1]])
         else:
             policyname = "%s-%02d.%d" % (configId, gtrainerrorrate, i)
+            if not 'seed' in policyname:
+                ps = policyname.split('-')
+                policyname = '-'.join(ps[:-1] + ['seed{}'.format(orig_seed)] + [ps[-1]])
         poldirpath = path(policy_dir)
         if poldirpath.isdir():
             policyfiles = poldirpath.files()
...
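The three added lines make the tested policy's name carry the seed when it does not already: the name is split on '-' and a seed segment is spliced in before the last component. A worked illustration with hypothetical values:

# Hypothetical values; mirrors the lines added to test_command.
orig_seed = 7051991
policyname = "env1-dqn-CR-00.3"

if not 'seed' in policyname:
    ps = policyname.split('-')  # ['env1', 'dqn', 'CR', '00.3']
    policyname = '-'.join(ps[:-1] + ['seed{}'.format(orig_seed)] + [ps[-1]])

print(policyname)  # env1-dqn-CR-seed7051991-00.3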