Commit 8f6a7ef6 authored by Carel van Niekerk

Bug Fixes

parent ca5d4f6e
Pipeline #40263 passed in 2 minutes and 18 seconds
.gitignore
@@ -3,4 +3,142 @@ public
# Byte-compiled / optimized / DLL files
__pycache__/
-*.py[cod]
\ No newline at end of file
+*.py[cod]
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Pydial logs and models
_*/
*.log
*.json
*.dct
*.prm
*.pyc
*.model
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# PyCharm stuff
.idea/
*.xml
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
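The rules above follow the stock GitHub Python .gitignore template, plus a PyDial-specific block for logs and models. A minimal sketch (not part of the commit) of how a character-class pattern such as *.py[cod] matches, shown with Python's fnmatch; note that real .gitignore semantics (directory suffixes like __pycache__/, negation with !) go beyond plain globbing:

from fnmatch import fnmatch

# '*.py[cod]' matches any name ending in .pyc, .pyo or .pyd.
for name in ('module.pyc', 'module.pyo', 'module.pyd', 'module.py'):
    print(name, fnmatch(name, '*.py[cod]'))
# module.pyc True / module.pyo True / module.pyd True / module.py False

The commit also adds a new benchmark configuration file; its path is not shown in this extract, but its full contents follow.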
# Error model: 0% error rate, additive confscorer, uniform nbestgenerator
# User model: standard sampled params, sampled patience
# Masks: on
###### General parameters ######
[GENERAL]
domains = CamRestaurants
singledomain = True
tracedialog = 0
seed = 07051991
[exec_config]
configdir = _benchmarkpolicies
logfiledir = _benchmarklogs
numtrainbatches = 4
traindialogsperbatch = 1000
numbatchtestdialogs = 500
trainsourceiteration = 0
numtestdialogs = 500
trainerrorrate = 0
testerrorrate = 0
testeverybatch = True
#deleteprevpolicy = True
[logging]
usecolor = False
screen_level = results
file_level = results
file = auto
###### Environment parameters ######
[agent]
maxturns = 25
[usermodel]
usenewgoalscenarios = True
oldstylepatience = False
patience = 4,6
configfile = config/sampledUM.cfg
[errormodel]
nbestsize = 1
confusionmodel = RandomConfusions
nbestgeneratormodel = SampledNBestGenerator
confscorer = additive
[summaryacts]
maxinformslots = 5
informmask = True
requestmask = True
informcountaccepted = 4
byemask = True
###### Dialogue Manager parameters ######
## Uncomment for handcrafted (HDC) policy ##
# [policy]
# policydir = _benchmarkpolicies
# belieftype = focus
# useconfreq = False
# learning = True
# policytype = hdc
# startwithhello = False
# inpolicyfile = auto
# outpolicyfile = auto
## Uncomment for GP policy ##
#[policy]
#policydir = _benchmarkpolicies
#belieftype = focus
#useconfreq = False
#learning = True
#policytype = gp
#startwithhello = False
#inpolicyfile = auto
#outpolicyfile = auto
#
#[gppolicy]
#kernel = polysort
#
#[gpsarsa]
#random = False
#scale = 3
## DQN policy (active in this config) ##
[policy]
policydir = _benchmarkpolicies
belieftype = focus
useconfreq = False
learning = True
policytype = dqn
startwithhello = False
inpolicyfile = auto
outpolicyfile = auto
[dqnpolicy]
maxiter = 4000
gamma = 0.99
learning_rate = 0.001
tau = 0.02
replay_type = vanilla
minibatch_size = 64
capacity = 6000
exploration_type = e-greedy
episodeNum = 0.0
epsilon_start = 0.3
epsilon_end = 0.0
n_in = 268
features = ["discourseAct", "method", "requested", "full", "lastActionInformNone", "offerHappened", "inform_info"]
max_k = 5
learning_algorithm = dqn
architecture = vanilla
h1_size = 300
h2_size = 100
training_frequency = 2
n_samples = 1
stddev_var_mu = 0.01
stddev_var_logsigma = 0.01
mean_log_sigma = 0.000001
sigma_prior = 1.5
alpha = 0.85
alpha_divergence = False
sigma_eps = 0.01
delta = 1.0
beta = 0.95
is_threshold = 5.0
train_iters_per_episode = 1
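The exploration settings above (exploration_type = e-greedy with epsilon_start = 0.3 annealed to epsilon_end = 0.0 over maxiter = 4000 iterations) suggest a linearly decayed epsilon-greedy action choice. A minimal sketch of that reading, not taken from PyDial, whose actual schedule may differ:

import random

def epsilon(step, start=0.3, end=0.0, maxiter=4000):
    # Linear anneal from start to end over maxiter steps, then hold at end.
    frac = min(step / maxiter, 1.0)
    return start + (end - start) * frac

def egreedy_action(q_values, step):
    # With probability epsilon pick a random action, otherwise the greedy one.
    if random.random() < epsilon(step):
        return random.randrange(len(q_values))
    return max(range(len(q_values)), key=q_values.__getitem__)

print(egreedy_action([0.1, 0.5, 0.2], step=100))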
## Uncomment for A2C policy ##
#[policy]
#policydir = _benchmarkpolicies
#belieftype = focus
#useconfreq = False
#learning = True
#policytype = a2c
#startwithhello = False
#inpolicyfile = auto
#outpolicyfile = auto
#[dqnpolicy]
#maxiter = 4000
#gamma = 0.99
#learning_rate = 0.001
#tau = 0.02
#replay_type = vanilla
#minibatch_size = 64
#capacity = 1000
#exploration_type = e-greedy
#episodeNum = 0.0
#epsilon_start = 0.5
#epsilon_end = 0.0
#n_in = 268
#features = ["discourseAct", "method", "requested", "full", "lastActionInformNone", "offerHappened", "inform_info"]
#max_k = 5
#learning_algorithm = dqn
#architecture = vanilla
#h1_size = 200
#h2_size = 75
#training_frequency = 2
#n_samples = 1
#stddev_var_mu = 0.01
#stddev_var_logsigma = 0.01
#mean_log_sigma = 0.000001
#sigma_prior = 1.5
#alpha = 0.85
#alpha_divergence = False
#sigma_eps = 0.01
#delta = 1.0
#beta = 0.95
#is_threshold = 5.0
#train_iters_per_episode = 1
## Uncomment for eNAC policy ##
#[policy]
#policydir = _benchmarkpolicies
#belieftype = focus
#useconfreq = False
#learning = True
#policytype = enac
#startwithhello = False
#inpolicyfile = auto
#outpolicyfile = auto
#[dqnpolicy]
#maxiter = 4000
#gamma = 0.99
#learning_rate = 0.001
#tau = 0.02
#replay_type = vanilla
#minibatch_size = 64
#capacity = 1000
#exploration_type = e-greedy
#episodeNum = 0.0
#epsilon_start = 0.3
#epsilon_end = 0.0
#n_in = 268
#features = ["discourseAct", "method", "requested", "full", "lastActionInformNone", "offerHappened", "inform_info"]
#max_k = 5
#learning_algorithm = dqn
#architecture = vanilla
#h1_size = 130
#h2_size = 50
#training_frequency = 2
#n_samples = 1
#stddev_var_mu = 0.01
#stddev_var_logsigma = 0.01
#mean_log_sigma = 0.000001
#sigma_prior = 1.5
#alpha = 0.85
#alpha_divergence = False
#sigma_eps = 0.01
#delta = 1.0
#beta = 0.95
#is_threshold = 5.0
#train_iters_per_episode = 1
###### Evaluation parameters ######
[eval]
rewardvenuerecommended = 0
penaliseallturns = True
wrongvenuepenalty = 0
notmentionedvaluepenalty = 0
successmeasure = objective
successreward = 20
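A minimal sketch (not part of the commit) of how this file can be read with Python's standard configparser; the filename env1-dqn-CR.cfg is an assumption, substitute whatever path the repository actually uses:

import configparser

cfg = configparser.ConfigParser()
cfg.read('env1-dqn-CR.cfg')  # assumed filename

# Typed accessors mirror the sections and keys laid out above.
domains = cfg.get('GENERAL', 'domains')          # 'CamRestaurants'
maxiter = cfg.getint('dqnpolicy', 'maxiter')     # 4000
gamma = cfg.getfloat('dqnpolicy', 'gamma')       # 0.99
learning = cfg.getboolean('policy', 'learning')  # True
print(domains, maxiter, gamma, learning)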
@@ -768,7 +768,7 @@ class DQNPolicy(Policy.Policy):
        curiosity_loss = self.curiosityFunctions.training(s2_batch, s_batch, a_batch_one_hot)
        # self.curiositypred_loss.append(curiosity_loss)  # for plotting
-       predicted_q_value, currentLoss = self.dqn.train(s_batch, a_batch_one_hot, reshaped_yi)
+       predicted_q_value, _, currentLoss = self.dqn.train(s_batch, a_batch_one_hot, reshaped_yi)
        if self.episodecount % 1 == 0:  # always true, so this runs every episode
            # Update target networks
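The one-line fix above unpacks three return values where the old code expected two, which implies self.dqn.train() returns an extra middle value that the caller does not need. A minimal, self-contained sketch of the idiom; the stub is hypothetical, since the diff does not show what the real middle value is (plausibly the result of a TensorFlow training op):

def train_stub(states, actions, targets):
    # Hypothetical stand-in for self.dqn.train(): returns
    # (predicted_q_values, extra_value, loss).
    return [0.1, 0.2], None, 0.05

predicted_q_value, _, current_loss = train_stub(None, None, None)
print(predicted_q_value, current_loss)  # [0.1, 0.2] 0.05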
@@ -931,6 +931,9 @@ def test_command(configfile, iteration, seed=None, testerrorrate=None, trainerrorrate=None
        policyname = '-'.join(ps[:-1] + ['seed{}'.format(orig_seed)] + [ps[-1]])
    else:
        policyname = "%s-%02d.%d" % (configId, gtrainerrorrate, i)
+       if 'seed' not in policyname:
+           ps = policyname.split('-')
+           policyname = '-'.join(ps[:-1] + ['seed{}'.format(orig_seed)] + [ps[-1]])
    poldirpath = path(policy_dir)
    if poldirpath.isdir():
        policyfiles = poldirpath.files()
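The added lines above ensure that a seed component is injected into the policy name when it is missing, so test runs with different seeds do not resolve to the same policy files. A minimal sketch with illustrative names (real names are built from configId and the training error rate):

orig_seed = 7051991
policyname = 'env1-dqn-00.1'  # illustrative only
if 'seed' not in policyname:
    ps = policyname.split('-')
    policyname = '-'.join(ps[:-1] + ['seed{}'.format(orig_seed)] + [ps[-1]])
print(policyname)  # env1-dqn-seed7051991-00.1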