# Confusion Network Tutorial {#CNetTrainTutorial}
A list of configuration options is provided in \subpage CNetConfig.
## Training, testing, and saving a CNet decoder

This tutorial explains how to create a CNet decoder that can be used by Caesar. It also explains how to run an experiment in which you vary training parameters and see what worked better. For full documentation of the config variables, please see `config.md`.
### Install scikit-learn

Installing the latest scikit-learn with pip should work (note from lmr46):

```
pip install -U scikit-learn
```

The old instructions were to download a copy of `scikit-learn-0.14.tar.gz`, untar it, then run `python setup.py install --user`.
### Get a copy of a belief tracker corpus

For example, the DSTC II data, which can be found here: https://bitbucket.org/matthen/dstc-ii. Get a `*.tar.gz` and extract it into `./corpora`:

```
mkdir corpora
cd corpora
wget https://bitbucket.org/matthen/dstc-ii/downloads/DSTCCAM.tar.gz
tar -xzf DSTCCAM.tar.gz
```

This should provide `corpora/data` and `corpora/scripts`. The `scripts` directory contains Python classes for looping through the data (`dataset_walker.py`) and for scoring the output of a semantic decoder (`score_slu.py`).
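As a quick sanity check, you can loop through the corpus yourself. This is a minimal sketch assuming the DSTC-style `dataset_walker` interface (dataset name, `dataroot`, `labels` flag); check `corpora/scripts/dataset_walker.py` for the exact signature and field names.

```python
import sys
sys.path.append("corpora/scripts")
from dataset_walker import dataset_walker

# Iterate over the calls in the training set; each call yields
# (turn, label) pairs, where the label carries the annotations.
dataset = dataset_walker("Oct11_train", dataroot="corpora/data", labels=True)
for call in dataset:
    for turn, label in call:
        # The structure of turn/label depends on the corpus; print one
        # pair and inspect it rather than relying on particular keys.
        print(turn)
        print(label)
        break
    break
```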
### Train model

We will start building up a config file, which defines the model. Model settings are defined in the `[classifier]` section, and the sections that follow define settings for the Python scripts of the same name. For example, here we add a section called `[train]`, which is used by `train.py`.

Create `config/eg.cfg` with the following contents:
```
[DEFAULT]
output_dir = output
name = eg

[grammar]
acts = ["inform","request","deny","negate","confirm","null","repeat","affirm","bye","reqalts","hello","thankyou","ack","help"]
nonempty_acts = ["inform","confirm","request","deny"]
slots_enumerated = ["area","pricerange"]
ontology = corpora/scripts/config/ontology_Oct11.json

[classifier]
type = svm
features = ["cnet"]

[train]
output = %(output_dir)s/%(name)s.pickle
dataset = ["Oct11_train"]
dataroot = corpora/data
```
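The `%(...)s` values are standard ConfigParser interpolation, filled in from the `[DEFAULT]` section. A quick way to see this (Python 3 shown; in Python 2, which the original scripts target, the module is called `ConfigParser`):

```python
from configparser import ConfigParser

config = ConfigParser()
config.read("config/eg.cfg")
# output_dir and name come from [DEFAULT], so this prints "output/eg.pickle"
print(config.get("train", "output"))
```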
The `[grammar]` section contains the grammar of user acts, which the decoder needs to know. Running `python checkGrammar.py config/eg.cfg` will check that there are no contradictory user acts in the train and test sets. For this example, `checkGrammar.py` should output:

```
Checking train
undeclared informable slots found
[u'task', u'type']
```

We don't want to model `task` and `type`, because they are always `find` and `restaurant` respectively. Omitting them from the grammar means they will simply be ignored.
In the `[classifier]` section, we define the features used, as a JSON list of strings. These correspond to classes in `Features.py`.
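To give a flavour of what such a class looks like, here is a hypothetical sketch of an n-best feature extractor. The class name, method name, and input keys are assumptions for illustration; the real interface is defined in `Features.py`.

```python
# Hypothetical sketch of a feature class in the style of Features.py.
class nbest(object):
    """Weighted n-gram counts pooled over the n-best ASR hypotheses."""

    def calculate(self, asr_hyps, max_ngram_length=3):
        counts = {}
        for hyp in asr_hyps:
            words = hyp["asr-hyp"].split()
            score = hyp.get("score", 1.0)
            # Count every n-gram up to max_ngram_length, weighted by
            # the hypothesis score.
            for n in range(1, max_ngram_length + 1):
                for i in range(len(words) - n + 1):
                    ngram = " ".join(words[i:i + n])
                    counts[ngram] = counts.get(ngram, 0.0) + score
        return counts
```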
Do:

```
mkdir output
python train.py config/eg.cfg
```

This will take a while to run, and in the end should create `output/eg.pickle` with the trained model. At the end of training, it will tell you something like:
```
Not able to learn about:
(u'confirm', u'name', (generic value for name (None))), (u'confirm', u'pricerange', u'moderate'), (u'restart',),
(u'deny', u'pricerange', 'dontcare'), (u'confirm', u'pricerange', 'dontcare'), (u'confirm', u'area', 'dontcare'),
(u'deny', u'area', 'dontcare'), (u'deny', u'name', (generic value for name (None))), (u'deny', u'area', u'north'),
(u'deny', u'area', u'south'), (u'deny', u'pricerange', u'moderate'), (u'confirm', u'pricerange', u'expensive'),
(u'confirm', u'pricerange', u'cheap')
```
These are the tuples that weren't represented in the training data, and so couldn't be learnt.
### Decode a test set

We will use the decoder to decode a test set. Add a `[decode]` section to the config file:

```
[decode]
output = %(output_dir)s/%(name)s.decode.json
; this will be the output of the decoder on the test set
dataset = ["Oct11_test"]
dataroot = corpora/data
```
Then run:

```
python decode.py config/eg.cfg
```

Now there should be a file `output/eg.decode.json` with the decoder's output.
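If you want a quick look at what was produced before scoring it, the file is plain JSON. A minimal sketch (the top-level structure is whatever `decode.py` writes; inspect it rather than relying on particular keys):

```python
import json

with open("output/eg.decode.json") as f:
    decoded = json.load(f)

# Print the top-level shape so you can see how the hypotheses are organised.
if isinstance(decoded, dict):
    print(list(decoded.keys()))
else:
    print(type(decoded), len(decoded))
```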
### Evaluate the decoding results

The `score_slu.py` and `report_slu.py` scripts packaged with the belief tracking corpus can be run on the output of `decode.py`, provided you use the correct command-line arguments. `evaluate.py` takes the config file as its single argument, runs the SLU scoring, and creates a report.

Add the following section to the config file:

```
[evaluate]
csv_output = %(output_dir)s/%(name)s.score.csv
report_output = %(output_dir)s/%(name)s.report.txt
```

Run `python evaluate.py config/eg.cfg`. This will create `output/eg.score.csv`, which contains the metrics, and `output/eg.report.txt`.
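Each row of the CSV appears to be a `(group, metric, value)` triple, judging by the sample output later in this tutorial. A small sketch for pulling the numbers into Python:

```python
import csv

# Read the metrics written by evaluate.py; row layout assumed to be
# group, metric, value (e.g. "tophyp,fscore, 0.87771").
with open("output/eg.score.csv") as f:
    for group, metric, value in csv.reader(f):
        print(group, metric, float(value))
```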
### Run an experiment

Copy the config file: `cp config/eg.cfg config/eg_experiment.cfg`. Delete the `[DEFAULT]` section and add:

```
[experiment]
name: feature_set
type: vary_train ; this type of experiment trains a set of models and tests each on the test dataset
vary: [
    ["classifier", "features", [
        "[\"cnet\"]",
        "[\"nbest\"]"
    ]]
  ]
  ; section, option, values
```
Be sure to `mkdir output/experiments`. This experiment will try each possible feature set and output the results to `output/experiments/feature_set`. Here we are comparing n-gram counts derived from the confusion network against those derived from the n-best list. You can also vary the values of several options at once, like this:
```
[
    ["section_name1", "option_name1", ["value11", "value12", ...]],
    ["section_name2", "option_name2", ["value21", "value22", ...]]
]
```
For `vary_train` experiments, the `experiment.py` script will try all possible combinations of these options, and run train, decode, and evaluate for each. It sets the `DEFAULT` `name` option automatically for each run, so if you have used `%(name)s` throughout your config as above, it will work fine. The script uses `multiprocessing` to evaluate runs in parallel. Set the `num_processes` option to configure this (the default is 1).
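For example, to evaluate two runs in parallel you might add the option like this (shown under `[experiment]`; this placement is an assumption, so check `config.md` for where `num_processes` belongs):

```
[experiment]
name: feature_set
type: vary_train
num_processes: 2
```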
Now run `python experiment.py config/eg_experiment.cfg`.

This will start by printing:

```
Configuring: run_0
Setting: classifier_features = ["cnet"]
putting run_0
Configuring: run_1
Setting: classifier_features = ["nbest"]
putting run_1
```
Try listing the contents of the experiment directory:

```
$ ls output/experiments/feature_set/
experiment_config.cfg log.txt run_0.cfg run_1.cfg
```

Note that a copy of the config you used to create the experiment is saved. This is useful for recreating results. `run_0.cfg` and `run_1.cfg` will also allow you to recreate individual configurations. When the experiment finishes, there will be more files in the directory, including the `score.csv` files:
```
$ head -20 output/experiments/feature_set/run_*.score.csv
==> output/experiments/feature_set/run_0.score.csv <==
belief_accuracy,all_acc, 0.96242
belief_accuracy,all_l2, 0.04696
belief_accuracy,all_logp, -0.15237
(omitted goal, requested, and method breakdown)
ice,ICE, 1.02352
tophyp,fscore, 0.87771
tophyp,precision, 0.90081
tophyp,recall, 0.85577

==> output/experiments/feature_set/run_1.score.csv <==
belief_accuracy,all_acc, 0.96221
belief_accuracy,all_l2, 0.05005
belief_accuracy,all_logp, -0.16502
(omitted goal, requested, and method breakdown)
ice,ICE, 1.11565
tophyp,fscore, 0.86706
tophyp,precision, 0.89549
tophyp,recall, 0.84038
```
(Confusion network features are `run_0`, and perform slightly better on all metrics.)

Running `python experiment.py config/eg.cfg` (or with any config without an `[experiment]` section) is equivalent to `python train.py config/eg.cfg; python decode.py config/eg.cfg; python evaluate.py config/eg.cfg`.
### Output for Caesar

Add the following section to the config file:

```
[export]
models = %(output_dir)s/%(name)s.caesar.svms.txt
dictionary = %(output_dir)s/%(name)s.caesar.dic.txt
config = %(output_dir)s/%(name)s.caesar.cfg
```
Caesar needs two files:

- `models` contains all the SVMs, one after the other. Each SVM is saved in libsvm's sparse export format, with the tuple at the beginning, and is terminated with a period then a newline.
- `dictionary` is the mapping from features to vector indices. This is a JSON list of the feature keys, sorted by their mapping index (see the sketch below).
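Since the dictionary is just a JSON list whose positions are the vector indices, recovering the feature-to-index mapping is one line of Python. A minimal sketch (the file path follows the `[export]` section above):

```python
import json

# The exported dictionary is a JSON list of feature keys; a key's
# position in the list is its index in the feature vector.
with open("output/eg.caesar.dic.txt") as f:
    dictionary = json.load(f)

feature_index = {key: i for i, key in enumerate(dictionary)}
```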
The `config` file contains any options in Caesar's config format that need to be set to run the decoder. The output for this example is:
```
# Automatically generated by CNetTrain scripts
CNET : MAX_NGRAMS = 200
CNET : FEATURES = ["cnet"]
CNET : DICTIONARY = /Users/matt/Projects/vocaliq/SemIO/CNetTrain/output/eg.caesar.dic.txt
CNET : MAX_NGRAM_LENGTH = 3
CNET : MODELS = /Users/matt/Projects/vocaliq/SemIO/CNetTrain/output/eg.caesar.svms.txt
CNET : TAIL_CUTOFF = 0.001
CNET : MAX_ACTIVE_TUPLES = 10
```
Include this config in your master config, and the decoder should work. Check that the absolute paths are okay, and then consider doing a `#include`.
### Other config variables

Every config variable is documented in `config.md`, in the root of the `CNetTrain` directory.