Add calibration plots script

a236c154 · Carel van Niekerk · 649fbf4a · a236c154
Commit a236c154 authored Aug 4, 2022 by Carel van Niekerk
--- a/convlab/dst/setsumbt/calibration_plots.py
+++ b/convlab/dst/setsumbt/calibration_plots.py
+# -*- coding: utf-8 -*-
+# Copyright 2020 DSML Group, Heinrich Heine University, Düsseldorf
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Calibration Plot plotting script"""
+
+import os
+from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser
+
+import torch
+from matplotlib import pyplot as plt
+
+
+def main():
+    parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
+    parser.add_argument('--data_dir', help='Location of the belief states', required=True)
+    parser.add_argument('--output', help='Output image path', default='calibration_plot.png')
+    parser.add_argument('--n_bins', help='Number of bins', default=10, type=int)
+    args = parser.parse_args()
+
+    if torch.cuda.is_available():
+        device = torch.device('cuda')
+    else:
+        device = torch.device('cpu')
+    path = args.data_dir
+
+    models = os.listdir(path)
+    models = [os.path.join(path, model, 'test.belief') for model in models]
+
+    fig = plt.figure(figsize=(14,8))
+    font=20
+    plt.tick_params(labelsize=font-2)
+    linestyle = ['-', ':', (0, (3, 5, 1, 5)), '-.', (0, (5, 10))]
+    for i, model in enumerate(models):
+        conf, acc = get_calibration(model, device, n_bins=args.n_bins)
+        name = model.split('/')[-2].strip()
+        print(name, conf, acc)
+        plt.plot(conf, acc, label=name, linestyle=linestyle[i], linewidth=3)
+
+    plt.plot(torch.tensor([0,1]), torch.tensor([0,1]), linestyle='--', color='black', linewidth=3)
+    plt.xlabel('Confidence', fontsize=font)
+    plt.ylabel('Joint Goal Accuracy', fontsize=font)
+    plt.legend(fontsize=font)
+
+    plt.savefig(args.output)
+
+
+def get_calibration(path, device, n_bins=10, temperature=1.00):
+    logits = torch.load(path, map_location=device)
+    y_true = logits['labels']
+    logits = logits['belief_states']
+
+    y_pred = {slot: logits[slot].reshape(-1, logits[slot].size(-1)).argmax(-1) for slot in logits}
+    goal_acc = {slot: (y_pred[slot] == y_true[slot].reshape(-1)).int() for slot in y_pred}
+    goal_acc = sum([goal_acc[slot] for slot in goal_acc])
+    goal_acc = (goal_acc == len(y_true)).int()
+
+    scores = [logits[slot].reshape(-1, logits[slot].size(-1)).max(-1)[0].unsqueeze(0) for slot in logits]
+    scores = torch.cat(scores, 0).min(0)[0]
+
+    step = 1.0 / float(n_bins)
+    bin_ranges = torch.arange(0.0, 1.0 + 1e-10, step)
+    bins = []
+    for b in range(n_bins):
+        lower, upper = bin_ranges[b], bin_ranges[b + 1]
+        if b == 0:
+            ids = torch.where((scores >= lower) * (scores <= upper))[0]
+        else:
+            ids = torch.where((scores > lower) * (scores <= upper))[0]
+        bins.append(ids)
+
+    conf = [0.0]
+    for b in bins:
+        if b.size(0) > 0:
+            l = scores[b]
+            conf.append(l.mean())
+        else:
+            conf.append(-1)
+    conf = torch.tensor(conf)
+
+    slot = [s for s in y_true][0]
+    acc = [0.0]
+    for b in bins:
+        if b.size(0) > 0:
+            acc_ = goal_acc[b]
+            acc_ = acc_[y_true[slot].reshape(-1)[b] >= 0]
+            if acc_.size(0) >= 0:
+                acc.append(acc_.float().mean())
+            else:
+                acc.append(-1)
+        else:
+            acc.append(-1)
+    acc = torch.tensor(acc)
+
+    conf = conf[acc != -1]
+    acc = acc[acc != -1]
+
+    return conf, acc
+
+
+if __name__ == '__main__':
+    main()