    ###############################################################################
    # PyDial: Multi-domain Statistical Spoken Dialogue System Software
    ###############################################################################
    #
    # Copyright 2015-16  Cambridge University Engineering Department 
    # Dialogue Systems Group
    #
    # 
    # Licensed under the Apache License, Version 2.0 (the "License");
    # you may not use this file except in compliance with the License.
    # You may obtain a copy of the License at
    #
    # http://www.apache.org/licenses/LICENSE-2.0
    #
    # Unless required by applicable law or agreed to in writing, software
    # distributed under the License is distributed on an "AS IS" BASIS,
    # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    # See the License for the specific language governing permissions and
    # limitations under the License.
    #
    ###############################################################################
    
    '''
    MultiEvaluator.py - wrapper module for calling multiple evaluators
    ======================================================================================
    
    Copyright CUED Dialogue Systems Group 2016 - 2017
    
    .. seealso:: PyDial Imports/Dependencies: 
    
        import :mod:`utils.Settings` |.|
        import :mod:`utils.ContextLogger` |.|
        import :class:`evaluation.EvaluationManager.Evaluator` |.|
        import :class:`evaluation.SuccessEvaluator.ObjectiveSuccessEvaluator` |.|
        import :class:`evaluation.SuccessEvaluator.SubjectiveSuccessEvaluator`
    
    ************************
    
    '''
    __author__ = "cued_dialogue_systems_group"
    
    from evaluation.EvaluationManager import Evaluator
    from evaluation.SuccessEvaluator import ObjectiveSuccessEvaluator,SubjectiveSuccessEvaluator
    from utils import Settings, ContextLogger
    import numpy as np
    logger = ContextLogger.getLogger('')
    
    class MultiEvaluator(Evaluator):
        '''
        Wrapper class for combining multiple evaluators, e.g., if both objective and subjective success need to be tracked.
        
        The mainEvaluator config option must be set; it gives the index of the evaluator in the list whose reward is used as the reward signal.
        
        Optionally, the compareEvaluator config option selects a second evaluator that is used to decide whether the current dialogue is used for training, e.g., to require that subjective and objective success agree.
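        
        Illustrative config (the per-domain section ``[eval_<domainString>]`` overrides ``[eval]``; the evaluator classes and index values below are example assumptions)::
        
            [eval]
            multievaluatorlist = evaluation.SuccessEvaluator.ObjectiveSuccessEvaluator, evaluation.SuccessEvaluator.SubjectiveSuccessEvaluator
            mainevaluator = 0
            compareevaluator = 1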
        '''
        
        def __init__(self, domainString):
            super(MultiEvaluator, self).__init__(domainString)
            
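            # Default evaluators: objective and subjective success.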
            self.evaluators = [ObjectiveSuccessEvaluator(domainString), SubjectiveSuccessEvaluator(domainString)]
            
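            # Optionally replace the defaults with the evaluators named in the
            # multievaluatorlist config option (comma-separated module paths to evaluator classes).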
            self.evaluatorList = None
            if Settings.config.has_option('eval', 'multievaluatorlist'):
                self.evaluatorList = Settings.config.get('eval', 'multievaluatorlist').split(',')
            if Settings.config.has_option('eval_'+domainString, 'multievaluatorlist'):
                self.evaluatorList = Settings.config.get('eval_'+domainString, 'multievaluatorlist').split(',')
            if self.evaluatorList is not None:
                self.evaluators = []
                for e in self.evaluatorList:
                    e = e.strip()
                    self.evaluators.append(self._load_evaluator(e, domainString))
            
            self.evaluator_label = "multi evaluator (" + ", ".join([e.evaluator_label for e in self.evaluators]) + ")"
            
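            # Index of the evaluator whose reward and outcome are passed on to the dialogue system.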
            self.mainEvaluator = 0
            if Settings.config.has_option('eval', 'mainevaluator'):
                self.mainEvaluator = Settings.config.getint('eval', 'mainevaluator')
            if Settings.config.has_option('eval_'+domainString, 'mainevaluator'):
                self.mainEvaluator = Settings.config.getint('eval_'+domainString, 'mainevaluator')
                
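            # Optional index of a second evaluator; if set, a dialogue is only used for
            # training when its outcome matches that of the main evaluator (see doTraining).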
            self.compareEvaluator = None
            if Settings.config.has_option('eval', 'compareevaluator'):
                self.compareEvaluator = Settings.config.getint('eval', 'compareevaluator')
            if Settings.config.has_option('eval_'+domainString, 'compareevaluator'):
                self.compareEvaluator = Settings.config.getint('eval_'+domainString, 'compareevaluator')
            
        def turnReward(self, turnInfo):
            '''
            Computes the turn reward using turnInfo by calling :func:`_getTurnReward`. Updates the total reward and the number of turns.
            
            :param turnInfo: parameters necessary for computing the turn reward, e.g., the system act or the model of the simulated user.
            :type turnInfo: dict
            :return: int -- the turn reward of the main evaluator.
            '''
            reward = self._getTurnReward(turnInfo)
            
            for i in range(len(reward)):
                self.total_reward[i] += reward[i]
            self.num_turns += 1
            
            return reward[self.mainEvaluator]
            
        def finalReward(self, finalInfo):
            '''
            Computes the final reward using finalInfo by calling :func:`_getFinalReward`. Updates the total reward and the dialogue outcome.
            
            :param finalInfo: parameters necessary for computing the final reward, e.g., the task description or subjective feedback.
            :type finalInfo: dict
            :return: int -- the final reward of the main evaluator.
            '''
            if self.num_turns > 0:
                final_reward = self._getFinalReward(finalInfo)
                
                for i in range(len(final_reward)):
                    self.total_reward[i] += final_reward[i]
                    
                self.rewards.append(self.total_reward)
                self.outcomes.append(self.outcome)
                self.turns.append(self.num_turns)
            else:
                final_reward = [0] * len(self.evaluators)
            
            return final_reward[self.mainEvaluator]
        
        def _getTurnReward(self, turnInfo):
            
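            # One turn reward per wrapped evaluator, in the same order as self.evaluators.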
            rewardVec = [evaluator._getTurnReward(turnInfo) for evaluator in self.evaluators]
                    
            return rewardVec
               
        def _getFinalReward(self, finalInfo):
            
            rewardVec = [evaluator._getFinalReward(finalInfo) for evaluator in self.evaluators]
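            # Record the outcome of each wrapped evaluator; used for the dialogue summaries and by doTraining.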
            self.outcome = [evaluator.outcome for evaluator in self.evaluators]
    
            return rewardVec
        
        def restart(self):
            """
            Initialises variables and resets the internal state of this evaluator and of all wrapped evaluators.
        
            :param: None
            :returns: None
    
            """
            super(MultiEvaluator, self).restart()
            
            self.total_reward = [0 for _ in self.evaluators] 
            
            for evaluator in self.evaluators:
                evaluator.restart()
            
        def print_dialog_summary(self):
            """
            Prints a summary of the current dialogue. Assumes that the dialogue outcome represents success; for other outcome types, override this method in a sub-class.
            """
            if self.num_turns > 1:
                self._prstr(2, "Evaluation of domain: {} --evaluated by: {}".format(self.domainString, self.evaluator_label))
                l = []
                for i, evaluator in enumerate(self.evaluators):
                    l.append(evaluator.evaluator_short_label + '{} = {}'.format('*' if self.mainEvaluator == i else '', int(self.outcome[i])))
                s = ', '.join(l)
                s += ', rew = {}, turn = {}'.format(self.total_reward[self.mainEvaluator], self.num_turns)
                self._prstr(2, s)
            return
    
        def print_summary(self):
            """
            Prints the summary of a run, i.e., of multiple dialogues. Assumes that the dialogue outcome represents success; for other outcome types, override this method in a sub-class.
            """
            num_dialogs = len(self.rewards)
            assert(len(self.outcomes) == num_dialogs)
            assert(len(self.turns) == num_dialogs)
            if self.traceDialog > 1: print('-' * 20)
            self._prstr(1, "Results for domain: " + self.domainString + ' --evaluated by: ' + self.evaluator_label)
            
            # compute the t-value for a 95% confidence interval
            from scipy import stats
            if num_dialogs < 2:
                tinv = 1
            else:
                tinv = stats.t.ppf(1 - 0.025, num_dialogs - 1)
    
            self._prstr(1, '# of dialogues  = %d' % num_dialogs)
            if num_dialogs:
                for i, evaluator in enumerate(self.evaluators):
                    o = [outcome[i] for outcome in self.outcomes if outcome[i] is not None]
                    if len(o):
                        self._prstr(1, evaluator._getResultString(o))
                self._prstr(1, 'Average reward  = %.2f +- %.2f' % (np.mean([reward[self.mainEvaluator] for reward in self.rewards]), \
                                                                tinv * np.std([reward[self.mainEvaluator] for reward in self.rewards]) / np.sqrt(num_dialogs)))    
                self._prstr(1, 'Average turns   = %.2f +- %.2f' % (np.mean(self.turns), \
                                                                tinv * np.std(self.turns) / np.sqrt(num_dialogs)))
            return
        
        def doTraining(self):
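            '''
            Determines whether the current dialogue should be used for training.
            
            :return: bool -- True if no compare evaluator is configured, otherwise True only if the outcomes of the main and compare evaluators agree.
            '''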
            if self.compareEvaluator is None:
                return True
            
            return self.outcome[self.mainEvaluator] == self.outcome[self.compareEvaluator]
            
        def _load_evaluator(self, configString, domainString):
            '''
            Loads and instantiates the respective evaluator as configured by configString. The new object is returned.
                    
            .. note::
                To dynamically load a class, its __init__() must take exactly one argument: domainString.
            
            :param configString: the config string pointing to the evaluator class to be instantiated
            :type configString: str
            :param domainString: the domain the evaluator will work on
            :type domainString: str
            
            :returns: The evaluator object
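            
            Illustrative example: a config string such as
            ``evaluation.SuccessEvaluator.ObjectiveSuccessEvaluator`` is split into the module path
            ``evaluation.SuccessEvaluator`` and the class name ``ObjectiveSuccessEvaluator``,
            which is then imported and instantiated with domainString.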
            '''
            
    
            try:
                # try to view the config string as a complete module path to the class to be instantiated
                components = configString.split('.')
                packageString = '.'.join(components[:-1]) 
                classString = components[-1]
                mod = __import__(packageString, fromlist=[classString])
                klass = getattr(mod, classString)
                return klass(domainString)
            except ImportError as e:
                print(e)
                logger.error('Unknown domain evaluator "{}" for domain "{}"'.format(configString, domainString))