###############################################################################
# PyDial: Multi-domain Statistical Spoken Dialogue System Software
###############################################################################
#
# Copyright 2015 - 2019
# Cambridge University Engineering Department Dialogue Systems Group
#
# 
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
###############################################################################

'''
RuleTopicTrackers.py - Rule based topic trackers 
==========================================================================

Copyright CUED Dialogue Systems Group 2015 - 2017

.. seealso:: CUED Imports/Dependencies: 

    import :mod:`utils.Settings` |.|
    import :mod:`utils.ContextLogger` |.|
    import :mod:`ontology.OntologyUtils` |.|

************************

'''
__author__ = "cued_dialogue_systems_group"

'''
    Modifications History
    ===============================
    Date        Author  Description
    ===============================
    Jul 20 2016 lmr46   Inferring only the domains configured in the config-file
                        Note that keywords for domains are set in the dictionary here (handcoded)
                        TODO: What happen when the same keyword apply for different domains?
'''
from utils import Settings, ContextLogger
from ontology import OntologyUtils
logger = ContextLogger.getLogger('')

class TopicTrackerInterface(object):
    """Template for any Topic Tracker for the cued-python system
    
    .. Note:
            To dynamically load a class, the __init__() must take one argument: domainString.
    """
    def infer_domain(self,userActHyps=None):
        pass  # Define in actual class. Must set and also return self.current_tracking_result
    
    def restart(self):
        pass  # Define in actual class. May be some notion of state etc to be reset in more advanced topic trackers


class TextBasedSwitchTopicTracker(TopicTrackerInterface):
    """When using texthub, you can enter: switch("CamRestaurants")  which will change domains to CamRestaurants for example.
    -- if switch("XX") not entered, assumes you want to stay in domain of previous turn
    """
    def __init__(self):
        self.restart()
        
    def restart(self):
        self.current_tracking_result = None
        self.FOUND_DOMAIN = False

    def infer_domain(self, userActHyps=None):
        """userActHyps : [(text, prob)]
        """
        if 'switch("' in userActHyps[0][0]:
            candidateDomain = userActHyps[0][0].split('"')[1]  # a little fragile -  
            if candidateDomain in OntologyUtils.available_domains:
                self.current_tracking_result = candidateDomain
                self.FOUND_DOMAIN = True
            else:
                logger.warning("Not a valid domain tag in your switch('X') command - remain with previous domain")
        elif not self.FOUND_DOMAIN:
            msg = '\nSWITCH TOPIC TRACKER USAGE: When using the texthub switch topic tracker '
            msg += '-You should start by saying which domain to switch to.\n' 
            msg += 'Enter exactly (where DOMAINTAG is CamRestaurants,Laptops6 etc): switch("DOMAINTAG")\n'
            msg += 'You can continue on directly by entering for example: switch("DOMAINTAG")i want a cheap one\n'
            msg += 'Alternatively, use a different topic tracker.'
            exit(msg)
        else:
            logger.info('Switch("DOMAINTAG") not detected - staying with previous domain')
        return self.current_tracking_result 


class KeywordSpottingTopicTracker(TopicTrackerInterface):
    """ Just a hacky topic tracker to develop voicehub with. 
    :: Assumptions/Notes
    -- To resolve resturants and hotels will also have to spot location
    -- Assume we will stick with last domain unless we detect one of our keywords
    """
    def __init__(self):
        self.current_tracking_result = None
        self.keywords = dict.fromkeys(OntologyUtils.available_domains, None)
        #lmr46: added some keywords or lexical units ('food')
        #consider to have a Lexicon that groups words per concepts, there are available lexica for English
        #lmr46: Adapting only the domains available in the config file
        domains = Settings.config.get("GENERAL",'domains') # a Hub has checked this exists
        possible_domains = domains.split(',')
        for dom in possible_domains:
            kwds=[]
            if dom=="CamRestaurants":
                kwds=["cambridge","restaurant",'food','eat']
            elif dom=="CamHotels":
                kwds=["cambridge","hotel", "guest house", "guesthouse"]
            elif dom=="SFRestaurants":
                kwds=["san francisco","restaurant", "food","place to eat"]
            elif dom=="SFHotels":
                kwds=["san francisco","hotel", "guest house", "guesthouse", "hostel", "motel", "place to stay"]
            elif dom=="wikipedia":
                kwds=["wiki"]

            self.keywords[dom]=kwds
        # self.keywords["CamRestaurants"] = ["cambridge","restaurant",'food']
        # self.keywords["CamHotels"] = ["cambridge","hotel", "guest house", "guesthouse"]
        # self.keywords["SFRestaurants"] = ["san francisco","restaurant", "food","book"]   # ASR cant recognise much at present -- will develop
        #             # system using CamRestaurants and CamHotels
        # self.keywords["SFHotels"] = ["san francisco","hotel", "guest house", "guesthouse", "hostel", "motel", "book"]
        # self.keywords["wikipedia"] = ["wiki"] # this could be used like "OK Google" or "Alexa"

    def restart(self):
        self.current_tracking_result = None

    def infer_domain(self,userActHyps=None):
        """
        -- Assumptions: Only working with the top hypothesis from ASR
        --              Stick to last domain if nothing spotted in this turn
        --              ORDER IS IMPORTANT -- ie it will hand off to FIRST domain a keyword is spotted in
        """
        # TODO - could require all keywords to be present - e.g to disambiguate cam hotels from SFHotels
        #lmr46: allowing overlapping keywords between domains
        #su259: making current_tracking_result a local variable. method returns none if no new domain has been identified.
        
        current_tracking_result = None
        
        overlappindomains=[]
        for dstring in list(self.keywords.keys()):
            if self._is_a_keyword_in_sentence(self.keywords[dstring],userActHyps[0][0]):
                logger.info(dstring + " keyword found in: " + userActHyps[0][0])
                if "i(=" in userActHyps[0][0] or "inform(query=" in userActHyps[0][0]:
                    current_tracking_result = "wikipedia"  # this is just a hack so i can wiki things like hotels!
                else:
                    overlappindomains.append(dstring)
                    current_tracking_result = dstring                    
                #break  #TODO: Not handling overlapping of keywords between domains - it has to disambiguate!!!

        if len(overlappindomains) > 1:
            current_tracking_result = None

        return current_tracking_result

    def _is_a_keyword_in_sentence(self,keywords, sentence):
        """Note keywords just use the first spotted one ... this needs to be a little more sophisticated to resolve
        SF hotel versus Cambridge hotel
        """
        #TODO - will need changing if/when ASR is good enough to decode LOCATIONS - so that a match will require e.g
        # "CAMBRIDGE" + "RESTAURANT" to count for TT domain
        if keywords is not None:
            for keyword in keywords:
                if keyword in sentence.lower():
                    return True
        return False





#END OF FILE