from typing import List

from pycorenlp import StanfordCoreNLP

from code.helper import FileReader, FileWriter


def get_tokens(sentences: List[str]) -> List[List[str]]:
    '''
    Tokenize every sentence with a Stanford CoreNLP server.

    Each sentence is sent in its own request to the server at
    ``http://localhost:9000`` with only the ``tokenize`` annotator enabled
    and JSON output requested. The ``word`` entry of every item under the
    response's ``tokens`` key is collected.

    :param sentences: List containing the sentences
    :return: List of token-lists, one per sentence, in input order
    '''
    nlp = StanfordCoreNLP('http://localhost:9000')
    properties = {
        'annotators': 'tokenize',
        'outputFormat': 'json'
    }
    tokens = []
    for sentence in sentences:
        result = nlp.annotate(text=sentence, properties=properties)
        # Append in place: rebuilding the lists with ``+`` copied them on
        # every step, which made the collection quadratic in its length.
        tokens.append([word.get('word') for word in result.get('tokens')])
    return tokens


if __name__ == '__main__':
    # Script entry point: load the corpus through FileReader, tokenize it,
    # and hand the token lists to FileWriter for the results file.
    corpus_path = './corpus/projekt1_corpus.txt'
    output_path = './results/corenlp_tokens.txt'
    FileWriter.write(
        get_tokens(FileReader.get_file_as_list(corpus_path)),
        output_path,
    )