from typing import List

import spacy

from code.helper import FileWriter, FileReader


def get_tokens(model, sentences: List[str]) -> List[List[str]]:
    '''
    Tokenize each sentence with the given spaCy model.

    :param model: Callable spaCy language model; ``model(sentence)`` must
        return an iterable of tokens exposing a ``.text`` attribute
    :param sentences: List of sentence strings to tokenize
    :return: One list of token strings per input sentence, in input order
    '''
    # A nested comprehension replaces the manual accumulate-with-`+=` loop;
    # an empty `sentences` list naturally yields an empty result.
    return [[token.text for token in model(sentence)] for sentence in sentences]


if __name__ == '__main__':
    # Requires the German model; install it once with:
    #     $ python3 -m spacy download de_core_news_sm
    model = spacy.load('de_core_news_sm')
    sentences = FileReader.get_file_as_list('./corpus/projekt1_corpus.txt')
    FileWriter.write(get_tokens(model, sentences), './tokens/spacy_tokens.txt')