"""SPARQL query builders and statistics helpers for films in DBpedia."""
# NOTE: ``tokenize.String`` is a regular-expression pattern string, not a type.
# The import is retained so the module namespace is unchanged, but the
# annotations below use the builtin ``str`` instead.
from tokenize import String  # noqa: F401
from typing import Callable, Dict

from src import DBPEDIA_ENTRYPOINT
from src.lib.decapper import Decapper
from src.lib.engine import SPARQLEngine
from src.lib.keys import ResultKeys

# Every prefix used by the paged movie queries is declared explicitly so the
# queries do not depend on prefixes that an endpoint happens to predefine.
_MOVIE_QUERY_PREFIXES = """
PREFIX dbo: <http://dbpedia.org/ontology/>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
"""

# Graph pattern: films with a subject matching "comedy" (case-insensitive).
_COMEDY_FILM_PATTERN = """
    ?movie a <http://dbpedia.org/ontology/Film> .
    ?movie dct:subject ?subject.
    FILTER( regex(?subject, "comedy","i" ))"""

# Graph pattern: the film has a director with a known birth date.
_DIRECTOR_BIRTHDATE_PATTERN = """
    ?movie dbo:director [dbo:birthDate ?dob]."""

# NOTE(review): SUBSTR is 1-based in the SPARQL 1.1 specification; the start
# index 0 used below is kept exactly as in the original queries. Confirm that
# the target endpoint extracts the full four-digit year with it.
_BIRTH_YEAR_EXPRESSION = 'xsd:integer(substr(xsd:string(?dob), 0, 4))'


def _build_movie_query(where_body: str, offset: int) -> str:
    """
    Assemble a paged ``SELECT DISTINCT ?Movie`` query around a graph pattern.

    :param where_body: Graph pattern placed inside the WHERE clause.
    :param offset: Offset to start at.
    :return: The query
    """
    return (
        _MOVIE_QUERY_PREFIXES
        + "SELECT DISTINCT (?movie AS ?Movie) WHERE {"
        + where_body
        + "\n}\nORDER BY DESC(?movie)\nOFFSET "
        + str(offset)
    )


def __get_all_films_with_offset(offset: int = 0) -> str:
    """
    This method returns the query to find all movies reachable in DBPedia.

    :param offset: Offset to start at.
    :return: The query
    """
    return _build_movie_query("""
    ?movie rdf:type dbo:Film""", offset)


def __get_all_comedy_films_with_offset(offset: int = 0) -> str:
    """
    This method returns the query to find all comedy movies reachable in DBPedia.

    :param offset: Offset to start at.
    :return: The query
    """
    return _build_movie_query(_COMEDY_FILM_PATTERN, offset)


def __get_all_comedy_films_with_directors_birthday_offset(offset: int = 0) -> str:
    """
    This method returns the query to find all comedy movies reachable in
    DBPedia that have a director with a birth date.

    :param offset: Offset to start at.
    :return: The query
    """
    return _build_movie_query(
        _COMEDY_FILM_PATTERN + _DIRECTOR_BIRTHDATE_PATTERN,
        offset,
    )


def __get_all_comedy_films_with_directors_birthday_before_1970_offset(offset: int = 0) -> str:
    """
    This method returns the query to find all comedy movies reachable in
    DBPedia that have a director whose birth year is before 1970.

    :param offset: Offset to start at.
    :return: The query
    """
    return _build_movie_query(
        _COMEDY_FILM_PATTERN
        + _DIRECTOR_BIRTHDATE_PATTERN
        + "\n    FILTER (" + _BIRTH_YEAR_EXPRESSION + " < 1970).",
        offset,
    )


def __get_all_comedy_films_with_directors_birthday_after_or_on_1970_offset(offset: int = 0) -> str:
    """
    This method returns the query to find all comedy movies reachable in
    DBPedia that have a director whose birth year is 1970 or later.

    :param offset: Offset to start at.
    :return: The query
    """
    return _build_movie_query(
        _COMEDY_FILM_PATTERN
        + _DIRECTOR_BIRTHDATE_PATTERN
        + "\n    FILTER (" + _BIRTH_YEAR_EXPRESSION + " >= 1970).",
        offset,
    )


def __get_interlinks_from_dbpedia_to_wikidata() -> str:
    """
    This method gets the interlink information from the dbpedia data to the
    wikidata data.

    The query counts distinct movies, their wikidata links and English names,
    and the same three figures for their directors.

    :return: The query
    """
    # The '#' comments inside the query are SPARQL line comments, so the line
    # breaks around them are significant and must be preserved.
    return """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX dbc: <http://dbpedia.org/resource/Category:>
PREFIX dbo: <http://dbpedia.org/ontology/>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX dbp: <http://dbpedia.org/property/>
SELECT
    (COUNT(DISTINCT ?movie) AS ?Movie)
    (COUNT(DISTINCT ?same) AS ?MovieLink)
    (COUNT(DISTINCT ?name) AS ?Title)
    (COUNT(DISTINCT ?director) AS ?Director)
    (COUNT(DISTINCT ?sameDirector) AS ?DirectorLink)
    (COUNT(DISTINCT ?directorName) AS ?DirectorName)
WHERE {
    {
        SELECT DISTINCT ?movie WHERE {
            SELECT DISTINCT ?movie ?min_year ?max_year WHERE {
                SELECT DISTINCT ?movie ?director (MIN(?year) AS ?min_year) (MAX(?year) AS ?max_year) WHERE{
                    # X type Film
                    {?movie rdf:type dbo:Film.}
                    # Imagine a inner join
                    # X subject Comedy
                    {?movie dct:subject dbc:Comedy}
                    UNION
                    # X subject Y; Y like "Comedy".
                    {?movie dct:subject ?y FILTER CONTAINS(lcase(str(?y)), "comedy").}
                    UNION
                    # X genre Y; Y like "Comedy".
                    {?movie dbo:genre ?y FILTER CONTAINS (lcase(str(?y)), "comedy").}
                    # X director Person; Person birth date year
                    {?movie dbo:director ?director.
                    ?director dbo:birthDate ?dob.
                    BIND(xsd:integer(substr(xsd:string(?dob), 0, 4)) as ?year) .
                    }
                } GROUP BY ?movie ?director
            }GROUP BY ?movie HAVING (?min_year = ?max_year)
        }GROUP BY ?movie HAVING (?min_year >= 1970)
    }
    OPTIONAL{?movie foaf:name ?name FILTER(LANG(?name)="en").}
    OPTIONAL{
        ?movie dbo:director ?director.
        OPTIONAL{
            ?director owl:sameAs ?sameDirector FILTER CONTAINS (LCASE(STR(?sameDirector)), 'wikidata.org').
        }
    }
    OPTIONAL{
        {?movie dbo:director ?director.
        ?director rdfs:label ?directorName FILTER(LANG(?directorName)="en").}
    }
    OPTIONAL{?movie owl:sameAs ?same FILTER CONTAINS (LCASE(STR(?same)), 'wikidata.org').}
}
"""


def get_statistics(entrypoint: str, query: Callable[[int], str], info: str) -> Dict:
    """
    This method counts all distinct movies for the given query.

    The query is executed page by page until a page comes back empty.

    :param entrypoint: Entrypoint to use.
    :param query: Callable that takes an offset and returns the query text.
    :param info: What is the info
    :return: Dict with information of the result
    """
    entities = set()
    offset = 0
    while True:
        engine = SPARQLEngine(entrypoint=entrypoint, query=query(offset))
        bindings = engine.get_json_with_query()['results']['bindings']
        if not bindings:
            break
        entities.update(
            Decapper(binding[ResultKeys.movie.value]).unpack()
            for binding in bindings
        )
        # Advance by the number of rows actually received instead of a
        # hard-coded page size, so no rows are skipped when the endpoint
        # returns fewer rows per request than expected.
        offset += len(bindings)
    return {
        'info': info,
        'entrypoint': entrypoint,
        'independent': len(entities)
    }


def get_interlinks(entrypoint: str, query: str, info: str) -> Dict:
    """
    This method gets all interlink information of the dbpedia results.

    Only the first result row is read; each value is unpacked and converted
    to ``int``.

    :param entrypoint: Entrypoint to use.
    :param query: The query to be looked for.
    :param info: What is the info
    :return: Dict with information of the result
    """
    response = SPARQLEngine(entrypoint=entrypoint, query=query).get_json_with_query()
    row = response['results']['bindings'][0]

    def count_of(key) -> int:
        # Unpack a single binding of the first row and convert it to int.
        return int(Decapper(row[key]).unpack())

    return {
        'info': info,
        'entrypoint': entrypoint,
        'movies': count_of(ResultKeys.movie.value),
        'moviesLinked': count_of(ResultKeys.movie_link.value),
        'moviesNames': count_of(ResultKeys.title.value),
        'directors': count_of(ResultKeys.director.value),
        'directorsLinked': count_of(ResultKeys.director_link.value),
        'directorsNames': count_of('DirectorName')
    }


def get_all_statistics() -> Dict:
    """
    This method gets all statistics of dbpedia.

    :return: Dict with all infos, keyed by the name of each statistic.
    """
    # Each key doubles as the 'info' label of its statistic.
    query_builders = {
        'all_films_in_dbpedia':
            __get_all_films_with_offset,
        'all_comedy_films_in_dbpedia':
            __get_all_comedy_films_with_offset,
        'all_comedy_films_in_dbpedia_with_a_director_who_has_a_birthday':
            __get_all_comedy_films_with_directors_birthday_offset,
        'all_comedy_films_in_dbpedia_with_a_director_who_has_a_birthday_before_1970':
            __get_all_comedy_films_with_directors_birthday_before_1970_offset,
        'all_comedy_films_in_dbpedia_with_a_director_who_has_a_birthday_after_or_on_1970':
            __get_all_comedy_films_with_directors_birthday_after_or_on_1970_offset,
    }
    statistics = {
        name: get_statistics(DBPEDIA_ENTRYPOINT, builder, name)
        for name, builder in query_builders.items()
    }
    statistics['interlinks_from_dbpedia_to_wikidata'] = get_interlinks(
        DBPEDIA_ENTRYPOINT,
        __get_interlinks_from_dbpedia_to_wikidata(),
        'interlinks_from_dbpedia_to_wikidata',
    )
    return statistics