Select Git revision
build.gradle
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
dbpedia_analyser.py 10.53 KiB
from tokenize import String
from typing import Callable, Dict

from src import DBPEDIA_ENTRYPOINT
from src.lib.decapper import Decapper
from src.lib.engine import SPARQLEngine
from src.lib.keys import ResultKeys
def __get_all_films_with_offset(offset: int = 0) -> str:
    """
    Build the SPARQL query that lists every resource typed ``dbo:Film``.

    The result rows are sorted by descending ``?movie`` and exposed under the
    ``?Movie`` variable; the given offset is appended to the trailing
    ``OFFSET`` clause so that callers can page through the result set.

    :param offset: Number of leading result rows to skip.
    :return: The query text.
    """
    # The offset is appended after the literal so the query always ends with
    # "OFFSET\n<offset>".
    return """
PREFIX dbo: <http://dbpedia.org/ontology/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT DISTINCT (?movie AS ?Movie)
WHERE {
?movie rdf:type dbo:Film
}
ORDER BY DESC(?movie)
OFFSET
""" + str(offset)
def __get_all_comedy_films_with_offset(offset: int = 0) -> str:
    """
    Build the SPARQL query that lists every film with a comedy subject.

    A film matches when it is typed ``dbo:Film`` and at least one of its
    ``dct:subject`` values matches the case-insensitive pattern ``comedy``.
    Rows are sorted by descending ``?movie`` and exposed as ``?Movie``.

    :param offset: Number of leading result rows to skip.
    :return: The query text.
    """
    # ``dct:`` is declared explicitly so the query does not depend on the
    # endpoint predefining that prefix, and the subject IRI is converted with
    # str() because REGEX is only defined for string literals.
    return """
PREFIX dbo: <http://dbpedia.org/ontology/>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT DISTINCT (?movie AS ?Movie)
WHERE {
?movie a <http://dbpedia.org/ontology/Film> .
?movie dct:subject ?subject.
FILTER( regex(str(?subject), "comedy","i" ))
}
ORDER BY DESC(?movie)
OFFSET
""" + str(offset)
def __get_all_comedy_films_with_directors_birthday_offset(offset: int = 0) -> str:
    """
    Build the SPARQL query that lists comedy films whose director has a birth date.

    A film matches when it is typed ``dbo:Film``, has a ``dct:subject``
    matching the case-insensitive pattern ``comedy`` and has at least one
    ``dbo:director`` that carries a ``dbo:birthDate``. Rows are sorted by
    descending ``?movie`` and exposed as ``?Movie``.

    :param offset: Number of leading result rows to skip.
    :return: The query text.
    """
    # ``dct:`` is declared explicitly so the query does not depend on the
    # endpoint predefining that prefix, and the subject IRI is converted with
    # str() because REGEX is only defined for string literals.
    return """
PREFIX dbo: <http://dbpedia.org/ontology/>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT DISTINCT (?movie AS ?Movie)
WHERE {
?movie a <http://dbpedia.org/ontology/Film> .
?movie dct:subject ?subject.
FILTER( regex(str(?subject), "comedy","i" ))
?movie dbo:director [dbo:birthDate ?dob].
}
ORDER BY DESC(?movie)
OFFSET
""" + str(offset)
def __get_all_comedy_films_with_directors_birthday_before_1970_offset(offset: int = 0) -> str:
    """
    Build the SPARQL query that lists comedy films with a director born before 1970.

    A film matches when it is typed ``dbo:Film``, has a ``dct:subject``
    matching the case-insensitive pattern ``comedy`` and has at least one
    ``dbo:director`` whose ``dbo:birthDate`` starts with a year below 1970.
    Rows are sorted by descending ``?movie`` and exposed as ``?Movie``.

    :param offset: Number of leading result rows to skip.
    :return: The query text.
    """
    # Portability fixes compared with the earlier text of this query:
    # * ``dct:`` and ``xsd:`` are declared instead of relying on prefixes
    #   predefined by the endpoint;
    # * the subject IRI goes through str() because REGEX takes string literals;
    # * SUBSTR is 1-based, so the four-digit year starts at index 1 (a start
    #   index of 0 yields only three characters on a conformant engine).
    return """
PREFIX dbo: <http://dbpedia.org/ontology/>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
SELECT DISTINCT (?movie AS ?Movie)
WHERE {
?movie a <http://dbpedia.org/ontology/Film> .
?movie dct:subject ?subject.
FILTER( regex(str(?subject), "comedy","i" ))
?movie dbo:director [dbo:birthDate ?dob].
FILTER (xsd:integer(substr(xsd:string(?dob), 1, 4)) < 1970).
}
ORDER BY DESC(?movie)
OFFSET
""" + str(offset)
def __get_all_comedy_films_with_directors_birthday_after_or_on_1970_offset(offset: int = 0) -> str:
    """
    Build the SPARQL query that lists comedy films with a director born in or after 1970.

    A film matches when it is typed ``dbo:Film``, has a ``dct:subject``
    matching the case-insensitive pattern ``comedy`` and has at least one
    ``dbo:director`` whose ``dbo:birthDate`` starts with a year of 1970 or
    later. Rows are sorted by descending ``?movie`` and exposed as ``?Movie``.

    :param offset: Number of leading result rows to skip.
    :return: The query text.
    """
    # Portability fixes compared with the earlier text of this query:
    # * ``dct:`` and ``xsd:`` are declared instead of relying on prefixes
    #   predefined by the endpoint;
    # * the subject IRI goes through str() because REGEX takes string literals;
    # * SUBSTR is 1-based, so the four-digit year starts at index 1 (a start
    #   index of 0 yields only three characters on a conformant engine).
    return """
PREFIX dbo: <http://dbpedia.org/ontology/>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
SELECT DISTINCT (?movie AS ?Movie)
WHERE {
?movie a <http://dbpedia.org/ontology/Film> .
?movie dct:subject ?subject.
FILTER( regex(str(?subject), "comedy","i" ))
?movie dbo:director [dbo:birthDate ?dob].
FILTER (xsd:integer(substr(xsd:string(?dob), 1, 4)) >= 1970).
}
ORDER BY DESC(?movie)
OFFSET
""" + str(offset)
def __get_interlinks_from_dbpedia_to_wikidata() -> str:
    """
    Build the SPARQL query that counts interlinks from DBpedia to Wikidata.

    The query returns a single row of distinct counts: ``?Movie``,
    ``?MovieLink`` (``owl:sameAs`` targets of the movie containing
    ``wikidata.org``), ``?Title`` (English ``foaf:name`` values),
    ``?Director``, ``?DirectorLink`` (``owl:sameAs`` targets of the director
    containing ``wikidata.org``) and ``?DirectorName`` (English
    ``rdfs:label`` values of the director).

    The movies are selected by the nested sub-selects: resources typed
    ``dbo:Film`` whose ``dct:subject`` or ``dbo:genre`` mentions "comedy" and
    that have a ``dbo:director`` with a ``dbo:birthDate``, restricted by the
    two ``HAVING`` clauses on the director birth years (minimum equal to
    maximum, and minimum >= 1970).

    :return: The query text.
    """
    # Fixes compared with the earlier text of this query:
    # * ``owl:`` and ``rdfs:`` are used below, so they are now declared rather
    #   than relying on prefixes predefined by the endpoint;
    # * SUBSTR is 1-based, so the four-digit year starts at index 1.
    # NOTE(review): the two outer sub-selects use ?min_year/?max_year in
    # GROUP BY ?movie ... HAVING without aggregating them; a strict SPARQL 1.1
    # engine may reject that. Left unchanged -- confirm against the endpoint.
    return """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX dbc: <http://dbpedia.org/resource/Category:>
PREFIX dbo: <http://dbpedia.org/ontology/>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX dbp: <http://dbpedia.org/property/>
SELECT
(COUNT(DISTINCT ?movie) AS ?Movie)
(COUNT(DISTINCT ?same) AS ?MovieLink)
(COUNT(DISTINCT ?name) AS ?Title)
(COUNT(DISTINCT ?director) AS ?Director)
(COUNT(DISTINCT ?sameDirector) AS ?DirectorLink)
(COUNT(DISTINCT ?directorName) AS ?DirectorName)
WHERE {
{
SELECT DISTINCT ?movie
WHERE {
SELECT DISTINCT ?movie ?min_year ?max_year
WHERE {
SELECT DISTINCT ?movie ?director (MIN(?year) AS ?min_year) (MAX(?year) AS ?max_year)
WHERE{
# X type Film
{?movie rdf:type dbo:Film.}
# Imagine a inner join
# X subject Comedy
{?movie dct:subject dbc:Comedy}
UNION
# X subject Y; Y like "Comedy".
{?movie dct:subject ?y FILTER CONTAINS(lcase(str(?y)), "comedy").}
UNION
# X genre Y; Y like "Comedy".
{?movie dbo:genre ?y FILTER CONTAINS (lcase(str(?y)), "comedy").}
# X director Person; Person birth date year
{?movie dbo:director ?director.
?director dbo:birthDate ?dob.
BIND(xsd:integer(substr(xsd:string(?dob), 1, 4)) as ?year) .
}
} GROUP BY ?movie ?director
}GROUP BY ?movie HAVING (?min_year = ?max_year)
}GROUP BY ?movie HAVING (?min_year >= 1970)
}
OPTIONAL{?movie foaf:name ?name FILTER(LANG(?name)="en").}
OPTIONAL{
?movie dbo:director ?director.
OPTIONAL{
?director owl:sameAs ?sameDirector FILTER CONTAINS (LCASE(STR(?sameDirector)), 'wikidata.org').
}
}
OPTIONAL{
{?movie dbo:director ?director.
?director rdfs:label ?directorName FILTER(LANG(?directorName)="en").}
}
OPTIONAL{?movie owl:sameAs ?same FILTER CONTAINS (LCASE(STR(?same)), 'wikidata.org').}
}
"""
def get_statistics(entrypoint: str, query: Callable[[int], str], info: str) -> Dict:
    """
    Count the distinct movies returned by a paged SPARQL query.

    ``query`` is called with the current row offset to obtain the query text
    for each page. Pages are fetched until one comes back without bindings;
    the unpacked movie values of all pages are collected in a set so that
    every movie is counted once.

    :param entrypoint: SPARQL endpoint handed to ``SPARQLEngine``.
    :param query: Callable that maps a row offset to the query text.
    :param info: Label that is copied unchanged into the result.
    :return: Dict with the keys ``info``, ``entrypoint`` and ``independent``
        (the number of distinct movies found).
    """
    movies = set()
    offset = 0
    while True:
        engine = SPARQLEngine(entrypoint=entrypoint, query=query(offset))
        bindings = engine.get_json_with_query()['results']['bindings']
        if not bindings:
            # An empty page means the previous one was the last.
            break
        movies.update(
            Decapper(row[ResultKeys.movie.value]).unpack() for row in bindings
        )
        # Advance by the rows actually received: a fixed step would skip
        # results whenever the endpoint hands out smaller pages than assumed.
        offset += len(bindings)
    return {
        'info': info,
        'entrypoint': entrypoint,
        'independent': len(movies)
    }
def get_interlinks(entrypoint: str, query: str, info: str) -> Dict:
    """
    Run the interlink query and return its counts as integers.

    Only the first binding row of the response is read; each count is
    unpacked with ``Decapper`` and converted with ``int``.

    :param entrypoint: SPARQL endpoint handed to ``SPARQLEngine``.
    :param query: The query text to execute.
    :param info: Label that is copied unchanged into the result.
    :return: Dict with the keys ``info``, ``entrypoint``, ``movies``,
        ``moviesLinked``, ``moviesNames``, ``directors``, ``directorsLinked``
        and ``directorsNames``.
    """
    response = SPARQLEngine(entrypoint=entrypoint, query=query).get_json_with_query()
    # Look the single result row up once instead of re-indexing per field.
    row = response['results']['bindings'][0]

    def count_of(column) -> int:
        # Unpack one column of the result row and convert it to an integer.
        return int(Decapper(row[column]).unpack())

    return {
        'info': info,
        'entrypoint': entrypoint,
        'movies': count_of(ResultKeys.movie.value),
        'moviesLinked': count_of(ResultKeys.movie_link.value),
        'moviesNames': count_of(ResultKeys.title.value),
        'directors': count_of(ResultKeys.director.value),
        'directorsLinked': count_of(ResultKeys.director_link.value),
        'directorsNames': count_of('DirectorName')
    }
def get_all_statistics() -> Dict:
    """
    Collect every DBpedia statistic produced by this module.

    Each paged movie query is evaluated with ``get_statistics`` and stored
    under its own label; the interlink counts from ``get_interlinks`` are
    added last.

    :return: Dict mapping each statistic label to its result dict.
    """
    # Label / query-builder pairs, in the order the statistics are reported.
    paged_queries = (
        ('all_films_in_dbpedia',
         __get_all_films_with_offset),
        ('all_comedy_films_in_dbpedia',
         __get_all_comedy_films_with_offset),
        ('all_comedy_films_in_dbpedia_with_a_director_who_has_a_birthday',
         __get_all_comedy_films_with_directors_birthday_offset),
        ('all_comedy_films_in_dbpedia_with_a_director_who_has_a_birthday_before_1970',
         __get_all_comedy_films_with_directors_birthday_before_1970_offset),
        ('all_comedy_films_in_dbpedia_with_a_director_who_has_a_birthday_after_or_on_1970',
         __get_all_comedy_films_with_directors_birthday_after_or_on_1970_offset),
    )
    report = {}
    for label, build_query in paged_queries:
        report[label] = get_statistics(DBPEDIA_ENTRYPOINT, build_query, label)

    interlink_label = 'interlinks_from_dbpedia_to_wikidata'
    report[interlink_label] = get_interlinks(
        DBPEDIA_ENTRYPOINT,
        __get_interlinks_from_dbpedia_to_wikidata(),
        interlink_label,
    )
    return report