Skip to content
Snippets Groups Projects
Commit 484acdc5 authored by Marc Feger's avatar Marc Feger
Browse files

Add statistics for DBPedia; Add Documentation for it

parent ca0f52bf
No related branches found
No related tags found
No related merge requests found
from flask import Flask, render_template from flask import Flask, render_template
from flask.json import jsonify from flask.json import jsonify
from dbpedia_analyser import get_all_statistics
from src.main import assemble_wikidata_groundtruth_english, assemble_dbpedia_groundtruth_english, \ from src.main import assemble_wikidata_groundtruth_english, assemble_dbpedia_groundtruth_english, \
assemble_wikidata_triples, \ assemble_wikidata_triples, \
assemble_dbpedia_triples, test_if_data_is_enlargeable_for_dbpedia, assemble_similarity_triples assemble_dbpedia_triples, test_if_data_is_enlargeable_for_dbpedia, assemble_similarity_triples
# app = Flask(__name__)
app = Flask(__name__, template_folder='.') app = Flask(__name__, template_folder='.')
@app.route('/dbpedia/statistics')
def statistics():
    """
    Route handler for '/dbpedia/statistics'.
    :return: The result of get_all_statistics() as a JSON response.
    """
    payload = get_all_statistics()
    return jsonify(payload)
@app.route('/coreferences') @app.route('/coreferences')
def coreferences(): def coreferences():
return jsonify(assemble_similarity_triples()) return jsonify(assemble_similarity_triples())
......
from tokenize import String
from typing import Callable, Dict

from src import DBPEDIA_ENTRYPOINT
from src.lib.decapper import Decapper
from src.lib.engine import SPARQLEngine
from src.lib.keys import ResultKeys
def __get_all_films_with_offset(offset: int = 0) -> str:
    """
    This method returns the query to find all movies reachable in DBPedia.

    The query has no LIMIT clause, so the size of one result page is decided
    by the endpoint; the caller pages through the result by raising the offset.

    :param offset: Number of leading result rows to skip.
    :return: The SPARQL query text, ending in the OFFSET clause.
    """
    # The annotation is the builtin str: tokenize.String is a regex pattern
    # string, not a type, and must not be used as an annotation.
    return """
PREFIX dbo: <http://dbpedia.org/ontology/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT DISTINCT (?movie AS ?Movie)
WHERE {
?movie rdf:type dbo:Film
}
ORDER BY DESC(?movie)
OFFSET
""" + str(offset)
def __get_all_comedy_films_with_offset(offset: int = 0) -> str:
    """
    This method returns the query to find all comedy movies reachable in DBPedia.

    A movie counts as a comedy when one of its dct:subject values matches
    "comedy" case-insensitively.

    :param offset: Number of leading result rows to skip.
    :return: The SPARQL query text, ending in the OFFSET clause.
    """
    # dct: is declared explicitly so the query does not depend on prefixes
    # that the endpoint happens to predefine.
    return """
PREFIX dbo: <http://dbpedia.org/ontology/>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT DISTINCT (?movie AS ?Movie)
WHERE {
?movie a <http://dbpedia.org/ontology/Film> .
?movie dct:subject ?subject.
FILTER( regex(?subject, "comedy","i" ))
}
ORDER BY DESC(?movie)
OFFSET
""" + str(offset)
def __get_all_comedy_films_with_directors_birthday_offset(offset: int = 0) -> str:
    """
    This method returns the query to find all comedy movies reachable in DBPedia
    that have a director with a birth date.

    :param offset: Number of leading result rows to skip.
    :return: The SPARQL query text, ending in the OFFSET clause.
    """
    # dct: is declared explicitly so the query does not depend on prefixes
    # that the endpoint happens to predefine.
    return """
PREFIX dbo: <http://dbpedia.org/ontology/>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT DISTINCT (?movie AS ?Movie)
WHERE {
?movie a <http://dbpedia.org/ontology/Film> .
?movie dct:subject ?subject.
FILTER( regex(?subject, "comedy","i" ))
?movie dbo:director [dbo:birthDate ?dob].
}
ORDER BY DESC(?movie)
OFFSET
""" + str(offset)
def __get_all_comedy_films_with_directors_birthday_before_1970_offset(offset: int = 0) -> str:
    """
    This method returns the query to find all comedy movies reachable in DBPedia
    that have a director whose birth year is before 1970.

    :param offset: Number of leading result rows to skip.
    :return: The SPARQL query text, ending in the OFFSET clause.
    """
    # dct: and xsd: are declared explicitly so the query does not depend on
    # prefixes that the endpoint happens to predefine.
    # SPARQL SUBSTR is 1-based: the four-digit year is substr(..., 1, 4).
    # A start of 0 yields only three characters on a compliant engine.
    return """
PREFIX dbo: <http://dbpedia.org/ontology/>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
SELECT DISTINCT (?movie AS ?Movie)
WHERE {
?movie a <http://dbpedia.org/ontology/Film> .
?movie dct:subject ?subject.
FILTER( regex(?subject, "comedy","i" ))
?movie dbo:director [dbo:birthDate ?dob].
FILTER (xsd:integer(substr(xsd:string(?dob), 1, 4)) < 1970).
}
ORDER BY DESC(?movie)
OFFSET
""" + str(offset)
def __get_all_comedy_films_with_directors_birthday_after_or_on_1970_offset(offset: int = 0) -> str:
    """
    This method returns the query to find all comedy movies reachable in DBPedia
    that have a director whose birth year is 1970 or later.

    :param offset: Number of leading result rows to skip.
    :return: The SPARQL query text, ending in the OFFSET clause.
    """
    # dct: and xsd: are declared explicitly so the query does not depend on
    # prefixes that the endpoint happens to predefine.
    # SPARQL SUBSTR is 1-based: the four-digit year is substr(..., 1, 4).
    # A start of 0 yields only three characters on a compliant engine.
    return """
PREFIX dbo: <http://dbpedia.org/ontology/>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
SELECT DISTINCT (?movie AS ?Movie)
WHERE {
?movie a <http://dbpedia.org/ontology/Film> .
?movie dct:subject ?subject.
FILTER( regex(?subject, "comedy","i" ))
?movie dbo:director [dbo:birthDate ?dob].
FILTER (xsd:integer(substr(xsd:string(?dob), 1, 4)) >= 1970).
}
ORDER BY DESC(?movie)
OFFSET
""" + str(offset)
def __get_interlinks_from_dbpedia_to_wikidata() -> str:
    """
    This method returns the query that counts the interlinks from the dbpedia data to the wikidata data.

    The nested sub-selects narrow the movies down to comedy films (matched by
    subject or genre) whose directors have a birth date, keep the movies whose
    smallest and largest director birth year coincide, and of those the ones
    where that year is 1970 or later. Over these movies the outer SELECT counts
    the distinct movies, English names, directors, English director labels and
    the owl:sameAs targets of movies and directors that contain 'wikidata.org'.

    :return: The SPARQL query text.
    """
    # owl: and rdfs: are used further down and are declared explicitly so the
    # query does not depend on prefixes that the endpoint happens to predefine.
    # SPARQL SUBSTR is 1-based: the four-digit year is substr(..., 1, 4).
    return """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX dbc: <http://dbpedia.org/resource/Category:>
PREFIX dbo: <http://dbpedia.org/ontology/>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX dbp: <http://dbpedia.org/property/>
SELECT
(COUNT(DISTINCT ?movie) AS ?Movie)
(COUNT(DISTINCT ?same) AS ?MovieLink)
(COUNT(DISTINCT ?name) AS ?Title)
(COUNT(DISTINCT ?director) AS ?Director)
(COUNT(DISTINCT ?sameDirector) AS ?DirectorLink)
(COUNT(DISTINCT ?directorName) AS ?DirectorName)
WHERE {
{
SELECT DISTINCT ?movie
WHERE {
SELECT DISTINCT ?movie ?min_year ?max_year
WHERE {
SELECT DISTINCT ?movie ?director (MIN(?year) AS ?min_year) (MAX(?year) AS ?max_year)
WHERE{
# X type Film
{?movie rdf:type dbo:Film.}
# Imagine a inner join
# X subject Comedy
{?movie dct:subject dbc:Comedy}
UNION
# X subject Y; Y like "Comedy".
{?movie dct:subject ?y FILTER CONTAINS(lcase(str(?y)), "comedy").}
UNION
# X genre Y; Y like "Comedy".
{?movie dbo:genre ?y FILTER CONTAINS (lcase(str(?y)), "comedy").}
# X director Person; Person birth date year
{?movie dbo:director ?director.
?director dbo:birthDate ?dob.
BIND(xsd:integer(substr(xsd:string(?dob), 1, 4)) as ?year) .
}
} GROUP BY ?movie ?director
}GROUP BY ?movie HAVING (?min_year = ?max_year)
}GROUP BY ?movie HAVING (?min_year >= 1970)
}
OPTIONAL{?movie foaf:name ?name FILTER(LANG(?name)="en").}
OPTIONAL{
?movie dbo:director ?director.
OPTIONAL{
?director owl:sameAs ?sameDirector FILTER CONTAINS (LCASE(STR(?sameDirector)), 'wikidata.org').
}
}
OPTIONAL{
{?movie dbo:director ?director.
?director rdfs:label ?directorName FILTER(LANG(?directorName)="en").}
}
OPTIONAL{?movie owl:sameAs ?same FILTER CONTAINS (LCASE(STR(?same)), 'wikidata.org').}
}
"""
def get_statistics(entrypoint: str, query: Callable[[int], str], info: str) -> Dict:
    """
    This method counts all distinct movies a paged query yields.

    The query builder is called with a growing offset and every returned page
    is folded into one set, until the endpoint answers with an empty page.

    :param entrypoint: Entrypoint to use.
    :param query: Callable that takes the row offset and returns the query text for that page.
    :param info: What is the info
    :return: Dict with the info label, the entrypoint and the number of distinct movies under 'independent'.
    """
    entities = set()
    offset = 0
    while True:
        engine = SPARQLEngine(entrypoint=entrypoint, query=query(offset))
        results = engine.get_json_with_query()['results']['bindings']
        if not results:
            break
        entities.update(Decapper(result[ResultKeys.movie.value]).unpack() for result in results)
        # Step by the number of rows actually received rather than a fixed
        # 10000: the queries carry no LIMIT, so the page size is whatever the
        # endpoint returns, and a fixed step would skip rows on smaller pages.
        offset += len(results)
    return {
        'info': info,
        'entrypoint': entrypoint,
        'independent': len(entities)
    }
def get_interlinks(entrypoint: str, query: str, info: str) -> Dict:
    """
    This method gets all interlink information of the dbpedia results.

    Only the first binding row of the response is read; the query passed in
    by get_all_statistics projects nothing but COUNT aggregates.

    :param entrypoint: Entrypoint to use.
    :param query: The query text to run.
    :param info: What is the info
    :return: Dict with the info label, the entrypoint and the six counts converted to int.
    :raises IndexError: If the response contains no binding row.
    """
    response = SPARQLEngine(entrypoint=entrypoint, query=query).get_json_with_query()
    counts = response['results']['bindings'][0]

    def count_of(key) -> int:
        # Unpack one binding of the row and convert it to an int.
        return int(Decapper(counts[key]).unpack())

    return {
        'info': info,
        'entrypoint': entrypoint,
        'movies': count_of(ResultKeys.movie.value),
        'moviesLinked': count_of(ResultKeys.movie_link.value),
        'moviesNames': count_of(ResultKeys.title.value),
        'directors': count_of(ResultKeys.director.value),
        'directorsLinked': count_of(ResultKeys.director_link.value),
        'directorsNames': count_of('DirectorName')
    }
def get_all_statistics() -> Dict:
    """
    Collect every DBPedia statistic in one dict.

    Each paged movie count is stored under its own label, followed by the
    interlink counts from DBPedia to Wikidata.

    :return: Dict mapping each statistic label to its result dict.
    """
    # Label -> query builder for the statistics that are counted page by page.
    paged_counts = (
        ('all_films_in_dbpedia',
         __get_all_films_with_offset),
        ('all_comedy_films_in_dbpedia',
         __get_all_comedy_films_with_offset),
        ('all_comedy_films_in_dbpedia_with_a_director_who_has_a_birthday',
         __get_all_comedy_films_with_directors_birthday_offset),
        ('all_comedy_films_in_dbpedia_with_a_director_who_has_a_birthday_before_1970',
         __get_all_comedy_films_with_directors_birthday_before_1970_offset),
        ('all_comedy_films_in_dbpedia_with_a_director_who_has_a_birthday_after_or_on_1970',
         __get_all_comedy_films_with_directors_birthday_after_or_on_1970_offset),
    )
    report = {
        label: get_statistics(DBPEDIA_ENTRYPOINT, build_query, label)
        for label, build_query in paged_counts
    }

    # The interlink statistic runs a single, already built query.
    interlinks_label = 'interlinks_from_dbpedia_to_wikidata'
    report[interlinks_label] = get_interlinks(
        DBPEDIA_ENTRYPOINT,
        __get_interlinks_from_dbpedia_to_wikidata(),
        interlinks_label,
    )
    return report
...@@ -39,7 +39,7 @@ ...@@ -39,7 +39,7 @@
window.onload = function() { window.onload = function() {
// Begin Swagger UI call region // Begin Swagger UI call region
const ui = SwaggerUIBundle({ const ui = SwaggerUIBundle({
url: "../static/swagger/api_doc.yaml", url: "../static/swagger/docs.yaml",
dom_id: '#swagger-ui', dom_id: '#swagger-ui',
deepLinking: true, deepLinking: true,
presets: [ presets: [
......
...@@ -18,8 +18,3 @@ class ResultKeys(Enum): ...@@ -18,8 +18,3 @@ class ResultKeys(Enum):
production_company = 'ProductionCompany' production_company = 'ProductionCompany'
production_companies = 'ProductionCompanies' production_companies = 'ProductionCompanies'
line_separator = '|' line_separator = '|'
class TitleKey(Enum):
title = 'Title'
label = 'Label'
...@@ -108,3 +108,19 @@ paths: ...@@ -108,3 +108,19 @@ paths:
type: array type: array
items: items:
type: string type: string
/dbpedia/statistics:
get:
summary: This route returns all statistics about DBPedia, including the interlinks from DBPedia to Wikidata.
tags:
- Coreferences
responses:
200:
description: OK
content:
application/json:
schema:
properties:
type: array
items:
type: string
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment