Skip to content
Snippets Groups Projects
Commit e7e0aa41 authored by Marc Feger's avatar Marc Feger
Browse files

Add submission 3

parent 91e68521
Branches
No related tags found
No related merge requests found
...@@ -3,7 +3,7 @@ from flask.json import jsonify ...@@ -3,7 +3,7 @@ from flask.json import jsonify
from src.main import assemble_wikidata_groundtruth_english, assemble_dbpedia_groundtruth_english, \ from src.main import assemble_wikidata_groundtruth_english, assemble_dbpedia_groundtruth_english, \
assemble_wikidata_triples, \ assemble_wikidata_triples, \
assemble_dbpedia_triples, test_if_data_is_enlargeable_for_dbpedia assemble_dbpedia_triples, test_if_data_is_enlargeable_for_dbpedia, assemble_similarity_triples
app = Flask(__name__) app = Flask(__name__)
...@@ -12,6 +12,11 @@ app = Flask(__name__) ...@@ -12,6 +12,11 @@ app = Flask(__name__)
# Todo: Füge eine Route ein die die zusätlichen Werte in die Triple-Dateien einbringt. # Todo: Füge eine Route ein die die zusätlichen Werte in die Triple-Dateien einbringt.
# Todo: Es soll vorher immer erst der Grunddatensatz erzeugt werden und dann die zusätzlichen Informationen ergänzt werden. # Todo: Es soll vorher immer erst der Grunddatensatz erzeugt werden und dann die zusätzlichen Informationen ergänzt werden.
@app.route('/coreferences')
def coreferences():
    """
    Serve the coreference triples under the '/coreferences' route.

    :return: The result of assemble_similarity_triples() wrapped by jsonify.
    """
    similarity_triples = assemble_similarity_triples()
    return jsonify(similarity_triples)
@app.route('/dbpedia/collect/enlargeable') @app.route('/dbpedia/collect/enlargeable')
def dbpedia_data_can_enlarge(): def dbpedia_data_can_enlarge():
return jsonify(test_if_data_is_enlargeable_for_dbpedia()) return jsonify(test_if_data_is_enlargeable_for_dbpedia())
......
...@@ -3,8 +3,10 @@ from enum import Enum ...@@ -3,8 +3,10 @@ from enum import Enum
class ResultKeys(Enum): class ResultKeys(Enum):
movie = 'Movie' movie = 'Movie'
movie_link = 'MovieLink'
title = 'Title' title = 'Title'
director = 'Director' director = 'Director'
director_link = 'DirectorLink'
author = 'Author' author = 'Author'
cast = 'Cast' cast = 'Cast'
published = 'Published' published = 'Published'
......
...@@ -256,3 +256,47 @@ def assemble_dbpedia_triples() -> List: ...@@ -256,3 +256,47 @@ def assemble_dbpedia_triples() -> List:
value=production_companies).as_ete_triple()] value=production_companies).as_ete_triple()]
FileWriter(destination='static/dbpedia/dbpedia.txt', data=triples).as_string(new_line=True) FileWriter(destination='static/dbpedia/dbpedia.txt', data=triples).as_string(new_line=True)
return triples return triples
def assemble_similarity_triples() -> List:
    """
    Assemble the owl:sameAs triples that link wikidata movies to dbpedia movies.

    Every wikidata movie is compared with the dbpedia movies in file order and is
    linked to the first dbpedia movie that satisfies one of two levels.
    First level:
        -> The 'MovieLink' value of the dbpedia movie equals the wikidata movie.
    Second level:
        -> Both movies have the same title and share at least one non-empty
           director identifier (wikidata 'Director' vs. dbpedia 'DirectorLink').

    The triples are also written to 'static/coreferences/coreferences.txt'.

    :return: The list of assembled triples.
    """
    same_as = 'http://www.w3.org/2002/07/owl#sameAs'
    separator = ResultKeys.line_separator.value
    dbpedia_data = FileReader(source='static/dbpedia/dbpedia_groundtruth.txt').as_json()
    wikidata_data = FileReader(source='static/wikidata/wikidata_groundtruth.txt').as_json()
    triples = []
    for wiki in wikidata_data:
        wiki_movie = Decapper(wiki[ResultKeys.movie.value]).unpack()
        for dbpedia in dbpedia_data:
            # First level: the dbpedia entry links directly to this wikidata movie.
            matched = wiki_movie == Decapper(dbpedia[ResultKeys.movie_link.value]).unpack()
            # Second level: only consulted when the direct link did not match.
            if not matched and Decapper(wiki[ResultKeys.title.value]).unpack() == \
                    Decapper(dbpedia[ResultKeys.title.value]).unpack():
                # ''.split(separator) yields [''], so empty entries are dropped;
                # otherwise two movies without any director would "share" the
                # empty string and be linked by mistake.
                wiki_directors = {
                    director
                    for director in Decapper(wiki[ResultKeys.director.value]).unpack().split(separator)
                    if director
                }
                dbpedia_directors = {
                    director
                    for director in Decapper(dbpedia[ResultKeys.director_link.value]).unpack().split(separator)
                    if director
                }
                matched = bool(wiki_directors & dbpedia_directors)
            if matched:
                triples.append(Statement(subject=wiki_movie,
                                         predicate=same_as,
                                         value=Decapper(dbpedia[ResultKeys.movie.value]).unpack()
                                         ).as_ete_triple())
                # Only the first matching dbpedia movie is linked per wikidata movie.
                break
    FileWriter(destination='static/coreferences/coreferences.txt', data=triples).as_string(new_line=True)
    return triples
This diff is collapsed.
...@@ -15,10 +15,13 @@ SELECT DISTINCT (?movie AS ?Movie) ...@@ -15,10 +15,13 @@ SELECT DISTINCT (?movie AS ?Movie)
(GROUP_CONCAT(DISTINCT ?releaseDate;separator="|") AS ?Published) (GROUP_CONCAT(DISTINCT ?releaseDate;separator="|") AS ?Published)
(GROUP_CONCAT(DISTINCT ?subject;separator="|") AS ?Subject) (GROUP_CONCAT(DISTINCT ?subject;separator="|") AS ?Subject)
(GROUP_CONCAT(DISTINCT ?genre;separator="|") AS ?Genre) (GROUP_CONCAT(DISTINCT ?genre;separator="|") AS ?Genre)
(GROUP_CONCAT(DISTINCT ?duration/60;separator="|") AS ?Duration) (GROUP_CONCAT(DISTINCT ?duration/60;separator="|") AS ?Duration) # todo: change to original value
(GROUP_CONCAT(DISTINCT ?abstract;separator="|") AS ?Description) (GROUP_CONCAT(DISTINCT ?abstract;separator="|") AS ?Description)
(GROUP_CONCAT(DISTINCT ?distributor;separator="|") AS ?Distributor) (GROUP_CONCAT(DISTINCT ?distributor;separator="|") AS ?Distributor)
(GROUP_CONCAT(DISTINCT ?productionCompanies;separator="|") AS ?ProductionCompanies) (GROUP_CONCAT(DISTINCT ?productionCompanies;separator="|") AS ?ProductionCompanies)
(GROUP_CONCAT(DISTINCT ?sameDirector;separator="|") AS ?DirectorLink)
(GROUP_CONCAT(DISTINCT ?sameMovie;separator="|") AS ?MovieLink)
WHERE { WHERE {
{ {
SELECT DISTINCT ?movie SELECT DISTINCT ?movie
...@@ -50,7 +53,13 @@ WHERE { ...@@ -50,7 +53,13 @@ WHERE {
}GROUP BY ?movie HAVING (?min_year >= 1970) }GROUP BY ?movie HAVING (?min_year >= 1970)
} }
OPTIONAL{?movie foaf:name ?name FILTER(LANG(?name)="en").} OPTIONAL{?movie foaf:name ?name FILTER(LANG(?name)="en").}
OPTIONAL{?movie dbo:director ?director.} OPTIONAL{?movie owl:sameAs ?sameMovie FILTER CONTAINS (LCASE(STR(?sameMovie)), 'wikidata.org').}
OPTIONAL{
?movie dbo:director ?director.
OPTIONAL{
?director owl:sameAs ?sameDirector FILTER CONTAINS (LCASE(STR(?sameDirector)), 'wikidata.org').
}
}
OPTIONAL{?movie dbo:author ?author.} OPTIONAL{?movie dbo:author ?author.}
OPTIONAL{?movie dbo:starring ?cast_member.} OPTIONAL{?movie dbo:starring ?cast_member.}
OPTIONAL{?movie dbo:releaseDate ?releaseDate.} OPTIONAL{?movie dbo:releaseDate ?releaseDate.}
......
This diff is collapsed.
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment