Skip to content
Snippets Groups Projects
Commit e8208db0 authored by Marc Feger's avatar Marc Feger
Browse files

Remove route to collect separate titles

parent b9f833cc
No related branches found
No related tags found
No related merge requests found
......@@ -3,7 +3,7 @@ from flask.json import jsonify
from src.main import assemble_wikidata_groundtruth_english, assemble_dbpedia_groundtruth_english, \
assemble_wikidata_triples, \
assemble_dbpedia_triples, collect_wikidata_results_with_title, test_if_data_is_enlargeable_for_dbpedia
assemble_dbpedia_triples, test_if_data_is_enlargeable_for_dbpedia
app = Flask(__name__)
......@@ -17,11 +17,6 @@ def dbpedia_data_can_enlarge():
return jsonify(test_if_data_is_enlargeable_for_dbpedia())
@app.route('/wikidata/collect/title')
def wikidata_collect_title():
    """Return the output of collect_wikidata_results_with_title() via jsonify."""
    collected = collect_wikidata_results_with_title()
    return jsonify(collected)
@app.route('/dbpedia/n3')
def dbpedia_n3():
    """Return the output of assemble_dbpedia_triples() via jsonify."""
    triples = assemble_dbpedia_triples()
    return jsonify(triples)
......
......@@ -254,55 +254,3 @@ def assemble_dbpedia_triples() -> List:
predicate=dbp + 'productionCompanies',
value=production_companies).as_ete_triple()]
return triples
def collect_wikidata_results_with_title() -> Dict:
    """
    Collect the labels and titles of every film in the Wikidata ground truth.

    For each entry read from ``static/wikidata_groundtruth.txt`` two SPARQL
    queries are built from the ``static/wikidata_title.sparql`` template, one
    for ``rdfs:label`` and one for ``wdt:P1476``, and run through a
    ``SPARQLEngine`` created with ``WIKIDATA_ENTRYPOINT``. The collected result
    is handed to ``FileWriter`` with the destination
    ``static/wikidata_titles_and_labels.txt`` and also returned.

    :warning: Takes a long time, two queries are issued per film.
    :return: A dict that maps each film id to a dict with the keys ``'Labels'``
             and ``'Titles'``, each holding the ``results.bindings`` of the
             corresponding query.
    """
    data = FileReader(source='static/wikidata_groundtruth.txt').as_json()
    # The template does not depend on the film, so it is read once here
    # instead of twice per loop iteration.
    template = FileReader(source='static/wikidata_title.sparql').as_string()
    total = len(data)
    titles = {}

    print('[GET] Collect Wikidata film labels and titles in different languages: ')
    print(f'\t0 of {total}')
    for progress, result in enumerate(data, start=1):
        movie_id = Decapper(result[ResultKeys.movie.value]).unpack()
        # Placeholders are substituted in the order subject -> predicate ->
        # value; the subject substitution is shared by both queries.
        query_for_movie = template.replace('subject', '<' + movie_id + '>')

        query_label = query_for_movie \
            .replace('predicate', 'rdfs:label') \
            .replace('value', 'Label')
        engine_label = SPARQLEngine(entrypoint=WIKIDATA_ENTRYPOINT, query=query_label)

        query_title = query_for_movie \
            .replace('predicate', 'wdt:P1476') \
            .replace('value', 'Title')
        engine_title = SPARQLEngine(entrypoint=WIKIDATA_ENTRYPOINT, query=query_title)

        titles[movie_id] = {
            'Labels': engine_label.get_json_with_query()["results"]["bindings"],
            'Titles': engine_title.get_json_with_query()["results"]["bindings"]
        }

        # Report progress every 100 films only, to keep the console readable.
        if progress % 100 == 0:
            print(f'\t{progress} of {total}')
    print('Done')

    print('[WRITE] Data to static/wikidata_titles_and_labels.txt')
    writer = FileWriter(destination='static/wikidata_titles_and_labels.txt', data=titles)
    writer.as_filtered_json()
    print('Done')
    return titles
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX schema: <http://schema.org/>
SELECT DISTINCT ?value
WHERE{
OPTIONAL {subject predicate ?value.}
}
\ No newline at end of file
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment