Skip to content
Snippets Groups Projects
Commit 5ebf4eb2 authored by Marc Feger
Browse files

Add all additional information for wikidata title and label

parent 5962eeef
No related branches found
No related tags found
No related merge requests found
from flask import Flask, Response
from flask import Flask
from flask.json import jsonify
from src.main import assemble_wikidata_groundtruth, assemble_dbpedia_groundtruth, assemble_wikidata_triples, \
assemble_dbpedia_triples
from src.main import assemble_wikidata_groundtruth_english, assemble_dbpedia_groundtruth_english, \
assemble_wikidata_triples, \
assemble_dbpedia_triples, collect_wikidata_results_with_title
app = Flask(__name__)
@app.route('/wikidata/collect/title')
def wikidata_collect_title():
    """
    This endpoint collects the wikidata labels and titles and serves them as JSON.
    :return: The JSON response built from collect_wikidata_results_with_title().
    """
    collected = collect_wikidata_results_with_title()
    return jsonify(collected)
@app.route('/dbpedia/n3')
def dbpedia_n3():
    """
    This endpoint serves the assembled dbpedia triples as JSON.
    :return: The JSON response built from assemble_dbpedia_triples().
    """
    triples = assemble_dbpedia_triples()
    return jsonify(triples)
......@@ -17,14 +23,14 @@ def wikidata_n3():
return jsonify(assemble_wikidata_triples())
@app.route('/wikidata/groundtruth')
@app.route('/wikidata/groundtruth/english')
def wikidata_groundtruth():
    """
    This endpoint serves the english wikidata groundtruth as JSON.
    Both URLs are registered on the same view so that either path keeps working.
    :return: The JSON response built from assemble_wikidata_groundtruth_english().
    """
    # NOTE(review): the block previously carried two consecutive return statements,
    # the second one unreachable. Only the call to the *_english assembler is kept,
    # on the assumption that it supersedes the old name - confirm against src.main.
    return jsonify(assemble_wikidata_groundtruth_english())
@app.route('/dbpedia/groundtruth')
@app.route('/dbpedia/groundtruth/english')
def dbpedia_groundtruth():
    """
    This endpoint serves the english dbpedia groundtruth as JSON.
    Both URLs are registered on the same view so that either path keeps working.
    :return: The JSON response built from assemble_dbpedia_groundtruth_english().
    """
    # NOTE(review): the block previously carried two consecutive return statements,
    # the second one unreachable. Only the call to the *_english assembler is kept,
    # on the assumption that it supersedes the old name - confirm against src.main.
    return jsonify(assemble_dbpedia_groundtruth_english())
@app.route('/')
......
......@@ -33,7 +33,7 @@ class SPARQLEngine(object):
self.results = self.engine.query().convert()
return self.results
def get_json_test(self) -> Dict:
def get_json_with_query(self) -> Dict:
"""
This method returns the results as JSON.
......
......@@ -23,6 +23,15 @@ class FileWriter(object):
with open(self.destination, 'w+') as outfile:
json.dump(self.data["results"]["bindings"], outfile)
def as_filtered_json(self) -> None:
    """
    This method serializes self.data as a whole to JSON and writes it to the destination.
    No sub-structure of the data is selected before writing.
    :return: None
    """
    with open(self.destination, 'w+') as outfile:
        # Serialize inside the with-block so the file is opened before encoding starts
        outfile.write(json.dumps(self.data))
def as_string(self) -> None:
"""
This method writes the data as a string linewise to the destination.
......
import json
from tokenize import String
from typing import Dict, List
......@@ -11,7 +12,7 @@ from src.wikidata.keys import ResultKeys
from src.dbpedia.keys import ResultKeys
def assemble_wikidata_groundtruth() -> Dict:
def assemble_wikidata_groundtruth_english() -> Dict:
"""
This method assembles the groundtruth for the data in wikidata.
It also writes them to /static/data/wikidata_groundtruth.txt
......@@ -25,7 +26,7 @@ def assemble_wikidata_groundtruth() -> Dict:
return data
def assemble_dbpedia_groundtruth() -> Dict:
def assemble_dbpedia_groundtruth_english() -> Dict:
"""
This method assembles the groundtruth for the data in dbpedia.
It also writes them to /static/data/dbpedia_groundtruth.txt
......@@ -74,6 +75,7 @@ def assemble_wikidata_triples() -> List:
triples += [NTriple(subject=movie, predicate=wdt + 'P161', value=cast).as_string()]
if ResultKeys.published.value in result.keys():
# Todo: Complete date -> Maybe baby there exists different dates -> New published -> okay !
for published in Decapper(result[ResultKeys.published.value]).unpack().split(
ResultKeys.line_separator.value):
if published:
......@@ -141,6 +143,7 @@ def assemble_dbpedia_triples() -> List:
triples += [NTriple(subject=movie, predicate=dbo + 'starring', value=cast).as_string()]
if ResultKeys.published.value in result.keys():
# Todo: Complete date -> Maybe baby there exists different dates -> New published -> okay !
for published in Decapper(result[ResultKeys.published.value]).unpack().split(
ResultKeys.line_separator.value):
if published:
......@@ -181,3 +184,55 @@ def assemble_dbpedia_triples() -> List:
triples += [NTriple(subject=movie, predicate=dbp + 'productionCompanies',
value=production_companies).as_string()]
return triples
def _build_title_query(template: str, movie_id: str, predicate: str, variable: str) -> str:
    """
    This method fills the placeholders of the title query template.
    The replacements are applied in the order subject, predicate, value.
    :param template: The raw text of the query template.
    :param movie_id: The identifier of the movie; it is wrapped in angle brackets.
    :param predicate: The predicate that replaces the 'predicate' placeholder.
    :param variable: The name that replaces the 'value' placeholder.
    :return: The query text with all three placeholders replaced.
    """
    return template \
        .replace('subject', '<' + movie_id + '>') \
        .replace('predicate', predicate) \
        .replace('value', variable)


def collect_wikidata_results_with_title() -> Dict:
    """
    This method collects the wikidata results with a Title in different languages.
    Therefore rdfs:label and wdt:P1476 are used.
    For every entry of static/wikidata_groundtruth.txt one query per predicate is built
    from static/wikidata_title.sparql and sent to WIKIDATA_ENTRYPOINT.
    The grouped bindings are also written to static/wikidata_titles_and_labels.txt.
    :warning: Takes a long time.
    :return: A dict that maps each movie id to its 'Labels' and 'Titles' bindings.
    """
    data = FileReader(source='static/wikidata_groundtruth.txt').as_json()
    # The template is identical for every movie, so it is read once instead of twice per iteration
    template = FileReader(source='static/wikidata_title.sparql').as_string()
    total = len(data)
    titles = {}

    print('[GET] Collect Wikidata film labels and titles in different languages: ')
    print('\t{} of {}'.format(0, total))
    for progress, result in enumerate(data, start=1):
        movie_id = Decapper(result[ResultKeys.movie.value]).unpack()

        query_label = _build_title_query(template, movie_id, 'rdfs:label', 'Label')
        engine_label = SPARQLEngine(entrypoint=WIKIDATA_ENTRYPOINT, query=query_label)

        query_title = _build_title_query(template, movie_id, 'wdt:P1476', 'Title')
        engine_title = SPARQLEngine(entrypoint=WIKIDATA_ENTRYPOINT, query=query_title)

        titles[movie_id] = {
            'Labels': engine_label.get_json_with_query()["results"]["bindings"],
            'Titles': engine_title.get_json_with_query()["results"]["bindings"]
        }

        # Report only every 100th movie to keep the output readable
        if progress % 100 == 0:
            print('\t{} of {}'.format(progress, total))
    print('Done')

    print('[WRITE] Data to static/wikidata_titles_and_labels.txt')
    writer = FileWriter(destination='static/wikidata_titles_and_labels.txt', data=titles)
    writer.as_filtered_json()
    print('Done')
    return titles
......@@ -13,3 +13,8 @@ class ResultKeys(Enum):
description = 'Description'
production_company = 'ProductionCompany'
line_separator = '|'
class TitleKey(Enum):
    """
    This enum holds the names 'Title' and 'Label'.
    """
    # NOTE(review): presumably these match the variable names used in the title query results - confirm.
    title = 'Title'
    label = 'Label'
This diff is collapsed.
# Query template, not a runnable query: the three bare placeholder words in the
# triple pattern below are presumably swapped in by plain text substitution
# before the query is sent (TODO confirm against the caller).
# Keep comments in this file free of those placeholder words, otherwise a plain
# text substitution would rewrite the comments as well.
# NOTE(review): no rdfs: prefix is declared here; if that prefix is substituted
# in, the endpoint has to predefine it - confirm.
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX schema: <http://schema.org/>
SELECT DISTINCT ?value
WHERE{
# OPTIONAL: a single row with an unbound variable is returned when nothing matches.
OPTIONAL {subject predicate ?value.}
}
\ No newline at end of file
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment