Skip to content
Snippets Groups Projects
Commit 7cb94194 authored by Marc Feger
Browse files

Add Triple Writer; Add Triple-Representation for Wikidata data

parent 30552af0
No related branches found
No related tags found
No related merge requests found
...@@ -61,7 +61,6 @@ dist/ ...@@ -61,7 +61,6 @@ dist/
downloads/ downloads/
eggs/ eggs/
.eggs/ .eggs/
lib/
lib64/ lib64/
parts/ parts/
sdist/ sdist/
......
from flask import Flask from flask import Flask, Response
from flask.json import jsonify from flask.json import jsonify
from src.main import assemble_wikidata_groundtruth, assemble_dbpedia_groundtruth from src.main import assemble_wikidata_groundtruth, assemble_dbpedia_groundtruth, assemble_wikidata_triples
app = Flask(__name__) app = Flask(__name__)
@app.route('/wikidata/n3')
def wikidata_n3():
    """
    Serve the assembled Wikidata triples.
    :return: The result of assemble_wikidata_triples() wrapped in a JSON response.
    """
    triples = assemble_wikidata_triples()
    return jsonify(triples)
@app.route('/wikidata/groundtruth')
def wikidata_groundtruth():
    """
    Serve the assembled Wikidata ground truth.
    :return: The result of assemble_wikidata_groundtruth() wrapped in a JSON response.
    """
    groundtruth = assemble_wikidata_groundtruth()
    return jsonify(groundtruth)
......
from typing import Dict from tokenize import String
from typing import Dict, List
from src import WIKIDATA_ENTRYPOINT, DBPEDIA_ENTRYPOINT from src import WIKIDATA_ENTRYPOINT, DBPEDIA_ENTRYPOINT
from src.lib.decapper import Decapper
from src.lib.engine import SPARQLEngine from src.lib.engine import SPARQLEngine
from src.lib.reader import FileReader
from src.lib.ntriple import NTriple
from src.lib.writer import FileWriter from src.lib.writer import FileWriter
from src.wikidata.keys import ResultKeys
def assemble_wikidata_groundtruth() -> Dict: def assemble_wikidata_groundtruth() -> Dict:
...@@ -31,3 +36,64 @@ def assemble_dbpedia_groundtruth() -> Dict: ...@@ -31,3 +36,64 @@ def assemble_dbpedia_groundtruth() -> Dict:
writer = FileWriter(destination='static/dbpedia_groundtruth.txt', data=data) writer = FileWriter(destination='static/dbpedia_groundtruth.txt', data=data)
writer.as_json() writer.as_json()
return data return data
def assemble_wikidata_triples() -> List:
    """
    This method assembles the N-Triples of wikidata.

    The ground truth is read from 'static/wikidata_groundtruth.txt' via
    FileReader.as_json(). For every result the movie entry becomes the subject;
    each known property that is present in the result is unpacked with Decapper,
    split on ResultKeys.line_separator and every non-empty part is emitted as one
    triple.

    :return: The NTriple.as_string() values of all triples, ordered by result and
             within a result by the property order of the table below.
    """
    data = FileReader(source='static/wikidata_groundtruth.txt').as_json()
    wdt = 'http://www.wikidata.org/prop/direct/'
    schema = 'http://schema.org/'
    separator = ResultKeys.line_separator.value

    # (result key, predicate IRI) pairs. The order fixes the order of the emitted
    # triples per movie, so new properties should be appended at the end.
    predicates = (
        (ResultKeys.title.value, wdt + 'P1476'),
        (ResultKeys.director.value, wdt + 'P57'),
        (ResultKeys.author.value, wdt + 'P58'),
        (ResultKeys.cast.value, wdt + 'P161'),
        (ResultKeys.published.value, wdt + 'P577'),
        (ResultKeys.genre.value, wdt + 'P136'),
        (ResultKeys.duration.value, wdt + 'P2047'),
        (ResultKeys.description.value, schema + 'description'),
        (ResultKeys.production_company.value, wdt + 'P272'),
    )

    triples: List[str] = []
    for result in data:
        movie = Decapper(result[ResultKeys.movie.value]).unpack()
        for key, predicate in predicates:
            # Every property is optional; absent ones are skipped.
            if key not in result:
                continue
            for value in Decapper(result[key]).unpack().split(separator):
                # Splitting can yield empty parts; they carry no information.
                if value:
                    triples.append(NTriple(subject=movie, predicate=predicate, value=value).as_string())
    return triples
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment