Skip to content
Snippets Groups Projects
Commit 5962eeef authored by Marc Feger
Browse files

Add dbpedia triples

parent 79d4e4f5
No related branches found
No related tags found
No related merge requests found
from flask import Flask, Response from flask import Flask, Response
from flask.json import jsonify from flask.json import jsonify
from src.main import assemble_wikidata_groundtruth, assemble_dbpedia_groundtruth, assemble_wikidata_triples from src.main import assemble_wikidata_groundtruth, assemble_dbpedia_groundtruth, assemble_wikidata_triples, \
assemble_dbpedia_triples
app = Flask(__name__) app = Flask(__name__)
@app.route('/dbpedia/n3')
def dbpedia_n3():
    """
    Endpoint that serves the dbpedia triples.
    :return: The value produced by jsonify for the assembled dbpedia triples.
    """
    dbpedia_triples = assemble_dbpedia_triples()
    return jsonify(dbpedia_triples)
@app.route('/wikidata/n3') @app.route('/wikidata/n3')
def wikidata_n3(): def wikidata_n3():
return jsonify(assemble_wikidata_triples()) return jsonify(assemble_wikidata_triples())
......
from enum import Enum
class ResultKeys(Enum):
    """
    Keys used to look up the fields of a single query result.

    Every member's value is the literal string used as the dictionary key,
    so callers index a result with ``result[ResultKeys.title.value]``.
    """

    # Identifier of the movie the remaining fields belong to.
    movie = 'Movie'

    # People and naming.
    title = 'Title'
    director = 'Director'
    author = 'Author'
    cast = 'Cast'

    # Release and classification.
    published = 'Published'
    subject = 'Subject'
    genre = 'Genre'
    duration = 'Duration'
    description = 'Description'

    # Companies involved.
    distributor = 'Distributor'
    production_companies = 'ProductionCompanies'

    # Not a field name: the delimiter used to split a field that holds
    # several values joined into one string.
    line_separator = '|'
...@@ -8,6 +8,7 @@ from src.lib.reader import FileReader ...@@ -8,6 +8,7 @@ from src.lib.reader import FileReader
from src.lib.ntriple import NTriple from src.lib.ntriple import NTriple
from src.lib.writer import FileWriter from src.lib.writer import FileWriter
from src.wikidata.keys import ResultKeys from src.wikidata.keys import ResultKeys
from src.dbpedia.keys import ResultKeys
def assemble_wikidata_groundtruth() -> Dict: def assemble_wikidata_groundtruth() -> Dict:
...@@ -42,7 +43,7 @@ def assemble_wikidata_triples() -> List: ...@@ -42,7 +43,7 @@ def assemble_wikidata_triples() -> List:
""" """
This method assembles the N-Triples of wikidata. This method assembles the N-Triples of wikidata.
:return: :return: List of all triples.
""" """
data = FileReader(source='static/wikidata_groundtruth.txt').as_json() data = FileReader(source='static/wikidata_groundtruth.txt').as_json()
...@@ -73,7 +74,8 @@ def assemble_wikidata_triples() -> List: ...@@ -73,7 +74,8 @@ def assemble_wikidata_triples() -> List:
triples += [NTriple(subject=movie, predicate=wdt + 'P161', value=cast).as_string()] triples += [NTriple(subject=movie, predicate=wdt + 'P161', value=cast).as_string()]
if ResultKeys.published.value in result.keys(): if ResultKeys.published.value in result.keys():
for published in Decapper(result[ResultKeys.published.value]).unpack().split(ResultKeys.line_separator.value): for published in Decapper(result[ResultKeys.published.value]).unpack().split(
ResultKeys.line_separator.value):
if published: if published:
triples += [NTriple(subject=movie, predicate=wdt + 'P577', value=published).as_string()] triples += [NTriple(subject=movie, predicate=wdt + 'P577', value=published).as_string()]
...@@ -88,12 +90,94 @@ def assemble_wikidata_triples() -> List: ...@@ -88,12 +90,94 @@ def assemble_wikidata_triples() -> List:
triples += [NTriple(subject=movie, predicate=wdt + 'P2047', value=duration).as_string()] triples += [NTriple(subject=movie, predicate=wdt + 'P2047', value=duration).as_string()]
if ResultKeys.description.value in result.keys(): if ResultKeys.description.value in result.keys():
for description in Decapper(result[ResultKeys.description.value]).unpack().split(ResultKeys.line_separator.value): for description in Decapper(result[ResultKeys.description.value]).unpack().split(
ResultKeys.line_separator.value):
if description: if description:
triples += [NTriple(subject=movie, predicate=schema + 'description', value=description).as_string()] triples += [NTriple(subject=movie, predicate=schema + 'description', value=description).as_string()]
if ResultKeys.production_company.value in result.keys(): if ResultKeys.production_company.value in result.keys():
for production_company in Decapper(result[ResultKeys.production_company.value]).unpack().split(ResultKeys.line_separator.value): for production_company in Decapper(result[ResultKeys.production_company.value]).unpack().split(
ResultKeys.line_separator.value):
if production_company: if production_company:
triples += [NTriple(subject=movie, predicate=wdt + 'P272', value=production_company).as_string()] triples += [NTriple(subject=movie, predicate=wdt + 'P272', value=production_company).as_string()]
return triples return triples
def assemble_dbpedia_triples() -> List:
    """
    This method assembles the N-Triples of dbpedia.

    The results are read from 'static/dbpedia_groundtruth.txt'. For every
    result, each known field is unpacked, split on the line separator and
    turned into one triple per non-empty value, with the movie as subject.

    NOTE(review): this module imports ``ResultKeys`` from both
    ``src.wikidata.keys`` and ``src.dbpedia.keys``; the later import wins.
    This function needs the dbpedia variant (it uses ``distributor`` and
    ``production_companies``) - confirm the wikidata code path still
    resolves the keys it expects.

    :return: List of all triples.
    :raises KeyError: If a result has no entry for the movie key.
    """
    data = FileReader(source='static/dbpedia_groundtruth.txt').as_json()

    foaf = 'http://xmlns.com/foaf/0.1/'
    dbo = 'http://dbpedia.org/ontology/'
    dct = 'http://purl.org/dc/terms/'
    dbp = 'http://dbpedia.org/property/'

    # Result field -> predicate IRI. The order of this table is the order in
    # which the triples of one movie are emitted.
    predicates = (
        (ResultKeys.title, foaf + 'name'),
        (ResultKeys.director, dbo + 'director'),
        (ResultKeys.author, dbo + 'author'),
        (ResultKeys.cast, dbo + 'starring'),
        (ResultKeys.published, dbo + 'releaseDate'),
        (ResultKeys.subject, dct + 'subject'),
        (ResultKeys.genre, dbo + 'genre'),
        (ResultKeys.duration, dbo + 'runtime'),
        (ResultKeys.description, dbo + 'abstract'),
        (ResultKeys.distributor, dbo + 'distributor'),
        (ResultKeys.production_companies, dbp + 'productionCompanies'),
    )
    separator = ResultKeys.line_separator.value

    triples: List[str] = []

    for result in data:
        movie = Decapper(result[ResultKeys.movie.value]).unpack()

        for key, predicate in predicates:
            # Compare against the key's string value: an Enum member itself
            # never equals a string key, so testing the member would always
            # skip the field.
            if key.value not in result:
                continue

            for value in Decapper(result[key.value]).unpack().split(separator):
                # Splitting can yield empty strings; those carry no value.
                if value:
                    triples.append(NTriple(subject=movie, predicate=predicate, value=value).as_string())

    return triples
...@@ -4,8 +4,9 @@ PREFIX dbo: <http://dbpedia.org/ontology/> ...@@ -4,8 +4,9 @@ PREFIX dbo: <http://dbpedia.org/ontology/>
PREFIX dct: <http://purl.org/dc/terms/> PREFIX dct: <http://purl.org/dc/terms/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#> PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/> PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX dbp: <http://dbpedia.org/property/>
SELECT ?movie SELECT DISTINCT (?movie AS ?Movie)
(GROUP_CONCAT(DISTINCT ?name;separator="|") AS ?Title) (GROUP_CONCAT(DISTINCT ?name;separator="|") AS ?Title)
(GROUP_CONCAT(DISTINCT ?director;separator="|") AS ?Director) (GROUP_CONCAT(DISTINCT ?director;separator="|") AS ?Director)
......
This diff is collapsed.
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment