From 725627b15bb7d6b1805b0835a165e413422b1074 Mon Sep 17 00:00:00 2001 From: feger <marc.feger@hhu.de> Date: Mon, 13 May 2019 23:23:59 +0200 Subject: [PATCH] Fix Typo in mapping.csv; Add more documentation to README.md --- README.md | 168 ++++++++++++++++++++++++++++++++++++- static/mapping/mapping.csv | 2 +- 2 files changed, 168 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 61127f1..db0bafa 100644 --- a/README.md +++ b/README.md @@ -666,4 +666,170 @@ The following query will be edited it can be seen in the diagram. }GROUP BY ?movie HAVING (?min_year = ?max_year) }GROUP BY ?movie HAVING (?min_year >= 1970) -As a result, `1433` comedy films with a director born after or in 1970 were found. \ No newline at end of file +As a result, `1433` comedy films with a director born after or in 1970 were found. + +## Task 3 +In this step the additional information is queried by the `OPTIONAL`-operator and the basic queries of the previous step. + +### Wikidata + + PREFIX wd: <http://www.wikidata.org/entity/> + PREFIX wdt: <http://www.wikidata.org/prop/direct/> + PREFIX schema: <http://schema.org/> + PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> + + SELECT + DISTINCT (?movie AS ?Movie) + (GROUP_CONCAT(DISTINCT ?title;separator="|") AS ?Title) + (GROUP_CONCAT(DISTINCT ?director;separator="|") AS ?Director) + (GROUP_CONCAT(DISTINCT ?author;separator="|") AS ?Author) + (GROUP_CONCAT(DISTINCT ?cast_member;separator="|") AS ?Cast) + (GROUP_CONCAT(DISTINCT ?publish_date;separator="|") AS ?Published) + (GROUP_CONCAT(DISTINCT ?genre;separator="|") AS ?Genre) + (GROUP_CONCAT(DISTINCT ?duration;separator="|") AS ?Duration) + (GROUP_CONCAT(DISTINCT ?description;separator="|") AS ?Description) + (GROUP_CONCAT(DISTINCT ?production_company;separator="|") AS ?ProductionCompany) + WHERE { + { + SELECT DISTINCT ?movie + WHERE { + SELECT ?movie ?director ?min_dob ?max_dob + WHERE{ + SELECT ?movie ?director (MIN(YEAR(?dob)) AS ?min_dob) (MAX(YEAR(?dob)) AS ?max_dob) 
+ WHERE{ + # node who instances of films. + {?movie wdt:P31 wd:Q11424.} + UNION + # X instances of Y; Y subclass of films. + {?movie wdt:P31 [wdt:P279 wd:Q11424].} + # X instances of Y; Y instance of film genre. + UNION + {?movie wdt:P31 [wdt:P31 wd:Q201658].} + UNION + # X instances of Y; Y instance of or subclass of Z; Z is Y; Y subclass of film. + {?movie wdt:P31 [wdt:P31|wdt:P279 [wdt:P279 wd:Q11424]].} + UNION + # X instances of Y; Y instance of or subclass of Z; Z is Y; Y subclass of film genre. + {?movie wdt:P31 [wdt:P31|wdt:P279 [wdt:P31 wd:Q201658]].} + + # Imagine a inner join + + # X genre comedy. + {?movie wdt:P136 wd:Q40831.} + UNION + # X genre G; G subclass of comedy. + {?movie wdt:P136 [wdt:P279 wd:Q40831].} + + # X director exists and who has a birthday + {?movie wdt:P57 ?director. + ?director wdt:P569 ?dob.} + } GROUP BY ?movie ?director + HAVING (?min_dob = ?max_dob) + } GROUP BY ?movie ?director ?min_dob ?max_dob + HAVING (?min_dob >= 1970) + }GROUP BY ?movie + } + #OPTIONAL {?movie wdt:P1476 ?title.} # titel oder label ? + OPTIONAL {?movie rdfs:label ?title FILTER (LANG(?title) = "en")} # titel oder label ? 
+ OPTIONAL {?movie wdt:P57 ?director.} + OPTIONAL {?movie wdt:P58 ?author.} + OPTIONAL {?movie wdt:P161 ?cast_member.} + OPTIONAL {?movie wdt:P577 ?publish_date.} + OPTIONAL {?movie wdt:P136 ?genre.} + OPTIONAL {?movie wdt:P2047 ?duration.} + OPTIONAL {?movie schema:description ?description FILTER ( lang(?description) = "en" ).} # may take a while + OPTIONAL {?movie wdt:P272 ?production_company} + } GROUP BY ?movie + +### DBPedia + PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + PREFIX dbc: <http://dbpedia.org/resource/Category:> + PREFIX dbo: <http://dbpedia.org/ontology/> + PREFIX dct: <http://purl.org/dc/terms/> + PREFIX xsd: <http://www.w3.org/2001/XMLSchema#> + PREFIX foaf: <http://xmlns.com/foaf/0.1/> + PREFIX dbp: <http://dbpedia.org/property/> + + SELECT DISTINCT (?movie AS ?Movie) + (GROUP_CONCAT(DISTINCT ?name;separator="|") AS ?Title) + (GROUP_CONCAT(DISTINCT ?director;separator="|") AS ?Director) + (GROUP_CONCAT(DISTINCT ?author;separator="|") AS ?Author) + (GROUP_CONCAT(DISTINCT ?cast_member;separator="|") AS ?Cast) + (GROUP_CONCAT(DISTINCT ?releaseDate;separator="|") AS ?Published) + (GROUP_CONCAT(DISTINCT ?subject;separator="|") AS ?Subject) + (GROUP_CONCAT(DISTINCT ?genre;separator="|") AS ?Genre) + (GROUP_CONCAT(DISTINCT ?duration/60;separator="|") AS ?Duration) # todo: change to original value + (GROUP_CONCAT(DISTINCT ?abstract;separator="|") AS ?Description) + (GROUP_CONCAT(DISTINCT ?distributor;separator="|") AS ?Distributor) + (GROUP_CONCAT(DISTINCT ?productionCompanies;separator="|") AS ?ProductionCompanies) + (GROUP_CONCAT(DISTINCT ?sameDirector;separator="|") AS ?DirectorLink) + (GROUP_CONCAT(DISTINCT ?sameMovie;separator="|") AS ?MovieLink) + WHERE { + { + SELECT DISTINCT ?movie + WHERE { + SELECT DISTINCT ?movie ?min_year ?max_year + WHERE { + SELECT DISTINCT ?movie ?director (MIN(?year) AS ?min_year) (MAX(?year) AS ?max_year) + WHERE{ + # X type Film + {?movie rdf:type dbo:Film.} + # Imagine a inner join + # X subject Comedy + 
{?movie dct:subject dbc:Comedy} + UNION + # X subject Y; Y like "Comedy". + {?movie dct:subject ?y FILTER CONTAINS(lcase(str(?y)), "comedy").} + UNION + # X genre Y; Y like "Comedy". + {?movie dbo:genre ?y FILTER CONTAINS (lcase(str(?y)), "comedy").} + # X director Person; Person birth date year + {?movie dbo:director ?director. + ?director dbo:birthDate ?dob. + BIND(xsd:integer(substr(xsd:string(?dob), 0, 4)) as ?year) . + } + } GROUP BY ?movie ?director + }GROUP BY ?movie HAVING (?min_year = ?max_year) + }GROUP BY ?movie HAVING (?min_year >= 1970) + } + OPTIONAL{?movie foaf:name ?name FILTER(LANG(?name)="en").} + OPTIONAL{?movie owl:sameAs ?sameMovie FILTER CONTAINS (LCASE(STR(?sameMovie)), 'wikidata.org').} + OPTIONAL{ + ?movie dbo:director ?director. + OPTIONAL{ + ?director owl:sameAs ?sameDirector FILTER CONTAINS (LCASE(STR(?sameDirector)), 'wikidata.org'). + } + } + OPTIONAL{?movie dbo:author ?author.} + OPTIONAL{?movie dbo:starring ?cast_member.} + OPTIONAL{?movie dbo:releaseDate ?releaseDate.} + OPTIONAL{?movie dct:subject ?subject FILTER CONTAINS(lcase(str(?subject)), "comedy").} + OPTIONAL{?movie dbo:genre ?genre FILTER CONTAINS(lcase(str(?genre)), "comedy").} + OPTIONAL{?movie dbo:runtime ?duration.} + OPTIONAL{?movie dbo:abstract ?abstract FILTER(LANG(?abstract)="en").} + OPTIONAL{?movie dbo:distributor ?distributor.} + OPTIONAL{?movie dbp:productionCompanies ?productionCompanies FILTER CONTAINS (lcase(str(?productionCompanies)), "http").} + }GROUP BY ?movie + +## Task 4 + +I decided to map the data from DBPedia to Wikidata. +I made this decision because I get some DBPedia properties from several sources. 
+ +The mapping will look like this: + +Variable |Wikidata |DBPedia +-----------------:|:------------------------------------------:|:------------------------------------------------| +Title |<http://www.w3.org/2000/01/rdf-schema#label>|<http://xmlns.com/foaf/0.1/name> +Director |<http://www.wikidata.org/prop/direct/P57> |<http://dbpedia.org/ontology/director> +Author |<http://www.wikidata.org/prop/direct/P58> |<http://dbpedia.org/ontology/author> +Cast Member |<http://www.wikidata.org/prop/direct/P161> |<http://dbpedia.org/ontology/starring> +Date published |<http://www.wikidata.org/prop/direct/P577> |<http://dbpedia.org/ontology/releaseDate> +Genre |<http://www.wikidata.org/prop/direct/P136> |<http://purl.org/dc/terms/subject> +Genre |<http://www.wikidata.org/prop/direct/P136> |<http://dbpedia.org/ontology/genre> +Duration |<http://www.wikidata.org/prop/direct/P2047> |<http://dbpedia.org/ontology/runtime> +Description |<http://schema.org/description> |<http://dbpedia.org/ontology/abstract> +Production company|<http://dbpedia.org/ontology/P272> |<http://dbpedia.org/ontology/distributor> +Production company|<http://dbpedia.org/ontology/P272> |<http://dbpedia.org/property/productionCompanies> + +Note that Genre and Production Company each appear twice in the table because DBPedia provides them through more than one property. For Production Company, the DBPedia distributor is treated as equivalent to a production company. 
\ No newline at end of file diff --git a/static/mapping/mapping.csv b/static/mapping/mapping.csv index c2bf496..b13fa28 100644 --- a/static/mapping/mapping.csv +++ b/static/mapping/mapping.csv @@ -7,6 +7,6 @@ Date published,<http://www.wikidata.org/prop/direct/P577>,<http://dbpedia.org/on Genre,<http://www.wikidata.org/prop/direct/P136>,<http://purl.org/dc/terms/subject> Genre,<http://www.wikidata.org/prop/direct/P136>,<http://dbpedia.org/ontology/genre> Duration,<http://www.wikidata.org/prop/direct/P2047>,<http://dbpedia.org/ontology/runtime> -Description," <http://schema.org/description>",<http://dbpedia.org/ontology/abstract> +Description,<http://schema.org/description>,<http://dbpedia.org/ontology/abstract> Production company,<http://dbpedia.org/ontology/P272>,<http://dbpedia.org/ontology/distributor> Production company,<http://dbpedia.org/ontology/P272>,<http://dbpedia.org/property/productionCompanies> \ No newline at end of file -- GitLab