diff --git a/README.md b/README.md
index 61127f15fe86c1f87e63c2bdae618e16fb1e668c..db0bafa6b50c97733f84ef746ca539f421b1da0a 100644
--- a/README.md
+++ b/README.md
@@ -666,4 +666,172 @@ The following query will be edited it can be seen in the diagram.
       }GROUP BY ?movie HAVING (?min_year = ?max_year)
     }GROUP BY ?movie HAVING (?min_year >= 1970)
 
-As a result, `1433` comedy films with a director born after or in 1970 were found.
\ No newline at end of file
+As a result, `1433` comedy films with a director born after or in 1970 were found.
+
+## Task 3
+In this step, additional information is queried by extending the basic queries of the previous step with the `OPTIONAL` operator.
+
+### Wikidata
+
+    PREFIX wd: <http://www.wikidata.org/entity/>
+    PREFIX wdt: <http://www.wikidata.org/prop/direct/>
+    PREFIX schema: <http://schema.org/>
+    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+
+    SELECT
+      DISTINCT (?movie AS ?Movie)
+      (GROUP_CONCAT(DISTINCT ?title;separator="|") AS ?Title)
+      (GROUP_CONCAT(DISTINCT ?director;separator="|") AS ?Director)
+      (GROUP_CONCAT(DISTINCT ?author;separator="|") AS ?Author)
+      (GROUP_CONCAT(DISTINCT ?cast_member;separator="|") AS ?Cast)
+      (GROUP_CONCAT(DISTINCT ?publish_date;separator="|") AS ?Published)
+      (GROUP_CONCAT(DISTINCT ?genre;separator="|") AS ?Genre)
+      (GROUP_CONCAT(DISTINCT ?duration;separator="|") AS ?Duration)
+      (GROUP_CONCAT(DISTINCT ?description;separator="|") AS ?Description)
+      (GROUP_CONCAT(DISTINCT ?production_company;separator="|") AS ?ProductionCompany)
+    WHERE {
+      {
+        SELECT DISTINCT ?movie
+        WHERE {
+          SELECT ?movie ?director ?min_dob ?max_dob
+          WHERE{
+            SELECT ?movie ?director (MIN(YEAR(?dob)) AS ?min_dob) (MAX(YEAR(?dob)) AS ?max_dob)
+            WHERE{
+              # X instance of film.
+              {?movie wdt:P31 wd:Q11424.}
+              UNION
+              # X instance of Y; Y subclass of film.
+              {?movie wdt:P31 [wdt:P279 wd:Q11424].}
+              UNION
+              # X instance of Y; Y instance of film genre.
+              {?movie wdt:P31 [wdt:P31 wd:Q201658].}
+              UNION
+              # X instance of Y; Y instance of or subclass of Z; Z subclass of film.
+              {?movie wdt:P31 [wdt:P31|wdt:P279 [wdt:P279 wd:Q11424]].}
+              UNION
+              # X instance of Y; Y instance of or subclass of Z; Z instance of film genre.
+              {?movie wdt:P31 [wdt:P31|wdt:P279 [wdt:P31 wd:Q201658]].}
+
+              # Imagine an inner join
+
+              # X genre comedy.
+              {?movie wdt:P136 wd:Q40831.}
+              UNION
+              # X genre G; G subclass of comedy.
+              {?movie wdt:P136 [wdt:P279 wd:Q40831].}
+
+              # X has a director who has a date of birth.
+              {?movie wdt:P57 ?director.
+               ?director wdt:P569 ?dob.}
+            } GROUP BY ?movie ?director
+            HAVING (?min_dob = ?max_dob)
+          } GROUP BY ?movie ?director ?min_dob ?max_dob
+          HAVING (?min_dob >= 1970)
+        }GROUP BY ?movie
+      }
+      #OPTIONAL {?movie wdt:P1476 ?title.} # title or label?
+      OPTIONAL {?movie rdfs:label ?title FILTER (LANG(?title) = "en")} # title or label?
+      OPTIONAL {?movie wdt:P57 ?director.}
+      OPTIONAL {?movie wdt:P58 ?author.}
+      OPTIONAL {?movie wdt:P161 ?cast_member.}
+      OPTIONAL {?movie wdt:P577 ?publish_date.}
+      OPTIONAL {?movie wdt:P136 ?genre.}
+      OPTIONAL {?movie wdt:P2047 ?duration.}
+      OPTIONAL {?movie schema:description ?description FILTER ( lang(?description) = "en" ).} # may take a while
+      OPTIONAL {?movie wdt:P272 ?production_company}
+    } GROUP BY ?movie
+
+### DBPedia
+    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+    PREFIX owl: <http://www.w3.org/2002/07/owl#>
+    PREFIX dbc: <http://dbpedia.org/resource/Category:>
+    PREFIX dbo: <http://dbpedia.org/ontology/>
+    PREFIX dct: <http://purl.org/dc/terms/>
+    PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
+    PREFIX foaf: <http://xmlns.com/foaf/0.1/>
+    PREFIX dbp: <http://dbpedia.org/property/>
+
+    SELECT DISTINCT (?movie AS ?Movie)
+      (GROUP_CONCAT(DISTINCT ?name;separator="|") AS ?Title)
+      (GROUP_CONCAT(DISTINCT ?director;separator="|") AS ?Director)
+      (GROUP_CONCAT(DISTINCT ?author;separator="|") AS ?Author)
+      (GROUP_CONCAT(DISTINCT ?cast_member;separator="|") AS ?Cast)
+      (GROUP_CONCAT(DISTINCT ?releaseDate;separator="|") AS ?Published)
+      (GROUP_CONCAT(DISTINCT ?subject;separator="|") AS ?Subject)
+      (GROUP_CONCAT(DISTINCT ?genre;separator="|") AS ?Genre)
+      (GROUP_CONCAT(DISTINCT ?duration/60;separator="|") AS ?Duration) # TODO: change to original value
+      (GROUP_CONCAT(DISTINCT ?abstract;separator="|") AS ?Description)
+      (GROUP_CONCAT(DISTINCT ?distributor;separator="|") AS ?Distributor)
+      (GROUP_CONCAT(DISTINCT ?productionCompanies;separator="|") AS ?ProductionCompanies)
+      (GROUP_CONCAT(DISTINCT ?sameDirector;separator="|") AS ?DirectorLink)
+      (GROUP_CONCAT(DISTINCT ?sameMovie;separator="|") AS ?MovieLink)
+    WHERE {
+      {
+        SELECT DISTINCT ?movie
+        WHERE {
+          SELECT DISTINCT ?movie ?min_year ?max_year
+          WHERE {
+            SELECT DISTINCT ?movie ?director (MIN(?year) AS ?min_year) (MAX(?year) AS ?max_year)
+            WHERE{
+              # X type Film
+              {?movie rdf:type dbo:Film.}
+              # Imagine an inner join
+              # X subject Comedy
+              {?movie dct:subject dbc:Comedy}
+              UNION
+              # X subject Y; Y like "Comedy".
+              {?movie dct:subject ?y FILTER CONTAINS(lcase(str(?y)), "comedy").}
+              UNION
+              # X genre Y; Y like "Comedy".
+              {?movie dbo:genre ?y FILTER CONTAINS (lcase(str(?y)), "comedy").}
+              # X director Person; Person birth date year
+              {?movie dbo:director ?director.
+               ?director dbo:birthDate ?dob.
+               # NOTE(review): SPARQL 1.1 SUBSTR is 1-based, so a start of 0 may yield only 3 characters on spec-compliant engines - confirm on the DBpedia endpoint.
+               BIND(xsd:integer(substr(xsd:string(?dob), 0, 4)) as ?year) .
+              }
+            } GROUP BY ?movie ?director
+          }GROUP BY ?movie HAVING (?min_year = ?max_year)
+        }GROUP BY ?movie HAVING (?min_year >= 1970)
+      }
+      OPTIONAL{?movie foaf:name ?name FILTER(LANG(?name)="en").}
+      OPTIONAL{?movie owl:sameAs ?sameMovie FILTER CONTAINS (LCASE(STR(?sameMovie)), 'wikidata.org').}
+      OPTIONAL{
+        ?movie dbo:director ?director.
+        OPTIONAL{
+          ?director owl:sameAs ?sameDirector FILTER CONTAINS (LCASE(STR(?sameDirector)), 'wikidata.org').
+        }
+      }
+      OPTIONAL{?movie dbo:author ?author.}
+      OPTIONAL{?movie dbo:starring ?cast_member.}
+      OPTIONAL{?movie dbo:releaseDate ?releaseDate.}
+      OPTIONAL{?movie dct:subject ?subject FILTER CONTAINS(lcase(str(?subject)), "comedy").}
+      OPTIONAL{?movie dbo:genre ?genre FILTER CONTAINS(lcase(str(?genre)), "comedy").}
+      OPTIONAL{?movie dbo:runtime ?duration.}
+      OPTIONAL{?movie dbo:abstract ?abstract FILTER(LANG(?abstract)="en").}
+      OPTIONAL{?movie dbo:distributor ?distributor.}
+      OPTIONAL{?movie dbp:productionCompanies ?productionCompanies FILTER CONTAINS (lcase(str(?productionCompanies)), "http").}
+    }GROUP BY ?movie
+
+## Task 4
+
+I decided to map the data from DBPedia to Wikidata.
+I made this decision because some of the DBPedia properties are gathered from several sources.
+
+The mapping will look like this:
+
+Variable          |Wikidata                                    |DBPedia
+-----------------:|:------------------------------------------:|:------------------------------------------------|
+Titel             |<http://www.w3.org/2000/01/rdf-schema#label>|<http://xmlns.com/foaf/0.1/name>
+Director          |<http://www.wikidata.org/prop/direct/P57>   |<http://dbpedia.org/ontology/director>
+Author            |<http://www.wikidata.org/prop/direct/P58>   |<http://dbpedia.org/ontology/author>
+Cast Member       |<http://www.wikidata.org/prop/direct/P161>  |<http://dbpedia.org/ontology/starring>
+Date published    |<http://www.wikidata.org/prop/direct/P577>  |<http://dbpedia.org/ontology/releaseDate>
+Genre             |<http://www.wikidata.org/prop/direct/P136>  |<http://purl.org/dc/terms/subject>
+Genre             |<http://www.wikidata.org/prop/direct/P136>  |<http://dbpedia.org/ontology/genre>
+Duration          |<http://www.wikidata.org/prop/direct/P2047> |<http://dbpedia.org/ontology/runtime>
+Description       |<http://schema.org/description>             |<http://dbpedia.org/ontology/abstract>
+Production company|<http://www.wikidata.org/prop/direct/P272>  |<http://dbpedia.org/ontology/distributor>
+Production company|<http://www.wikidata.org/prop/direct/P272>  |<http://dbpedia.org/property/productionCompanies>
+
+It is important to note that the Genre and Production Company properties in DBPedia were created from multiple sources. For the Production Company, it is also important to note that the distributor is treated the same as a production company.
\ No newline at end of file
diff --git a/static/mapping/mapping.csv b/static/mapping/mapping.csv
index c2bf49633de21aaa35dc78ae26e5a3bc6e324f8a..b13fa28fc5d6d9eac431ed3a5ebc5d6f3e167972 100644
--- a/static/mapping/mapping.csv
+++ b/static/mapping/mapping.csv
@@ -7,6 +7,6 @@ Date published,<http://www.wikidata.org/prop/direct/P577>,<http://dbpedia.org/on
 Genre,<http://www.wikidata.org/prop/direct/P136>,<http://purl.org/dc/terms/subject>
 Genre,<http://www.wikidata.org/prop/direct/P136>,<http://dbpedia.org/ontology/genre>
 Duration,<http://www.wikidata.org/prop/direct/P2047>,<http://dbpedia.org/ontology/runtime>
-Description," <http://schema.org/description>",<http://dbpedia.org/ontology/abstract>
-Production company,<http://dbpedia.org/ontology/P272>,<http://dbpedia.org/ontology/distributor>
-Production company,<http://dbpedia.org/ontology/P272>,<http://dbpedia.org/property/productionCompanies>
\ No newline at end of file
+Description,<http://schema.org/description>,<http://dbpedia.org/ontology/abstract>
+Production company,<http://www.wikidata.org/prop/direct/P272>,<http://dbpedia.org/ontology/distributor>
+Production company,<http://www.wikidata.org/prop/direct/P272>,<http://dbpedia.org/property/productionCompanies>
\ No newline at end of file