Skip to content
Snippets Groups Projects
Commit d74a6e7f authored by Marc Feger's avatar Marc Feger
Browse files

Update wikidata query and documentation to get each movie who has at least one...

Update wikidata query and documentation to get each movie who has at least one director born after or in 1970.
parent 060d5645
Branches
No related tags found
No related merge requests found
Images/Structure_Wikidata_director.jpg

47.9 KiB

...@@ -105,6 +105,31 @@ wd:Q40831<y:LabelModel><y:SmartNodeLabelModel distance="4.0"/></y:LabelModel><y: ...@@ -105,6 +105,31 @@ wd:Q40831<y:LabelModel><y:SmartNodeLabelModel distance="4.0"/></y:LabelModel><y:
</y:ShapeNode> </y:ShapeNode>
</data> </data>
</node> </node>
<node id="n8">
<data key="d6">
<y:ShapeNode>
<y:Geometry height="100.0" width="100.0" x="229.0" y="610.0"/>
<y:Fill color="#00CCFF" transparent="false"/>
<y:BorderStyle color="#000000" raised="false" type="line" width="1.0"/>
<y:NodeLabel alignment="center" autoSizePolicy="content" fontFamily="Dialog" fontSize="15" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="56.998046875" horizontalTextPosition="center" iconTextGap="4" modelName="custom" textColor="#000000" verticalTextPosition="bottom" visible="true" width="77.42529296875" x="11.287353515625" xml:space="preserve" y="21.5009765625">xsd:int
xsd:string
xsd:date<y:LabelModel><y:SmartNodeLabelModel distance="4.0"/></y:LabelModel><y:ModelParameter><y:SmartNodeLabelModelParameter labelRatioX="0.0" labelRatioY="0.0" nodeRatioX="0.0" nodeRatioY="0.0" offsetX="0.0" offsetY="0.0" upX="0.0" upY="-1.0"/></y:ModelParameter></y:NodeLabel>
<y:Shape type="ellipse"/>
</y:ShapeNode>
</data>
</node>
<node id="n9">
<data key="d6">
<y:ShapeNode>
<y:Geometry height="100.0" width="100.0" x="267.0" y="450.0"/>
<y:Fill color="#00CCFF" transparent="false"/>
<y:BorderStyle color="#000000" raised="false" type="line" width="1.0"/>
<y:NodeLabel alignment="center" autoSizePolicy="content" fontFamily="Dialog" fontSize="15" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="39.33203125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" textColor="#000000" verticalTextPosition="bottom" visible="true" width="54.21484375" x="22.892578125" xml:space="preserve" y="30.333984375">human
wd:Q5<y:LabelModel><y:SmartNodeLabelModel distance="4.0"/></y:LabelModel><y:ModelParameter><y:SmartNodeLabelModelParameter labelRatioX="0.0" labelRatioY="0.0" nodeRatioX="0.0" nodeRatioY="0.0" offsetX="0.0" offsetY="0.0" upX="0.0" upY="-1.0"/></y:ModelParameter></y:NodeLabel>
<y:Shape type="ellipse"/>
</y:ShapeNode>
</data>
</node>
<edge id="e0" source="n0" target="n1"> <edge id="e0" source="n0" target="n1">
<data key="d10"> <data key="d10">
<y:PolyLineEdge> <y:PolyLineEdge>
...@@ -240,7 +265,31 @@ wdt:P279<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="false" default ...@@ -240,7 +265,31 @@ wdt:P279<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="false" default
</y:PolyLineEdge> </y:PolyLineEdge>
</data> </data>
</edge> </edge>
<edge id="e10" source="n0" target="n7"> <edge id="e10" source="n0" target="n9">
<data key="d10">
<y:PolyLineEdge>
<y:Path sx="0.0" sy="0.0" tx="0.0" ty="0.0"/>
<y:LineStyle color="#3366FF" type="line" width="1.0"/>
<y:Arrows source="none" target="standard"/>
<y:EdgeLabel alignment="center" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="32.265625" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" verticalTextPosition="bottom" visible="true" width="50.892578125" x="-62.16219439102474" xml:space="preserve" y="10.795548842601079">director
wdt:P57<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="false" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="0.0" distance="30.0" distanceToCenter="true" position="right" ratio="0.2932278091863161" segment="-1"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel>
<y:BendStyle smoothed="false"/>
</y:PolyLineEdge>
</data>
</edge>
<edge id="e11" source="n9" target="n8">
<data key="d10">
<y:PolyLineEdge>
<y:Path sx="0.0" sy="0.0" tx="0.0" ty="0.0"/>
<y:LineStyle color="#3366FF" type="line" width="1.0"/>
<y:Arrows source="none" target="standard"/>
<y:EdgeLabel alignment="center" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="32.265625" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" verticalTextPosition="bottom" visible="true" width="76.59765625" x="-104.96068557124818" xml:space="preserve" y="1.9767591570766854">date of birth
wdt:P569<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="false" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="60.6731274819218" distanceToCenter="true" position="right" ratio="0.03391911414948138" segment="-1"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel>
<y:BendStyle smoothed="false"/>
</y:PolyLineEdge>
</data>
</edge>
<edge id="e12" source="n0" target="n7">
<data key="d10"> <data key="d10">
<y:PolyLineEdge> <y:PolyLineEdge>
<y:Path sx="0.0" sy="0.0" tx="0.0" ty="0.0"/> <y:Path sx="0.0" sy="0.0" tx="0.0" ty="0.0"/>
......
...@@ -288,4 +288,308 @@ Then all films whose subject is derived from comedy will be determined. ...@@ -288,4 +288,308 @@ Then all films whose subject is derived from comedy will be determined.
This results in `10000` comedy movies (oh wonder !). This results in `10000` comedy movies (oh wonder !).
The query was structured as follows: The query was structured as follows:
![formula wikidata](./Images/formula_dbpedia.png) ![formula dbpedia](./Images/formula_dbpedia.png)
\ No newline at end of file
## Task 2
Next, all films from Wikidata and DBpedia are collected that have at least one director who was born in or after 1970.
### Wikidata
The search scheme of Wikidata will be extended by a director.
This is indicated by the blue arrows.
![structure wikidata with director](./Images/Structure_Wikidata_director.jpg)
The first thing to check is how many comedy movies have a director at all.
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
# Comedy films that have at least one director (wdt:P57).
# Result: one row per ?movie.
SELECT DISTINCT ?movie
WHERE {
  # --- Film test: any one of the five alternatives is sufficient. ---
  {
    # ?movie is a direct instance (wdt:P31) of film (wd:Q11424).
    ?movie wdt:P31 wd:Q11424 .
  } UNION {
    # ?movie is an instance of a class that is a subclass (wdt:P279) of film.
    ?movie wdt:P31 ?filmSubclass .
    ?filmSubclass wdt:P279 wd:Q11424 .
  } UNION {
    # ?movie is an instance of an item that is an instance of film genre (wd:Q201658).
    ?movie wdt:P31 ?genreItem .
    ?genreItem wdt:P31 wd:Q201658 .
  } UNION {
    # One more hop: the intermediate item is an instance or subclass of
    # something that is a subclass of film.
    ?movie wdt:P31 ?filmLower .
    ?filmLower wdt:P31|wdt:P279 ?filmUpper .
    ?filmUpper wdt:P279 wd:Q11424 .
  } UNION {
    # One more hop: the intermediate item is an instance or subclass of
    # something that is an instance of film genre.
    ?movie wdt:P31 ?genreLower .
    ?genreLower wdt:P31|wdt:P279 ?genreUpper .
    ?genreUpper wdt:P31 wd:Q201658 .
  }
  # --- Genre test, joined with the film test above. ---
  {
    # ?movie has the genre (wdt:P136) comedy (wd:Q40831).
    ?movie wdt:P136 wd:Q40831 .
  } UNION {
    # ?movie has a genre that is a subclass of comedy.
    ?movie wdt:P136 ?comedySubgenre .
    ?comedySubgenre wdt:P279 wd:Q40831 .
  }
  # --- Director test: some director statement must exist. ---
  {
    ?movie wdt:P57 ?anyDirector .
  }
}
GROUP BY ?movie
There are a total of `28964` comedy films that also have a director.
This means that a total of `4637` comedy films are lost because they do not have a director.
Then it is examined how many of the comedy films with director also have a director with a date of birth.
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
# Comedy films that have at least one director (wdt:P57) with a
# date of birth (wdt:P569). Result: one row per ?movie.
SELECT DISTINCT ?movie
WHERE {
  # --- Film test: any one of the five alternatives is sufficient. ---
  {
    # ?movie is a direct instance (wdt:P31) of film (wd:Q11424).
    ?movie wdt:P31 wd:Q11424 .
  } UNION {
    # ?movie is an instance of a class that is a subclass (wdt:P279) of film.
    ?movie wdt:P31 ?filmSubclass .
    ?filmSubclass wdt:P279 wd:Q11424 .
  } UNION {
    # ?movie is an instance of an item that is an instance of film genre (wd:Q201658).
    ?movie wdt:P31 ?genreItem .
    ?genreItem wdt:P31 wd:Q201658 .
  } UNION {
    # One more hop: the intermediate item is an instance or subclass of
    # something that is a subclass of film.
    ?movie wdt:P31 ?filmLower .
    ?filmLower wdt:P31|wdt:P279 ?filmUpper .
    ?filmUpper wdt:P279 wd:Q11424 .
  } UNION {
    # One more hop: the intermediate item is an instance or subclass of
    # something that is an instance of film genre.
    ?movie wdt:P31 ?genreLower .
    ?genreLower wdt:P31|wdt:P279 ?genreUpper .
    ?genreUpper wdt:P31 wd:Q201658 .
  }
  # --- Genre test, joined with the film test above. ---
  {
    # ?movie has the genre (wdt:P136) comedy (wd:Q40831).
    ?movie wdt:P136 wd:Q40831 .
  } UNION {
    # ?movie has a genre that is a subclass of comedy.
    ?movie wdt:P136 ?comedySubgenre .
    ?comedySubgenre wdt:P279 wd:Q40831 .
  }
  # --- Director test: a director with a recorded date of birth must exist. ---
  {
    ?movie wdt:P57 ?someDirector .
    ?someDirector wdt:P569 ?dob .
  }
}
GROUP BY ?movie
This results in a loss of `1060` comedy movies whose director has no date of birth.
This leaves `27903` comedy movies with directors and a date of birth.
Next, all films are selected that have at least one director with ambiguous birth years.
As the task is limited to the year of birth only, ambiguous information about the month and
the day will be ignored. The following query considers how many directors exist at all.
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
# Directors (wdt:P57) of comedy films who have a date of birth (wdt:P569).
# Result: one row per ?director.
SELECT DISTINCT ?director
WHERE {
  # --- Film test: any one of the five alternatives is sufficient. ---
  {
    # ?movie is a direct instance (wdt:P31) of film (wd:Q11424).
    ?movie wdt:P31 wd:Q11424 .
  } UNION {
    # ?movie is an instance of a class that is a subclass (wdt:P279) of film.
    ?movie wdt:P31 ?filmSubclass .
    ?filmSubclass wdt:P279 wd:Q11424 .
  } UNION {
    # ?movie is an instance of an item that is an instance of film genre (wd:Q201658).
    ?movie wdt:P31 ?genreItem .
    ?genreItem wdt:P31 wd:Q201658 .
  } UNION {
    # One more hop: the intermediate item is an instance or subclass of
    # something that is a subclass of film.
    ?movie wdt:P31 ?filmLower .
    ?filmLower wdt:P31|wdt:P279 ?filmUpper .
    ?filmUpper wdt:P279 wd:Q11424 .
  } UNION {
    # One more hop: the intermediate item is an instance or subclass of
    # something that is an instance of film genre.
    ?movie wdt:P31 ?genreLower .
    ?genreLower wdt:P31|wdt:P279 ?genreUpper .
    ?genreUpper wdt:P31 wd:Q201658 .
  }
  # --- Genre test, joined with the film test above. ---
  {
    # ?movie has the genre (wdt:P136) comedy (wd:Q40831).
    ?movie wdt:P136 wd:Q40831 .
  } UNION {
    # ?movie has a genre that is a subclass of comedy.
    ?movie wdt:P136 ?comedySubgenre .
    ?comedySubgenre wdt:P279 wd:Q40831 .
  }
  # --- Director test: the director must have a recorded date of birth. ---
  {
    ?movie wdt:P57 ?director .
    ?director wdt:P569 ?dob .
  }
}
GROUP BY ?director
There are a total of `10693` directors for the `27903` comedy films.
Out of these directors, those who have more than one year of birth are selected.
For this purpose it is checked whether the smallest and the largest year of
birth of a director are equal. If this is not the case, the information is ambiguous.
Such directors should be ignored.
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
# Directors of comedy films whose recorded dates of birth (wdt:P569) fall
# into more than one calendar year, i.e. whose birth year is ambiguous.
# Result columns:
#   ?director - the director item
#   ?min_dob  - smallest birth year recorded for the director
#   ?max_dob  - largest birth year recorded for the director
SELECT ?director (MIN(YEAR(?dob)) AS ?min_dob) (MAX(YEAR(?dob)) AS ?max_dob)
WHERE{
  # ?movie is an instance (wdt:P31) of film (wd:Q11424).
  {?movie wdt:P31 wd:Q11424.}
  UNION
  # ?movie is an instance of Y; Y is a subclass (wdt:P279) of film.
  {?movie wdt:P31 [wdt:P279 wd:Q11424].}
  UNION
  # ?movie is an instance of Y; Y is an instance of film genre (wd:Q201658).
  {?movie wdt:P31 [wdt:P31 wd:Q201658].}
  UNION
  # ?movie is an instance of Y; Y is an instance or subclass of Z; Z is a subclass of film.
  {?movie wdt:P31 [wdt:P31|wdt:P279 [wdt:P279 wd:Q11424]].}
  UNION
  # ?movie is an instance of Y; Y is an instance or subclass of Z; Z is an instance of film genre.
  {?movie wdt:P31 [wdt:P31|wdt:P279 [wdt:P31 wd:Q201658]].}
  # The following groups are joined with the film test above (like an inner join).
  # ?movie has the genre (wdt:P136) comedy (wd:Q40831).
  {?movie wdt:P136 wd:Q40831.}
  UNION
  # ?movie has a genre G; G is a subclass of comedy.
  {?movie wdt:P136 [wdt:P279 wd:Q40831].}
  # ?movie has a director (wdt:P57) who has a date of birth.
  {?movie wdt:P57 ?director.
   ?director wdt:P569 ?dob.}
} GROUP BY ?director
# The aggregates are repeated here instead of referring to ?min_dob / ?max_dob:
# in the SPARQL 1.1 algebra HAVING is applied before the SELECT expressions
# are bound, so the aliases are not guaranteed to be visible in HAVING.
HAVING (MIN(YEAR(?dob)) != MAX(YEAR(?dob)))
All directors are grouped by their ID.
For each director, the smallest and the largest year of birth are then computed.
This has the advantage that every director who has more than one year of birth has unequal
entries for the minimum and maximum year of birth.
Altogether `51` directors with an ambiguous birth year are found.
Note that the `YEAR` function interprets imprecise dates such as `20. century` for the director `Troy Miller (wd:Q3541033)` as `2000`.
Then all directors with a unique year of birth are searched.
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
# Directors of comedy films whose recorded dates of birth (wdt:P569) all
# fall into the same calendar year, i.e. whose birth year is unique.
# Result columns:
#   ?director - the director item
#   ?min_dob  - smallest birth year recorded for the director
#   ?max_dob  - largest birth year recorded for the director (equal to ?min_dob)
SELECT ?director (MIN(YEAR(?dob)) AS ?min_dob) (MAX(YEAR(?dob)) AS ?max_dob)
WHERE{
  # ?movie is an instance (wdt:P31) of film (wd:Q11424).
  {?movie wdt:P31 wd:Q11424.}
  UNION
  # ?movie is an instance of Y; Y is a subclass (wdt:P279) of film.
  {?movie wdt:P31 [wdt:P279 wd:Q11424].}
  UNION
  # ?movie is an instance of Y; Y is an instance of film genre (wd:Q201658).
  {?movie wdt:P31 [wdt:P31 wd:Q201658].}
  UNION
  # ?movie is an instance of Y; Y is an instance or subclass of Z; Z is a subclass of film.
  {?movie wdt:P31 [wdt:P31|wdt:P279 [wdt:P279 wd:Q11424]].}
  UNION
  # ?movie is an instance of Y; Y is an instance or subclass of Z; Z is an instance of film genre.
  {?movie wdt:P31 [wdt:P31|wdt:P279 [wdt:P31 wd:Q201658]].}
  # The following groups are joined with the film test above (like an inner join).
  # ?movie has the genre (wdt:P136) comedy (wd:Q40831).
  {?movie wdt:P136 wd:Q40831.}
  UNION
  # ?movie has a genre G; G is a subclass of comedy.
  {?movie wdt:P136 [wdt:P279 wd:Q40831].}
  # ?movie has a director (wdt:P57) who has a date of birth.
  {?movie wdt:P57 ?director.
   ?director wdt:P569 ?dob.}
} GROUP BY ?director
# The aggregates are repeated here instead of referring to ?min_dob / ?max_dob:
# in the SPARQL 1.1 algebra HAVING is applied before the SELECT expressions
# are bound, so the aliases are not guaranteed to be visible in HAVING.
HAVING (MIN(YEAR(?dob)) = MAX(YEAR(?dob)))
This determines `10627` directors with a unique year.
All directors born after or in 1970 can then be searched out.
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
# Directors of comedy films who have a unique birth year that is 1970 or later.
# Result columns:
#   ?director - the director item
#   ?min_dob  - the director's (unique) birth year
# DISTINCT collapses the per-film rows of the subquery to one row per director.
SELECT DISTINCT ?director ?min_dob
WHERE {
  {
    # One row per (film, director) pair whose director has a unique birth year.
    SELECT ?movie ?director (MIN(YEAR(?dob)) AS ?min_dob) (MAX(YEAR(?dob)) AS ?max_dob)
    WHERE{
      # ?movie is an instance (wdt:P31) of film (wd:Q11424).
      {?movie wdt:P31 wd:Q11424.}
      UNION
      # ?movie is an instance of Y; Y is a subclass (wdt:P279) of film.
      {?movie wdt:P31 [wdt:P279 wd:Q11424].}
      UNION
      # ?movie is an instance of Y; Y is an instance of film genre (wd:Q201658).
      {?movie wdt:P31 [wdt:P31 wd:Q201658].}
      UNION
      # ?movie is an instance of Y; Y is an instance or subclass of Z; Z is a subclass of film.
      {?movie wdt:P31 [wdt:P31|wdt:P279 [wdt:P279 wd:Q11424]].}
      UNION
      # ?movie is an instance of Y; Y is an instance or subclass of Z; Z is an instance of film genre.
      {?movie wdt:P31 [wdt:P31|wdt:P279 [wdt:P31 wd:Q201658]].}
      # The following groups are joined with the film test above (like an inner join).
      # ?movie has the genre (wdt:P136) comedy (wd:Q40831).
      {?movie wdt:P136 wd:Q40831.}
      UNION
      # ?movie has a genre G; G is a subclass of comedy.
      {?movie wdt:P136 [wdt:P279 wd:Q40831].}
      # ?movie has a director (wdt:P57) who has a date of birth (wdt:P569).
      {?movie wdt:P57 ?director.
       ?director wdt:P569 ?dob.}
    } GROUP BY ?movie ?director
    # Aggregates are repeated instead of using the SELECT aliases, which are
    # not guaranteed to be visible in HAVING under the SPARQL 1.1 algebra.
    HAVING (MIN(YEAR(?dob)) = MAX(YEAR(?dob)))
  }
  # Row-level condition on the projected birth year: born in or after 1970.
  FILTER (?min_dob >= 1970)
}
This will identify `1950` directors born in or after 1970.
In total, the corresponding films can then be selected by searching and grouping `?movie`.
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
# Comedy films that have at least one director with a unique birth year
# that is 1970 or later. Result: one row per ?movie.
SELECT ?movie
WHERE {
  {
    # One row per (film, director) pair whose director has a unique birth year.
    SELECT ?movie ?director (MIN(YEAR(?dob)) AS ?min_dob) (MAX(YEAR(?dob)) AS ?max_dob)
    WHERE{
      # ?movie is an instance (wdt:P31) of film (wd:Q11424).
      {?movie wdt:P31 wd:Q11424.}
      UNION
      # ?movie is an instance of Y; Y is a subclass (wdt:P279) of film.
      {?movie wdt:P31 [wdt:P279 wd:Q11424].}
      UNION
      # ?movie is an instance of Y; Y is an instance of film genre (wd:Q201658).
      {?movie wdt:P31 [wdt:P31 wd:Q201658].}
      UNION
      # ?movie is an instance of Y; Y is an instance or subclass of Z; Z is a subclass of film.
      {?movie wdt:P31 [wdt:P31|wdt:P279 [wdt:P279 wd:Q11424]].}
      UNION
      # ?movie is an instance of Y; Y is an instance or subclass of Z; Z is an instance of film genre.
      {?movie wdt:P31 [wdt:P31|wdt:P279 [wdt:P31 wd:Q201658]].}
      # The following groups are joined with the film test above (like an inner join).
      # ?movie has the genre (wdt:P136) comedy (wd:Q40831).
      {?movie wdt:P136 wd:Q40831.}
      UNION
      # ?movie has a genre G; G is a subclass of comedy.
      {?movie wdt:P136 [wdt:P279 wd:Q40831].}
      # ?movie has a director (wdt:P57) who has a date of birth (wdt:P569).
      {?movie wdt:P57 ?director.
       ?director wdt:P569 ?dob.}
    } GROUP BY ?movie ?director
    # Aggregates are repeated instead of using the SELECT aliases, which are
    # not guaranteed to be visible in HAVING under the SPARQL 1.1 algebra.
    HAVING (MIN(YEAR(?dob)) = MAX(YEAR(?dob)))
  }
  # Keep only the pairs whose director was born in or after 1970.
  FILTER (?min_dob >= 1970)
}
# A film with several matching directors is reported once.
GROUP BY ?movie
Thus a total of `2887` comedy films are found which have at least one
director whose year of birth is in or after 1970.
An example of the quality of the results is the computer-animated comedy film `Brave (wd:Q126796)`.
It has three directors: `Steve Purcell (wd:Q1390504)` with the birth years `1959` and `1961`,
`Mark Andrews (wd:Q1408804)` with the birth year `1970`, and `Brenda Chapman (wd:Q429715)`,
who was born on `1 November 1962`.
Steve Purcell is ignored because his birth year is ambiguous, so the film is selected only because of Mark Andrews.
\ No newline at end of file
PREFIX wd: <http://www.wikidata.org/entity/> PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/> PREFIX wdt: <http://www.wikidata.org/prop/direct/>
SELECT DISTINCT ?movie SELECT DISTINCT ?movie
WHERE {
SELECT ?movie ?director ?min_dob ?max_dob
WHERE{ WHERE{
SELECT ?movie ?director (MIN(YEAR(?dob)) AS ?min_dob) (MAX(YEAR(?dob)) AS ?max_dob)
WHERE{
# node who instances of films. # node who instances of films.
{?movie wdt:P31 wd:Q11424.} {?movie wdt:P31 wd:Q11424.}
UNION UNION
...@@ -28,5 +32,13 @@ ...@@ -28,5 +32,13 @@
UNION UNION
# X genre G; G subclass of comedy. # X genre G; G subclass of comedy.
{?movie wdt:P136 [wdt:P279 wd:Q40831].} {?movie wdt:P136 [wdt:P279 wd:Q40831].}
}
GROUP BY ?movie # X director exists and who has a birthday
\ No newline at end of file {?movie wdt:P57 ?director.
?director wdt:P569 ?dob.}
} GROUP BY ?movie ?director
HAVING (?min_dob = ?max_dob)
} GROUP BY ?movie ?director ?min_dob ?max_dob
HAVING (?min_dob >= 1970)
}GROUP BY ?movie
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment