= Cypher: A data exploration of D-BAS

== D-BAS

image::https://dbas.cs.uni-duesseldorf.de/static/images/logo/black.svg?x=1549315407[width=200,float=right]

https://dbas.cs.uni-duesseldorf.de[D-BAS] is a dialog-based argumentation system which presents its discussions within a graph structure.

The discussion graph is stored internally in the form of tables.

This presentation will convert the tabular representation of the data into a graph structure and explain suitable https://neo4j.com/developer/cypher-query-language/[Cypher] requests.

== Connect to the D-BAS database
:password: pass:a['<span value-key="password">FooBar</span>']
:table: pass:a['<span value-key="table">textversions</span>']

Since this tool is located in the same network as D-BAS, the containers **web** and **db** can be addressed.
Therefore, queries can be sent directly to the D-BAS database.
So that Neo4j can submit the requests to PostgreSQL, the plugins https://neo4j-contrib.github.io/neo4j-apoc-procedures/[APOC] and the https://jdbc.postgresql.org/[PostgreSQL JDBC Driver] are provided.

The following example connects Neo4J to the D-BAS database **db**.

Please enter the password of **db**:
++++
Password: <input style="display:inline;width:30%;" value-for="password" class="form-control" value="FooBar" size="40">
Table: <input style="display:inline;width:30%;" value-for="table" class="form-control" value="textversions" size="40">
++++

[source, cypher,subs=attributes]
----
WITH 'jdbc:postgresql://db/discussion?user=postgres&password=' + {password} AS url
CALL apoc.load.jdbc(url, {table})
YIELD row
RETURN row
----

== Creating the statement nodes
:password: pass:a['<span value-key="password">FooBar</span>']


++++
Password: <input style="display:inline;width:30%;" value-for="password" class="form-control" value="FooBar" size="40">
++++

[source, cypher,subs=attributes]
----
WITH 'jdbc:postgresql://db/discussion?user=postgres&password=' + {password} AS url
CALL apoc.load.jdbc(url, 'statements')
YIELD row
MERGE (a:Statement{uid:row.uid, is_position:row.is_position, is_disabled:row.is_disabled})
RETURN a
----

== Filling the statement nodes with textversions
:password: pass:a['<span value-key="password">FooBar</span>']


++++
Password: <input style="display:inline;width:30%;" value-for="password" class="form-control" value="FooBar" size="40">
++++

[source, cypher,subs=attributes]
----
WITH 'jdbc:postgresql://db/discussion?user=postgres&password=' + 'FooBar' AS url
CALL apoc.load.jdbc(url, 'textversions')
YIELD row
MATCH (a:Statement{uid:row.statement_uid})
WHERE NOT EXISTS(a.content)
SET a += {content:row.content}
RETURN a
----

== Creating the user nodes
:password: pass:a['<span value-key="password">FooBar</span>']


++++
Password: <input style="display:inline;width:30%;" value-for="password" class="form-control" value="FooBar" size="40">
++++

[source, cypher,subs=attributes]
----
WITH 'jdbc:postgresql://db/discussion?user=postgres&password=' + {password} AS url
CALL apoc.load.jdbc(url, 'users')
YIELD row
MERGE (a:User{uid:row.uid, public_nickname:row.public_nickname})
RETURN a
----

== Create relation between Users and Statements
:password: pass:a['<span value-key="password">FooBar</span>']


++++
Password: <input style="display:inline;width:30%;" value-for="password" class="form-control" value="FooBar" size="40">
++++

[source, cypher,subs=attributes]
----
WITH 'jdbc:postgresql://db/discussion?user=postgres&password=' + {password} AS url
CALL apoc.load.jdbc(url, 'textversions')
YIELD row
MATCH (a:User), (b:Statement)
WHERE a.uid = row.author_uid AND b.uid = row.statement_uid
MERGE (a)-[r:HAS_WRITTEN]->(b)
RETURN a, b, r
----

== Create issue nodes
:password: pass:a['<span value-key="password">FooBar</span>']


++++
Password: <input style="display:inline;width:30%;" value-for="password" class="form-control" value="FooBar" size="40">
++++

[source, cypher,subs=attributes]
----
WITH 'jdbc:postgresql://db/discussion?user=postgres&password=' + 'FooBar' AS url
CALL apoc.load.jdbc(url, 'issues')
YIELD row
MERGE (a:Issue{uid:row.uid, title:row.title})
RETURN a
----

== Connect statements with issues
:password: pass:a['<span value-key="password">FooBar</span>']


++++
Password: <input style="display:inline;width:30%;" value-for="password" class="form-control" value="FooBar" size="40">
++++

[source, cypher,subs=attributes]
----
WITH 'jdbc:postgresql://db/discussion?user=postgres&password=' + 'FooBar' AS url
CALL apoc.load.jdbc(url, 'statement_to_issue')
YIELD row
MATCH (a:Statement{uid:row.statement_uid}), (b:Issue{uid:row.issue_uid})
MERGE (a)-[r:WRITTEN_IN]->(b)
RETURN a,b,r
----

== Every User who has written a Position likes it
:max_rating: pass:a['<span value-key="max_rating">2</span>']


++++
Rating between 0 and : <input style="display:inline;width:30%;" value-for="max_rating" class="form-control" value="2" size="40">
++++

[source, cypher,subs=attributes]
----
MATCH (a:User)-[:HAS_WRITTEN]->(b:Statement{is_position:True})
MERGE (a)-[r:LIKES{rating:toInt({max_rating})}]->(b)
RETURN a, b, r
----

== Every User who has written none gets a random rating for a Position
:max_rating: pass:a['<span value-key="max_rating">2</span>']


++++
Rating between 0 and : <input style="display:inline;width:30%;" value-for="max_rating" class="form-control" value="2" size="40">
++++

[source, cypher,subs=attributes]
----
MATCH (a:User)
WHERE NOT EXISTS((a)-[:LIKES]->())
MATCH (b:Statement)
WHERE b.is_position
MERGE (a)-[:LIKES{rating:round(rand()*toInt({max_rating}))}]->(b)
----

== Delete every Rating with 0.0

[source, cypher,subs=attributes]
----
MATCH ()-[r:LIKES{rating:0.0}]->()
DETACH DELETE r
----

== Get sub-graph
:max_number: pass:a['<span value-key="max_number">10</span>']


++++
Uid between 0 and : <input style="display:inline;width:30%;" value-for="max_number" class="form-control" value="10" size="40">
++++

[source, cypher,subs=attributes]
----
MATCH (a:User), (b:Statement{is_position:True})
WHERE a.uid in range(0, toInt({max_number}))
RETURN a,b
----

== Get k-nearest-neighbours with Pearson-Correlation
:k_neighbors: pass:a['<span value-key="k_neighbors">5</span>']
:user: pass:a['<span value-key="user">Björn</span>']


++++
Find top <input style="display:inline;width:30%;" value-for="k_neighbors" class="form-control" value="5" size="40">-neighbors for <input
style="display:inline;width:30%;" value-for="user" class="form-control" value="Björn" size="40">
++++

[source, cypher,subs=attributes]
----
MATCH (p1:User {public_nickname: {user}})-[l:LIKES]->(statement)
WITH p1, algo.similarity.asVector(statement, l.rating) AS p1Vector

MATCH (p2:User)-[l:LIKES]->(statement) WHERE p2 <> p1
WITH p1, p2, p1Vector, algo.similarity.asVector(statement, l.rating) AS p2Vector
RETURN p1.public_nickname AS from,
       p2.public_nickname AS to,
       algo.similarity.pearson(p1Vector, p2Vector, {vectorType: "maps"}) AS similarity
ORDER BY similarity DESC
LIMIT toInt({k_neighbors})
----

== Get two User and their connection
:user_a: pass:a['<span value-key="user_a">Björn</span>']
:user_b: pass:a['<span value-key="user_b">Christian</span>']


++++
User A: <input style="display:inline;width:30%;" value-for="user_a" class="form-control" value="Björn" size="40"> User B: <input
style="display:inline;width:30%;" value-for="user_b" class="form-control" value="Christian" size="40">
++++

[source, cypher,subs=attributes]
----
match (a:User{public_nickname:{user_a}}), (b:User{public_nickname:{user_b}}), (s:Statement)
WHERE (a)-[:LIKES]->(s) or (b)-[:LIKES]->(s)
return a,b,s
----

== Get Top-N Prediction for User with Pearson-Similarity + Weighted-Average + kNN
:k_neighbors: pass:a['<span value-key="k_neighbors">5</span>']
:top_n: pass:a['<span value-key="top_n">5</span>']
:user: pass:a['<span value-key="user">Björn</span>']


++++
Find Top-<input style="display:inline;width:30%;" value-for="top_n" class="form-control" value="5" size="40"> Predictions for <input
style="display:inline;width:30%;" value-for="user" class="form-control" value="Björn" size="40"> in the <input style="display:inline;width:30%;"
value-for="k_neighbors" class="form-control" value="5" size="40">-NN
++++

[source, cypher,subs=attributes]
----
MATCH (p1:User {public_nickname: {user}})-[l:LIKES]->(statement)
WITH p1, algo.similarity.asVector(statement, l.rating) AS p1Vector

MATCH (p2:User)-[l:LIKES]->(statement) WHERE p2 <> p1
WITH p1, p2, p1Vector, algo.similarity.asVector(statement, l.rating) AS p2Vector
WITH p1 AS from, p2 AS to, algo.similarity.pearson(p1Vector, p2Vector, {vectorType: "maps"}) AS similarity
ORDER BY similarity DESC limit toInt({k_neighbors})

MATCH (to)-[r:LIKES]->(s:Statement) WHERE NOT EXISTS((from)-[:LIKES]->(s))
RETURN from , to, s, sum(similarity * r.rating)/FILTER(x in [sum(abs(similarity)), 1] WHERE NOT x=0)[0] AS prediction
ORDER BY prediction DESC LIMIT toInt({top_n})
----

== Get Top-N Prediction for User with Pearson-Similarity + Mean-Centering + kNN
:k_neighbors: pass:a['<span value-key="k_neighbors">5</span>']
:top_n: pass:a['<span value-key="top_n">5</span>']
:user: pass:a['<span value-key="user">Björn</span>']


++++
Find Top-<input style="display:inline;width:30%;" value-for="top_n" class="form-control" value="5" size="40"> Predictions for <input
style="display:inline;width:30%;" value-for="user" class="form-control" value="Björn" size="40"> in the <input style="display:inline;width:30%;"
value-for="k_neighbors" class="form-control" value="5" size="40">-NN
++++

[source, cypher,subs=attributes]
----
MATCH (p1:User {public_nickname: {user}})-[l:LIKES]->(statement)
WITH p1, algo.similarity.asVector(statement, l.rating) AS p1Vector, avg(l.rating) as u1_avg

MATCH (p2:User)-[l:LIKES]->(statement) WHERE p2 <> p1
WITH p1, p2, u1_avg, p1Vector, algo.similarity.asVector(statement, l.rating) AS p2Vector, avg(l.rating) as u2_avg
WITH p1 AS from, p2 AS to, algo.similarity.pearson(p1Vector, p2Vector, {vectorType: "maps"}) AS similarity, u1_avg, u2_avg
ORDER BY similarity DESC limit toInt({k_neighbors})

MATCH (to)-[r:LIKES]->(s:Statement) WHERE NOT EXISTS((from)-[:LIKES]->(s))
RETURN from , to, s, u1_avg + sum(similarity * (r.rating-u2_avg))/FILTER(x in [sum(abs(similarity)), 1] WHERE NOT x=0)[0] AS prediction
ORDER BY prediction DESC LIMIT toInt({top_n})
----

== Get Top-N Prediction for User with Pearson-Similarity + Z-Score-Normalization + kNN
:k_neighbors: pass:a['<span value-key="k_neighbors">5</span>']
:top_n: pass:a['<span value-key="top_n">5</span>']
:user: pass:a['<span value-key="user">Björn</span>']


++++
Find Top-<input style="display:inline;width:30%;" value-for="top_n" class="form-control" value="5" size="40"> Predictions for <input
style="display:inline;width:30%;" value-for="user" class="form-control" value="Björn" size="40"> in the <input style="display:inline;width:30%;"
value-for="k_neighbors" class="form-control" value="5" size="40">-NN
++++

[source, cypher,subs=attributes]
----
MATCH (p1:User {public_nickname: {user}})-[l:LIKES]->(statement)
WITH p1, algo.similarity.asVector(statement, l.rating) AS p1Vector, avg(l.rating) as u1_avg, stDev(l.rating) as u1_std

MATCH (p2:User)-[l:LIKES]->(statement) WHERE p2 <> p1
WITH p1, p2, u1_avg, u1_std, p1Vector, algo.similarity.asVector(statement, l.rating) AS p2Vector, avg(l.rating) as u2_avg, stDev(l.rating) as u2_std
WITH p1 AS from, p2 AS to, algo.similarity.pearson(p1Vector, p2Vector, {vectorType: "maps"}) AS similarity, u1_avg, u1_std, u2_avg, u2_std
ORDER BY similarity DESC limit toInt({k_neighbors})

MATCH (to)-[r:LIKES]->(s:Statement) WHERE NOT EXISTS((from)-[:LIKES]->(s))
RETURN from , to, s, u1_avg + u1_std* sum(similarity*(r.rating-u2_avg)/u2_std)/FILTER(x in [sum(abs(similarity)), 1] WHERE NOT x=0)[0] AS prediction
ORDER BY prediction DESC LIMIT toInt({top_n})
----