= Cypher: A data exploration of D-BAS == D-BAS image::https://dbas.cs.uni-duesseldorf.de/static/images/logo/black.svg?x=1549315407[width=200,float=right] https://dbas.cs.uni-duesseldorf.de[D-BAS] is a dialog-based argumentation system which presents its discussions within a graph structure. The discussion graph is stored internally in the form of tables. This presentation will convert the tabular representation of the data into a graph structure and explain suitable https://neo4j.com/developer/cypher-query-language/[Cypher] requests. == Connect to the D-BAS database :password: pass:a['<span value-key="password">FooBar</span>'] :table: pass:a['<span value-key="table">textversions</span>'] Since this tool is located in the same network as D-BAS, the containers **web** and **db** can be addressed. Therefore, queries can be sent directly to the D-BAS database. So that Neo4j can submit the requests to PostgreSQL, the plugins https://neo4j-contrib.github.io/neo4j-apoc-procedures/[APOC] and the https://jdbc.postgresql.org/[PostgreSQL JDBC Driver] are provided. The following example connects Neo4J to the D-BAS database **db**. Please enter the password of **db**: ++++ Password: <input style="display:inline;width:30%;" value-for="password" class="form-control" value="FooBar" size="40"> Table: <input style="display:inline;width:30%;" value-for="table" class="form-control" value="textversions" size="40"> ++++ [source, cypher,subs=attributes] ---- WITH 'jdbc:postgresql://db/discussion?user=postgres&password=' + {password} AS url CALL apoc.load.jdbc(url, {table}) YIELD row RETURN row ---- == Creating the statement nodes :password: pass:a['<span value-key="password">FooBar</span>'] ++++ Password: <input style="display:inline;width:30%;" value-for="password" class="form-control" value="FooBar" size="40"> ++++ [source, cypher,subs=attributes] ---- WITH 'jdbc:postgresql://db/discussion?user=postgres&password=' + {password} AS url CALL apoc.load.jdbc(url, 'statements') YIELD row MERGE (a:Statement{uid:row.uid, is_position:row.is_position, is_disabled:row.is_disabled}) RETURN a ---- == Filling the statement nodes with textversions :password: pass:a['<span value-key="password">FooBar</span>'] ++++ Password: <input style="display:inline;width:30%;" value-for="password" class="form-control" value="FooBar" size="40"> ++++ [source, cypher,subs=attributes] ---- WITH 'jdbc:postgresql://db/discussion?user=postgres&password=' + 'FooBar' AS url CALL apoc.load.jdbc(url, 'textversions') YIELD row MATCH (a:Statement{uid:row.statement_uid}) WHERE NOT EXISTS(a.content) SET a += {content:row.content} RETURN a ---- == Creating the user nodes :password: pass:a['<span value-key="password">FooBar</span>'] ++++ Password: <input style="display:inline;width:30%;" value-for="password" class="form-control" value="FooBar" size="40"> ++++ [source, cypher,subs=attributes] ---- WITH 'jdbc:postgresql://db/discussion?user=postgres&password=' + {password} AS url CALL apoc.load.jdbc(url, 'users') YIELD row MERGE (a:User{uid:row.uid, public_nickname:row.public_nickname}) RETURN a ---- == Create relation between Users and Statements :password: pass:a['<span value-key="password">FooBar</span>'] ++++ Password: <input style="display:inline;width:30%;" value-for="password" class="form-control" value="FooBar" size="40"> ++++ [source, cypher,subs=attributes] ---- WITH 'jdbc:postgresql://db/discussion?user=postgres&password=' + {password} AS url CALL apoc.load.jdbc(url, 'textversions') YIELD row MATCH (a:User), (b:Statement) WHERE a.uid = row.author_uid AND b.uid = row.statement_uid MERGE (a)-[r:HAS_WRITTEN]->(b) RETURN a, b, r ---- == Create issue nodes :password: pass:a['<span value-key="password">FooBar</span>'] ++++ Password: <input style="display:inline;width:30%;" value-for="password" class="form-control" value="FooBar" size="40"> ++++ [source, cypher,subs=attributes] ---- WITH 'jdbc:postgresql://db/discussion?user=postgres&password=' + 'FooBar' AS url CALL apoc.load.jdbc(url, 'issues') YIELD row MERGE (a:Issue{uid:row.uid, title:row.title}) RETURN a ---- == Connect statements with issues :password: pass:a['<span value-key="password">FooBar</span>'] ++++ Password: <input style="display:inline;width:30%;" value-for="password" class="form-control" value="FooBar" size="40"> ++++ [source, cypher,subs=attributes] ---- WITH 'jdbc:postgresql://db/discussion?user=postgres&password=' + 'FooBar' AS url CALL apoc.load.jdbc(url, 'statement_to_issue') YIELD row MATCH (a:Statement{uid:row.statement_uid}), (b:Issue{uid:row.issue_uid}) MERGE (a)-[r:WRITTEN_IN]->(b) RETURN a,b,r ---- == Every User who has written a Position likes it :max_rating: pass:a['<span value-key="max_rating">2</span>'] ++++ Rating between 0 and : <input style="display:inline;width:30%;" value-for="max_rating" class="form-control" value="2" size="40"> ++++ [source, cypher,subs=attributes] ---- MATCH (a:User)-[:HAS_WRITTEN]->(b:Statement{is_position:True}) MERGE (a)-[r:LIKES{rating:toInt({max_rating})}]->(b) RETURN a, b, r ---- == Every User who has written none gets a random rating for a Position :max_rating: pass:a['<span value-key="max_rating">2</span>'] ++++ Rating between 0 and : <input style="display:inline;width:30%;" value-for="max_rating" class="form-control" value="2" size="40"> ++++ [source, cypher,subs=attributes] ---- MATCH (a:User) WHERE NOT EXISTS((a)-[:LIKES]->()) MATCH (b:Statement) WHERE b.is_position MERGE (a)-[:LIKES{rating:round(rand()*toInt({max_rating}))}]->(b) ---- == Delete every Rating with 0.0 [source, cypher,subs=attributes] ---- MATCH ()-[r:LIKES{rating:0.0}]->() DETACH DELETE r ---- == Get sub-graph :max_number: pass:a['<span value-key="max_number">10</span>'] ++++ Uid between 0 and : <input style="display:inline;width:30%;" value-for="max_number" class="form-control" value="10" size="40"> ++++ [source, cypher,subs=attributes] ---- MATCH (a:User), (b:Statement{is_position:True}) WHERE a.uid in range(0, toInt({max_number})) RETURN a,b ---- == Get k-nearest-neighbours with Pearson-Correlation :k_neighbors: pass:a['<span value-key="k_neighbors">5</span>'] :user: pass:a['<span value-key="user">Björn</span>'] ++++ Find top <input style="display:inline;width:30%;" value-for="k_neighbors" class="form-control" value="5" size="40">-neighbors for <input style="display:inline;width:30%;" value-for="user" class="form-control" value="Björn" size="40"> ++++ [source, cypher,subs=attributes] ---- MATCH (p1:User {public_nickname: {user}})-[l:LIKES]->(statement) WITH p1, algo.similarity.asVector(statement, l.rating) AS p1Vector MATCH (p2:User)-[l:LIKES]->(statement) WHERE p2 <> p1 WITH p1, p2, p1Vector, algo.similarity.asVector(statement, l.rating) AS p2Vector RETURN p1.public_nickname AS from, p2.public_nickname AS to, algo.similarity.pearson(p1Vector, p2Vector, {vectorType: "maps"}) AS similarity ORDER BY similarity DESC LIMIT toInt({k_neighbors}) ---- == Get two User and their connection :user_a: pass:a['<span value-key="user_a">Björn</span>'] :user_b: pass:a['<span value-key="user_b">Christian</span>'] ++++ User A: <input style="display:inline;width:30%;" value-for="user_a" class="form-control" value="Björn" size="40"> User B: <input style="display:inline;width:30%;" value-for="user_b" class="form-control" value="Christian" size="40"> ++++ [source, cypher,subs=attributes] ---- match (a:User{public_nickname:{user_a}}), (b:User{public_nickname:{user_b}}), (s:Statement) WHERE (a)-[:LIKES]->(s) or (b)-[:LIKES]->(s) return a,b,s ---- == Get Top-N Prediction for User with Pearson-Similarity + Weighted-Average + kNN :k_neighbors: pass:a['<span value-key="k_neighbors">5</span>'] :top_n: pass:a['<span value-key="top_n">5</span>'] :user: pass:a['<span value-key="user">Björn</span>'] ++++ Find Top-<input style="display:inline;width:30%;" value-for="top_n" class="form-control" value="5" size="40"> Predictions for <input style="display:inline;width:30%;" value-for="user" class="form-control" value="Björn" size="40"> in the <input style="display:inline;width:30%;" value-for="k_neighbors" class="form-control" value="5" size="40">-NN ++++ [source, cypher,subs=attributes] ---- MATCH (p1:User {public_nickname: {user}})-[l:LIKES]->(statement) WITH p1, algo.similarity.asVector(statement, l.rating) AS p1Vector MATCH (p2:User)-[l:LIKES]->(statement) WHERE p2 <> p1 WITH p1, p2, p1Vector, algo.similarity.asVector(statement, l.rating) AS p2Vector WITH p1 AS from, p2 AS to, algo.similarity.pearson(p1Vector, p2Vector, {vectorType: "maps"}) AS similarity ORDER BY similarity DESC limit toInt({k_neighbors}) MATCH (to)-[r:LIKES]->(s:Statement) WHERE NOT EXISTS((from)-[:LIKES]->(s)) RETURN from , to, s, sum(similarity * r.rating)/FILTER(x in [sum(abs(similarity)), 1] WHERE NOT x=0)[0] AS prediction ORDER BY prediction DESC LIMIT toInt({top_n}) ---- == Get Top-N Prediction for User with Pearson-Similarity + Mean-Centering + kNN :k_neighbors: pass:a['<span value-key="k_neighbors">5</span>'] :top_n: pass:a['<span value-key="top_n">5</span>'] :user: pass:a['<span value-key="user">Björn</span>'] ++++ Find Top-<input style="display:inline;width:30%;" value-for="top_n" class="form-control" value="5" size="40"> Predictions for <input style="display:inline;width:30%;" value-for="user" class="form-control" value="Björn" size="40"> in the <input style="display:inline;width:30%;" value-for="k_neighbors" class="form-control" value="5" size="40">-NN ++++ [source, cypher,subs=attributes] ---- MATCH (p1:User {public_nickname: {user}})-[l:LIKES]->(statement) WITH p1, algo.similarity.asVector(statement, l.rating) AS p1Vector, avg(l.rating) as u1_avg MATCH (p2:User)-[l:LIKES]->(statement) WHERE p2 <> p1 WITH p1, p2, u1_avg, p1Vector, algo.similarity.asVector(statement, l.rating) AS p2Vector, avg(l.rating) as u2_avg WITH p1 AS from, p2 AS to, algo.similarity.pearson(p1Vector, p2Vector, {vectorType: "maps"}) AS similarity, u1_avg, u2_avg ORDER BY similarity DESC limit toInt({k_neighbors}) MATCH (to)-[r:LIKES]->(s:Statement) WHERE NOT EXISTS((from)-[:LIKES]->(s)) RETURN from , to, s, u1_avg + sum(similarity * (r.rating-u2_avg))/FILTER(x in [sum(abs(similarity)), 1] WHERE NOT x=0)[0] AS prediction ORDER BY prediction DESC LIMIT toInt({top_n}) ---- == Get Top-N Prediction for User with Pearson-Similarity + Z-Score-Normalization + kNN :k_neighbors: pass:a['<span value-key="k_neighbors">5</span>'] :top_n: pass:a['<span value-key="top_n">5</span>'] :user: pass:a['<span value-key="user">Björn</span>'] ++++ Find Top-<input style="display:inline;width:30%;" value-for="top_n" class="form-control" value="5" size="40"> Predictions for <input style="display:inline;width:30%;" value-for="user" class="form-control" value="Björn" size="40"> in the <input style="display:inline;width:30%;" value-for="k_neighbors" class="form-control" value="5" size="40">-NN ++++ [source, cypher,subs=attributes] ---- MATCH (p1:User {public_nickname: {user}})-[l:LIKES]->(statement) WITH p1, algo.similarity.asVector(statement, l.rating) AS p1Vector, avg(l.rating) as u1_avg, stDev(l.rating) as u1_std MATCH (p2:User)-[l:LIKES]->(statement) WHERE p2 <> p1 WITH p1, p2, u1_avg, u1_std, p1Vector, algo.similarity.asVector(statement, l.rating) AS p2Vector, avg(l.rating) as u2_avg, stDev(l.rating) as u2_std WITH p1 AS from, p2 AS to, algo.similarity.pearson(p1Vector, p2Vector, {vectorType: "maps"}) AS similarity, u1_avg, u1_std, u2_avg, u2_std ORDER BY similarity DESC limit toInt({k_neighbors}) MATCH (to)-[r:LIKES]->(s:Statement) WHERE NOT EXISTS((from)-[:LIKES]->(s)) RETURN from , to, s, u1_avg + u1_std* sum(similarity*(r.rating-u2_avg)/u2_std)/FILTER(x in [sum(abs(similarity)), 1] WHERE NOT x=0)[0] AS prediction ORDER BY prediction DESC LIMIT toInt({top_n}) ----