Skip to content
Snippets Groups Projects
Commit e5f3b5e7 authored by Marc Feger's avatar Marc Feger
Browse files

Add Mean-Centering and Z-Score-Normalization for K-NN with Pearson-Correlation

parent 3052144e
No related branches found
No related tags found
No related merge requests found
...@@ -258,9 +258,62 @@ MATCH (p2:User)-[l:LIKES]->(statement) WHERE p2 <> p1 ...@@ -258,9 +258,62 @@ MATCH (p2:User)-[l:LIKES]->(statement) WHERE p2 <> p1
WITH p1, p2, p1Vector, algo.similarity.asVector(statement, l.rating) AS p2Vector WITH p1, p2, p1Vector, algo.similarity.asVector(statement, l.rating) AS p2Vector
WITH p1 AS from, p2 AS to, algo.similarity.pearson(p1Vector, p2Vector, {vectorType: "maps"}) AS similarity WITH p1 AS from, p2 AS to, algo.similarity.pearson(p1Vector, p2Vector, {vectorType: "maps"}) AS similarity
ORDER BY similarity DESC limit toInt({k_neighbors}) ORDER BY similarity DESC limit toInt({k_neighbors})
WHERE similarity <> 0.0
MATCH (to)-[r:LIKES]->(s:Statement) WHERE NOT EXISTS((from)-[:LIKES]->(s)) MATCH (to)-[r:LIKES]->(s:Statement) WHERE NOT EXISTS((from)-[:LIKES]->(s))
return from , to, s, sum(similarity * r.rating)/sum(abs(similarity)) as prediction RETURN from , to, s, sum(similarity * r.rating)/FILTER(x in [sum(abs(similarity)), 1] WHERE NOT x=0)[0] AS prediction
ORDER BY prediction DESC LIMIT toInt({top_n})
----
== Get Top-N Prediction for User with Pearson-Similarity + Mean-Centering + kNN
:k_neighbors: pass:a['<span value-key="k_neighbors">5</span>']
:top_n: pass:a['<span value-key="top_n">5</span>']
:user: pass:a['<span value-key="user">Björn</span>']
++++
Find Top-<input style="display:inline;width:30%;" value-for="top_n" class="form-control" value="5" size="40"> Predictions for <input
style="display:inline;width:30%;" value-for="user" class="form-control" value="Björn" size="40"> in the <input style="display:inline;width:30%;"
value-for="k_neighbors" class="form-control" value="5" size="40">-NN
++++
[source, cypher,subs=attributes]
----
// Top-N predictions for one user: Pearson similarity, mean-centred ratings, k nearest neighbours.
// Step 1: rating vector and mean rating of the selected user.
MATCH (target:User {public_nickname: {user}})-[own:LIKES]->(rated)
WITH target,
     algo.similarity.asVector(rated, own.rating) AS targetVector,
     avg(own.rating) AS targetMean
// Step 2: rating vector and mean rating of every other user.
MATCH (other:User)-[theirs:LIKES]->(item)
WHERE other <> target
WITH target, other, targetMean, targetVector,
     algo.similarity.asVector(item, theirs.rating) AS otherVector,
     avg(theirs.rating) AS otherMean
// Step 3: keep only the k users with the highest Pearson similarity.
WITH target AS from, other AS to,
     algo.similarity.pearson(targetVector, otherVector, {vectorType: "maps"}) AS similarity,
     targetMean, otherMean
ORDER BY similarity DESC
LIMIT toInt({k_neighbors})
// Step 4: statements a neighbour rated that the selected user has not rated yet.
MATCH (to)-[r:LIKES]->(s:Statement)
WHERE NOT EXISTS((from)-[:LIKES]->(s))
// Step 5: own mean plus the similarity-weighted deviation from the neighbour's mean.
// The FILTER expression swaps a zero denominator for 1 to avoid dividing by zero.
// NOTE(review): `to` is a grouping key, so each sum covers a single neighbour's
// rating of s rather than all k neighbours - confirm this is intended.
RETURN from, to, s,
       targetMean + sum(similarity * (r.rating - otherMean))
                    / FILTER(x IN [sum(abs(similarity)), 1] WHERE NOT x = 0)[0] AS prediction
ORDER BY prediction DESC
LIMIT toInt({top_n})
----
== Get Top-N Prediction for User with Pearson-Similarity + Z-Score-Normalization + kNN
:k_neighbors: pass:a['<span value-key="k_neighbors">5</span>']
:top_n: pass:a['<span value-key="top_n">5</span>']
:user: pass:a['<span value-key="user">Björn</span>']
++++
Find Top-<input style="display:inline;width:30%;" value-for="top_n" class="form-control" value="5" size="40"> Predictions for <input
style="display:inline;width:30%;" value-for="user" class="form-control" value="Björn" size="40"> in the <input style="display:inline;width:30%;"
value-for="k_neighbors" class="form-control" value="5" size="40">-NN
++++
[source, cypher,subs=attributes]
----
// Top-N predictions for one user: Pearson similarity, z-score-normalised ratings, k nearest neighbours.
// Step 1: rating vector, mean and standard deviation of the selected user.
MATCH (p1:User {public_nickname: {user}})-[l:LIKES]->(statement)
WITH p1,
     algo.similarity.asVector(statement, l.rating) AS p1Vector,
     avg(l.rating) AS u1_avg,
     stDev(l.rating) AS u1_std
// Step 2: the same three values for every other user.
MATCH (p2:User)-[l:LIKES]->(statement)
WHERE p2 <> p1
WITH p1, p2, u1_avg, u1_std, p1Vector,
     algo.similarity.asVector(statement, l.rating) AS p2Vector,
     avg(l.rating) AS u2_avg,
     stDev(l.rating) AS u2_std
// Step 3: keep only the k users with the highest Pearson similarity.
WITH p1 AS from, p2 AS to,
     algo.similarity.pearson(p1Vector, p2Vector, {vectorType: "maps"}) AS similarity,
     u1_avg, u1_std, u2_avg, u2_std
ORDER BY similarity DESC
LIMIT toInt({k_neighbors})
// Step 4: statements a neighbour rated that the selected user has not rated yet.
MATCH (to)-[r:LIKES]->(s:Statement)
WHERE NOT EXISTS((from)-[:LIKES]->(s))
// Step 5: own mean plus own standard deviation times the similarity-weighted z-score.
// u2_std is 0 when a neighbour's ratings do not vary; dividing by it would yield
// NaN, which then sorts ahead of every real prediction. Such a neighbour carries
// no deviation information, so its z-score is taken as 0.
// The FILTER expression swaps a zero denominator for 1 to avoid dividing by zero.
// NOTE(review): `to` is a grouping key, so each sum covers a single neighbour's
// rating of s rather than all k neighbours - confirm this is intended.
RETURN from, to, s,
       u1_avg + u1_std * sum(similarity * CASE WHEN u2_std = 0 THEN 0.0 ELSE (r.rating - u2_avg) / u2_std END)
                / FILTER(x IN [sum(abs(similarity)), 1] WHERE NOT x = 0)[0] AS prediction
ORDER BY prediction DESC LIMIT toInt({top_n}) ORDER BY prediction DESC LIMIT toInt({top_n})
---- ----
\ No newline at end of file
...@@ -376,10 +376,67 @@ MATCH (p2:User)-[l:LIKES]->(statement) WHERE p2 <> p1 ...@@ -376,10 +376,67 @@ MATCH (p2:User)-[l:LIKES]->(statement) WHERE p2 <> p1
WITH p1, p2, p1Vector, algo.similarity.asVector(statement, l.rating) AS p2Vector WITH p1, p2, p1Vector, algo.similarity.asVector(statement, l.rating) AS p2Vector
WITH p1 AS from, p2 AS to, algo.similarity.pearson(p1Vector, p2Vector, {vectorType: "maps"}) AS similarity WITH p1 AS from, p2 AS to, algo.similarity.pearson(p1Vector, p2Vector, {vectorType: "maps"}) AS similarity
ORDER BY similarity DESC limit toInt('<span value-key="k_neighbors">5</span>') ORDER BY similarity DESC limit toInt('<span value-key="k_neighbors">5</span>')
WHERE similarity <> 0.0
MATCH (to)-[r:LIKES]->(s:Statement) WHERE NOT EXISTS((from)-[:LIKES]->(s)) MATCH (to)-[r:LIKES]->(s:Statement) WHERE NOT EXISTS((from)-[:LIKES]->(s))
return from , to, s, sum(similarity * r.rating)/sum(abs(similarity)) as prediction RETURN from , to, s, sum(similarity * r.rating)/FILTER(x in [sum(abs(similarity)), 1] WHERE NOT x=0)[0] AS prediction
ORDER BY prediction DESC LIMIT toInt('<span value-key="top_n">5</span>')<!--/code--></pre>
</div>
</div>
</div>
</div>
</slide>
<slide class="row-fluid">
<div class="col-sm-12">
<h3>Get Top-N Prediction for User with Pearson-Similarity + Mean-Centering + kNN</h3>
<br/>
<div>
Find Top-<input style="display:inline;width:30%;" value-for="top_n" class="form-control" value="5" size="40"> Predictions for <input
style="display:inline;width:30%;" value-for="user" class="form-control" value="Björn" size="40"> in the <input style="display:inline;width:30%;"
value-for="k_neighbors" class="form-control" value="5" size="40">-NN
<div class="listingblock">
<div class="content">
<pre mode="cypher" class="highlight pre-scrollable programlisting cm-s-neo code runnable standalone-example ng-binding" data-lang="cypher" lang="cypher"><!--code class="cypher language-cypher"-->MATCH (p1:User {public_nickname: '<span value-key="user">Björn</span>'})-[l:LIKES]->(statement)
// Top-N predictions for one user: Pearson similarity, mean-centred ratings, k nearest neighbours.
// Rating vector and mean rating of the selected user.
WITH p1, algo.similarity.asVector(statement, l.rating) AS p1Vector, avg(l.rating) as u1_avg
// Rating vector and mean rating of every other user.
MATCH (p2:User)-[l:LIKES]->(statement) WHERE p2 <> p1
WITH p1, p2, u1_avg, p1Vector, algo.similarity.asVector(statement, l.rating) AS p2Vector, avg(l.rating) as u2_avg
// Pearson similarity per user pair; only the k most similar users are kept.
WITH p1 AS from, p2 AS to, algo.similarity.pearson(p1Vector, p2Vector, {vectorType: "maps"}) AS similarity, u1_avg, u2_avg
ORDER BY similarity DESC limit toInt('<span value-key="k_neighbors">5</span>')
// Statements a neighbour rated that the selected user has not rated yet.
MATCH (to)-[r:LIKES]->(s:Statement) WHERE NOT EXISTS((from)-[:LIKES]->(s))
// Own mean plus the similarity-weighted deviation from the neighbour's mean.
// The FILTER expression swaps a zero denominator for 1 to avoid dividing by zero.
// NOTE(review): `to` is a grouping key, so each sum covers a single neighbour's
// rating of s rather than all k neighbours - confirm this is intended.
RETURN from , to, s, u1_avg + sum(similarity * (r.rating-u2_avg))/FILTER(x in [sum(abs(similarity)), 1] WHERE NOT x=0)[0] AS prediction
ORDER BY prediction DESC LIMIT toInt('<span value-key="top_n">5</span>')<!--/code--></pre>
</div>
</div>
</div>
</div>
</slide>
<slide class="row-fluid">
<div class="col-sm-12">
<h3>Get Top-N Prediction for User with Pearson-Similarity + Z-Score-Normalization + kNN</h3>
<br/>
<div>
Find Top-<input style="display:inline;width:30%;" value-for="top_n" class="form-control" value="5" size="40"> Predictions for <input
style="display:inline;width:30%;" value-for="user" class="form-control" value="Björn" size="40"> in the <input style="display:inline;width:30%;"
value-for="k_neighbors" class="form-control" value="5" size="40">-NN
<div class="listingblock">
<div class="content">
<pre mode="cypher" class="highlight pre-scrollable programlisting cm-s-neo code runnable standalone-example ng-binding" data-lang="cypher" lang="cypher"><!--code class="cypher language-cypher"-->MATCH (p1:User {public_nickname: '<span value-key="user">Björn</span>'})-[l:LIKES]->(statement)
// Top-N predictions for one user: Pearson similarity, z-score-normalised ratings, k nearest neighbours.
// Rating vector, mean and standard deviation of the selected user.
WITH p1, algo.similarity.asVector(statement, l.rating) AS p1Vector, avg(l.rating) as u1_avg, stDev(l.rating) as u1_std
// The same three values for every other user.
MATCH (p2:User)-[l:LIKES]->(statement) WHERE p2 <> p1
WITH p1, p2, u1_avg, u1_std, p1Vector, algo.similarity.asVector(statement, l.rating) AS p2Vector, avg(l.rating) as u2_avg, stDev(l.rating) as u2_std
// Pearson similarity per user pair; only the k most similar users are kept.
WITH p1 AS from, p2 AS to, algo.similarity.pearson(p1Vector, p2Vector, {vectorType: "maps"}) AS similarity, u1_avg, u1_std, u2_avg, u2_std
ORDER BY similarity DESC limit toInt('<span value-key="k_neighbors">5</span>')
// Statements a neighbour rated that the selected user has not rated yet.
MATCH (to)-[r:LIKES]->(s:Statement) WHERE NOT EXISTS((from)-[:LIKES]->(s))
// Own mean plus own standard deviation times the similarity-weighted z-score.
// u2_std is 0 when a neighbour's ratings do not vary; dividing by it would yield
// NaN, which then sorts ahead of every real prediction. Such a neighbour carries
// no deviation information, so its z-score is taken as 0.
// The FILTER expression swaps a zero denominator for 1 to avoid dividing by zero.
// NOTE(review): `to` is a grouping key, so each sum covers a single neighbour's
// rating of s rather than all k neighbours - confirm this is intended.
RETURN from , to, s, u1_avg + u1_std* sum(similarity * CASE WHEN u2_std = 0 THEN 0.0 ELSE (r.rating-u2_avg)/u2_std END)/FILTER(x in [sum(abs(similarity)), 1] WHERE NOT x=0)[0] AS prediction
ORDER BY prediction DESC LIMIT toInt('<span value-key="top_n">5</span>')<!--/code--></pre> ORDER BY prediction DESC LIMIT toInt('<span value-key="top_n">5</span>')<!--/code--></pre>
</div> </div>
</div> </div>
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment