From 633bc75634ed8cdc5cdf72b0baf187c7a23d7bf4 Mon Sep 17 00:00:00 2001
From: jaste111 <jan.steimann@hhu.de>
Date: Mon, 28 Jun 2021 10:57:45 +0200
Subject: [PATCH] Add S3 config and use real csv file

---
 .gitlab-ci.yml              | 7 +++++++
 ComputeCommentEmbeddings.py | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 65a605b..2c75d15 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -1,10 +1,17 @@
 image: "python:3.7"
 
+before_script:
+  - wget https://dl.min.io/client/mc/release/linux-amd64/mc  # MinIO client, saved into the working directory
+  - chmod +x mc
+  - ./mc alias set S3Uni "$MINIO_HOST" "$MINIO_ACCESS_KEY" "$MINIO_SECRET_KEY"  # ./ because the working directory is not on PATH
+
 python_run:
   script:
     - pip install flair
     - pip install torch
     - pip install pandas
+    - ./mc cp S3Uni/steimann/CommentsApril2017.csv .  # input file read by ComputeCommentEmbeddings.py
     - python3 ComputeCommentEmbeddings.py
+    - ./mc cp embedded_comments.pt S3Uni/steimann/  # upload the file the script saved
   tags:
     - "ht"
\ No newline at end of file
diff --git a/ComputeCommentEmbeddings.py b/ComputeCommentEmbeddings.py
index 93bf104..ed792af 100644
--- a/ComputeCommentEmbeddings.py
+++ b/ComputeCommentEmbeddings.py
@@ -18,6 +18,6 @@ def compute_embedding(comments: dict) -> dict:
 
 
 if __name__ == '__main__':
-    comments = get_argument_list(path="", filename="CommentsApril2017_DEV.csv")
+    comments = get_argument_list(path="", filename="CommentsApril2017.csv")  # real (non-DEV) CSV; the CI job copies it into the working directory first
     comments = compute_embedding(comments)
     torch.save(comments, "embedded_comments.pt")
-- 
GitLab