Skip to content
Snippets Groups Projects
Commit ee1e6130 authored by Marc Feger
Browse files

Add check for valid published data; move melter

parent 5378974b
No related branches found
No related tags found
No related merge requests found
from tokenize import String from tokenize import String
from typing import Dict, List from typing import Dict, List
from melter import melt_wikidata_and_dbpedia, clean_wikidata from src.melter import melt_wikidata_and_dbpedia, clean_wikidata
from src import WIKIDATA_ENTRYPOINT, DBPEDIA_ENTRYPOINT from src import WIKIDATA_ENTRYPOINT, DBPEDIA_ENTRYPOINT
from src.lib.composer import Composer from src.lib.composer import Composer
from src.lib.decapper import Decapper from src.lib.decapper import Decapper
......
...@@ -149,6 +149,7 @@ def clean_wikidata() -> List: ...@@ -149,6 +149,7 @@ def clean_wikidata() -> List:
for data in melted_data: for data in melted_data:
movie = Decapper(data[ResultKeys.movie.value]).unpack() movie = Decapper(data[ResultKeys.movie.value]).unpack()
duration = Decapper(data[ResultKeys.duration.value]).unpack() duration = Decapper(data[ResultKeys.duration.value]).unpack()
published = Decapper(data[ResultKeys.published.value]).unpack()
if movie in bad_films: if movie in bad_films:
continue continue
if duration: if duration:
...@@ -156,6 +157,11 @@ def clean_wikidata() -> List: ...@@ -156,6 +157,11 @@ def clean_wikidata() -> List:
continue continue
if not all([(int(i) >= 0) for i in duration.split(ResultKeys.line_separator.value)]): if not all([(int(i) >= 0) for i in duration.split(ResultKeys.line_separator.value)]):
continue continue
if published:
if not all(
[(i[10] == 'T' and i[19] == 'Z' and i.count(':') == 2 and i.count('-') == 2)
for i in published.split(ResultKeys.line_separator.value)]):
continue
cleaned += [data] cleaned += [data]
FileWriter(destination='./static/wikidata/cleaned.txt', data=cleaned).as_json_of_list() FileWriter(destination='./static/wikidata/cleaned.txt', data=cleaned).as_json_of_list()
return cleaned return cleaned
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment