-
Notifications
You must be signed in to change notification settings - Fork 2
/
create_posts.py
90 lines (69 loc) · 2.12 KB
/
create_posts.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# -*- coding: utf-8 -*-
import json
import re
""" Reads the lyrics corpus in JSON and creates separate files for _data and for _posts """
file_name_structure = "2020-04-18-item-DIV_ID.md"
post_structure = """---
layout: post
title: TITLE
performer: PERFORMER
dialect: Sorani
audio: "AUDIO"
categories:
- CATEGORY
comments: true
---
<div class="language-plaintext highlighter-rouge">
<div class="highlight">
<pre class="highlight">
<code>
{% for line in site.data.itemDIV_ID.item.lg %}
{{ line }}
{% endfor %}
</code>
</pre>
</div>
</div>
"""
json_structure = """
{
"item": {
"head": "TITLE",
"singer": "PERFORMER",
"audio": "AUDIO",
"lg": [TEXT]
}
}
"""
def _load_audio_categories(path):
    """Read the tab-separated index at *path* into a dict.

    The first column of every row becomes the key and the third column the
    value. Blank rows (for example the one produced by a trailing newline)
    are skipped instead of raising ``IndexError``. A non-blank row with
    fewer than three columns still raises ``IndexError``.
    """
    categories = {}
    with open(path, "r", encoding="utf8") as f:
        for row in f.read().split("\n"):
            if not row.strip():
                continue
            columns = row.split("\t")
            categories[columns[0]] = columns[2]
    return categories


def _flatten_lyric_lines(node):
    """Return the lyric lines found under *node* as a flat list of strings.

    A string is a single line, a list is flattened element by element, and
    a dict contributes whatever is stored under its ``"l"`` key (a missing
    ``"l"`` raises ``KeyError``). Any other value is skipped.
    """
    if isinstance(node, str):
        return [node]
    if isinstance(node, dict):
        return _flatten_lyric_lines(node["l"])
    if isinstance(node, list):
        flat = []
        for child in node:
            flat.extend(_flatten_lyric_lines(child))
        return flat
    return []


def _fill_template(template, values):
    """Replace every placeholder key of *values* in *template* in one pass.

    A single regex substitution is used rather than chained ``str.replace``
    calls so that text which was just inserted (e.g. a title that happens
    to contain the word ``AUDIO``) is never rescanned and replaced again.
    """
    if not values:
        return template
    # Longest keys first so a placeholder can never be shadowed by a shorter
    # one that is its prefix.
    keys = sorted(values, key=len, reverse=True)
    pattern = re.compile("|".join(re.escape(key) for key in keys))
    # A function replacement keeps backslashes in the values literal.
    return pattern.sub(lambda match: values[match.group(0)], template)


def _json_escape(text):
    """Return *text* escaped for use between double quotes in a JSON file."""
    return json.dumps(text, ensure_ascii=False)[1:-1]


# The inputs are read as UTF-8, matching the encoding used for the outputs.
with open("KurdishLyricsCorpus.json", encoding="utf8") as f:
    corpus = json.load(f)

audio_indices = _load_audio_categories("else/audio_indices.tsv")

for item in corpus["lyrics"]:
    lg = item["div"]
    item_id = item["@id"]

    # assign data: empty or missing metadata falls back to "unassigned"
    metadata = {}
    for key, value in {"TITLE": lg["head"], "PERFORMER": lg["singer"], "AUDIO": lg["audio"]}.items():
        metadata[key] = value if value else "unassigned"

    # assign lyrics text
    item_json_content = _flatten_lyric_lines(lg["lg"])

    # assign category and ID; an id missing from the index raises KeyError
    post_values = dict(metadata)
    post_values["CATEGORY"] = audio_indices[item_id]
    post_values["DIV_ID"] = item_id
    item_post_file = _fill_template(post_structure, post_values)

    # Values placed in the JSON file are escaped so that quotes or
    # backslashes in the corpus cannot produce an invalid document.
    json_values = {key: _json_escape(value) for key, value in metadata.items()}
    json_values["DIV_ID"] = _json_escape(item_id)
    json_values["TEXT"] = ',\n\t'.join(
        json.dumps(line, ensure_ascii=False) for line in item_json_content
    )
    item_json_file = _fill_template(json_structure, json_values)

    # save files
    with open("_posts/%s" % file_name_structure.replace("DIV_ID", item_id), 'w', encoding='utf8') as f:
        f.write(item_post_file)
    with open("_data/item%s.json" % item_id, 'w', encoding='utf8') as f:
        f.write(item_json_file)