/
main.py
129 lines (101 loc) · 4.02 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import os, sys, logging
from typing import List
import frontmatter
import pandas as pd
import yaml
# Short licence/warranty notice; the script entry point prints it at start-up
# right after the ASCII banner.
# NOTE(review): the notice tells the user to type `show w` / `show c`, but no
# handling of those commands is visible in this file - confirm whether it
# exists elsewhere or whether the wording should be adjusted.
LICENSE_TXT = """
metadata_to_csv Copyright (C) 2024 Maxime Bonin
This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
"""
# ASCII-art product banner; the script entry point prints it first at start-up.
# NOTE(review): this is a regular (non-raw) string that contains backslashes.
# Sequences such as backslash-space and backslash-underscore are not recognised
# escapes, which recent Python versions warn about, while the doubled backslash
# on the fourth art line is a real escape producing a single backslash. Keep
# that in mind before converting the literal to a raw string.
# NOTE(review): the art's horizontal spacing looks collapsed in this copy -
# verify it against the rendered banner.
PRODUCT_ASCII = """
_ _ _ _
_ __ ___ ___| |_ __ _ __| | __ _| |_ __ _ | |_ ___ ___ _____ __
| '_ ` _ \ / _ \ __/ _` |/ _` |/ _` | __/ _` | | __/ _ \ / __/ __\ \ / /
| | | | | | __/ || (_| | (_| | (_| | || (_| | | || (_) | | (__\__ \\ V /
|_| |_| |_|\___|\__\__,_|\__,_|\__,_|\__\__,_|___\__\___/___\___|___/ \_/
|_____| |_____|
"""
def get_all_files_from(directory: str, allowed_extensions: List[str]) -> List[str]:
    """
    Walks given directory tree and returns the path of every file whose
    extension is allowed.

    Extension matching is case-insensitive on both sides: the extension of
    each file and every entry of allowed_extensions are lower-cased before
    being compared, so ".MD" in the configuration matches "notes.md" and
    "NOTES.MD" alike.

    Parameters:
    - directory (str): name of the directory to extract files from
    - allowed_extensions (List[str]): extensions to keep, written with their
      leading dot (e.g. ".md"), which is the form os.path.splitext produces.
      A single bare string is accepted and treated as a one-element list.

    Returns:
    List[str]: absolute paths of all matching files found under directory.
    The list is empty when nothing matches or when the directory cannot be
    walked (os.walk ignores listing errors by default).
    """
    # A bare string would otherwise be iterated character by character;
    # treat it as a single extension instead.
    if isinstance(allowed_extensions, str):
        allowed_extensions = [allowed_extensions]

    # Normalise once, outside the loops, so every membership test is an O(1)
    # lookup against lower-cased entries.
    wanted_extensions = {extension.lower() for extension in allowed_extensions}

    files = []
    for root, _, filenames in os.walk(directory):
        for filename in filenames:
            _, file_extension = os.path.splitext(filename)
            if file_extension.lower() in wanted_extensions:
                full_path = os.path.join(root, filename)
                files.append(os.path.abspath(full_path))
    return files
def get_all_metadata_from(files: List[str], encoding: str) -> List:
    """
    Reads the frontmatter of every given file and collects it, one record
    per file

    Parameters:
    - files: List of strings that are paths toward the files to read
    - encoding: text encoding used to open each file

    Returns:
    - List of dictionaries, in the same order as files. Each dictionary holds
      the frontmatter keys of one file (inserted in sorted key order) plus a
      "filepath" entry set to that file's path
    """
    records = []
    for path in files:
        with open(path, encoding=encoding) as handle:
            post = frontmatter.load(handle)
        # Sorted insertion keeps the key order of every record predictable.
        record = {name: post[name] for name in sorted(post.keys())}
        # Assigned last, so it wins over a frontmatter key named "filepath".
        record["filepath"] = path
        records.append(record)
    return records
def main(dir_path: str, configs: dict) -> None:
    """
    Exports the frontmatter metadata of every allowed file found under
    dir_path to a CSV file

    Parameters:
    - dir_path (str): directory whose tree is scanned for files
    - configs (dict): settings; the keys "allowed_extensions", "encoding"
      and "csv_file_name" are read

    Returns:
    None: the CSV is written to configs["csv_file_name"] without the
    DataFrame index column
    """
    extensions = configs["allowed_extensions"]
    matching_files = get_all_files_from(dir_path, allowed_extensions=extensions)
    records = get_all_metadata_from(
        files=matching_files, encoding=configs["encoding"]
    )
    pd.DataFrame(records).to_csv(configs["csv_file_name"], index=False)
if __name__ == "__main__":
    # Command-line entry point.
    # Usage: main.py <root_directory> [csv_filename]
    #   root_directory: directory to scan, relative to the current working
    #                   directory (an absolute path is used as given)
    #   csv_filename:   optional output name; overrides "csv_file_name" from
    #                   config.yml
    print(PRODUCT_ASCII)
    print(LICENSE_TXT)

    # Required first argument. Logging is not configured yet, so the problem
    # is reported with plain prints.
    try:
        files_directory_name = sys.argv[1]
    except IndexError:
        print("No root directory argument was given as first parameter")
        print("root file directory(within current directory) name is expected\r")
        sys.exit(1)

    # Load settings and configure logging from them.
    try:
        with open("config.yml", "r") as config_file:
            # NOTE(security): FullLoader is kept to preserve behavior. If
            # config.yml can ever come from an untrusted source, switch to
            # yaml.safe_load.
            configs = yaml.load(config_file, Loader=yaml.FullLoader)
        logging.basicConfig(
            level=(
                logging.INFO
                if configs["logging_level"] == "INFO"
                else logging.ERROR
            ),
            format="%(asctime)s [%(levelname)s] %(message)s",
            datefmt="%Y-%m-%d %H:%M:%S",
        )
    except Exception as e:
        # Top-level boundary: report the cause instead of swallowing it.
        logging.error("could not read configuration file config.yml and set logger")
        logging.error(f"details: {str(e)}", exc_info=True)
        sys.exit(1)

    # Optional second argument. Previously it was read but never used, so the
    # CSV was always written to the configured name; now it overrides it.
    try:
        configs["csv_file_name"] = sys.argv[2]
    except IndexError:
        logging.info(
            f"no csv_filename provided as second argument. will use default value: {configs['csv_file_name']}"
        )

    try:
        # os.path.join keeps relative names under the current directory and
        # leaves an absolute path untouched.
        main(os.path.join(os.getcwd(), files_directory_name), configs=configs)
    except Exception as e:
        logging.error("could not produce CSV file")
        logging.error(f"details: {str(e)}", exc_info=True)
        sys.exit(1)

    # abspath resolves against the current working directory, which is where
    # a relative CSV name is written.
    logging.info(
        f'find your CSV metadata file at: {os.path.abspath(configs["csv_file_name"])}'
    )
    sys.exit(0)