Skip to content

Commit e097d0a

Browse files
committed
Update __init__.py
1 parent 49d516d commit e097d0a

File tree

1 file changed

+259
-1
lines changed

1 file changed

+259
-1
lines changed

vcpkg_sbom/__init__.py

Lines changed: 259 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,266 @@
1+
import argparse
import collections
import datetime
import hashlib
import io
import json
import os
import pathlib
import re
import typing

import rich
import rich.console
import rich.progress
import rich.table
import spdx_tools.spdx.document_utils
import spdx_tools.spdx.model as spdx
import spdx_tools.spdx.parser.parse_anything
import spdx_tools.spdx.spdx_element_utils
import spdx_tools.spdx.validation
import spdx_tools.spdx.validation.uri_validators
import spdx_tools.spdx.writer.write_anything
import toolz
21+
22+
23+
# NOTE: from https://github.com/Textualize/rich/discussions/482
24+
class TableProgress(rich.progress.Progress):
    """Progress display that renders a two-column results table above the bars.

    The table has an ``"ID"`` column and a second column titled ``name``.
    Rows are supplied through :meth:`update_table`; at most
    ``table_max_rows`` rows are retained, the oldest being dropped first.

    Args:
        table_max_rows: Maximum number of rows kept in the table.
        name: Header of the second table column.
        *args: Forwarded unchanged to ``rich.progress.Progress``.
        **kwargs: Forwarded unchanged to ``rich.progress.Progress``.
    """

    def __init__(self, table_max_rows: int, name: str, *args, **kwargs) -> None:
        # A bounded deque discards the oldest row once ``table_max_rows`` is hit.
        self.results: typing.Deque[typing.Tuple[str, ...]] = collections.deque(
            maxlen=table_max_rows
        )
        self.name = name
        # Build the (still empty) table before initialising the base class so
        # ``self.table`` exists by the time ``get_renderable`` can be called.
        self.update_table()
        super().__init__(*args, **kwargs)

    def update_table(
        self, result: typing.Optional[typing.Tuple[str, ...]] = None
    ) -> None:
        """Append ``result`` (if given) and rebuild the table from stored rows.

        Args:
            result: Cell values for one row, in column order (ID first), or
                ``None`` to only rebuild the table.
        """
        if result is not None:
            self.results.append(result)

        table = rich.table.Table()
        table.add_column("ID")
        table.add_column(self.name, width=80)
        for row_cells in self.results:
            table.add_row(*row_cells)

        self.table = table

    def get_renderable(
        self,
    ) -> typing.Union[rich.console.ConsoleRenderable, rich.console.RichCast, str]:
        """Return the table followed by the regular progress renderables."""
        return rich.console.Group(self.table, *self.get_renderables())
48+
49+
50+
def _sha1_of_file(path: pathlib.Path) -> str:
    """Return the hexadecimal SHA-1 digest of the file at ``path``."""
    digest = hashlib.sha1()
    with open(path, "rb") as f:
        # Chunked reads keep memory flat for large files.
        for chunk in iter(lambda: f.read(65536), b""):
            digest.update(chunk)
    return digest.hexdigest()


def _assign_unique_id(element, unique_ids, temp_ids) -> None:
    """Suffix ``element.spdx_id`` with a running counter and record the suffix.

    ``unique_ids`` counts how often each original ID was seen across all
    merged documents; ``temp_ids`` remembers, for the current document only,
    which suffix each original ID received.
    """
    original_id = element.spdx_id
    suffix = unique_ids[original_id]
    unique_ids[original_id] += 1
    temp_ids[original_id] = suffix
    element.spdx_id = f"{original_id}-{suffix}"


def _remap_spdx_id(element_id, temp_ids):
    """Return the renamed form of ``element_id``, or it unchanged if not renamed.

    Relationship endpoints that are not string IDs, or that name an element
    which was not renamed in this document, are passed through untouched
    instead of raising.
    """
    if isinstance(element_id, str) and element_id in temp_ids:
        return f"{element_id}-{temp_ids[element_id]}"
    return element_id


def _add_vcpkg_spdx(
    doc: spdx.Document,
    spdx_json_paths: typing.Sequence[pathlib.Path],
    pbar: TableProgress,
) -> typing.Mapping[str, str]:
    """Merge every SPDX file in ``spdx_json_paths`` into ``doc`` in place.

    Package and file IDs are made unique by appending ``-<n>``, where ``n``
    counts earlier occurrences of the same ID. Relationships of each parsed
    document are rewritten to the renamed IDs before being imported.

    Args:
        doc: Target document; its packages, files, relationships, snippets,
            extracted licensing info and annotations are extended.
        spdx_json_paths: Paths of the SPDX files to parse and merge.
        pbar: Progress display; one task is added and one table row is
            appended per merged file.

    Returns:
        Mapping of package name to ``str(package.license_concluded)``.
    """
    unique_ids: typing.DefaultDict[str, int] = collections.defaultdict(int)
    license_info: typing.Dict[str, str] = {}
    task = pbar.add_task("Merging spdx: ", total=len(spdx_json_paths))

    for idx, spdx_json_path in enumerate(spdx_json_paths):
        spdx_i = spdx_tools.spdx.parser.parse_anything.parse_file(str(spdx_json_path))

        # Original ID -> suffix assigned within this document.
        temp_ids: typing.Dict[str, int] = {}

        for spdx_pkg in spdx_i.packages:
            location = spdx_pkg.download_location
            # NOTE(review): validate_uri presumably returns the list of
            # validation messages, i.e. it is truthy for an invalid URI.
            if (
                not isinstance(location, (spdx.SpdxNoAssertion, spdx.SpdxNone))
                and spdx_tools.spdx.validation.uri_validators.validate_uri(location)
                and location == "git+@"
            ):
                spdx_pkg.download_location = spdx.SpdxNoAssertion()
            _assign_unique_id(spdx_pkg, unique_ids, temp_ids)
            license_info[spdx_pkg.name] = str(spdx_pkg.license_concluded)

        for spdx_file in spdx_i.files:
            algos = {checksum.algorithm for checksum in spdx_file.checksums}
            if spdx.ChecksumAlgorithm.SHA1 not in algos:
                # The file name is opened as given, so a relative name is
                # resolved against the current working directory.
                spdx_file.checksums.append(
                    spdx.Checksum(
                        spdx.ChecksumAlgorithm.SHA1,
                        _sha1_of_file(pathlib.Path(spdx_file.name)),
                    )
                )
            if not spdx_file.license_info_in_file:
                spdx_file.license_info_in_file = [spdx.SpdxNoAssertion()]
            _assign_unique_id(spdx_file, unique_ids, temp_ids)

        for spdx_rel in spdx_i.relationships:
            spdx_rel.spdx_element_id = _remap_spdx_id(
                spdx_rel.spdx_element_id, temp_ids
            )
            spdx_rel.related_spdx_element_id = _remap_spdx_id(
                spdx_rel.related_spdx_element_id, temp_ids
            )

        ## merge
        doc.packages.extend(spdx_i.packages)
        doc.files.extend(spdx_i.files)
        # Add 'DESCRIBES' relationship between master and child documents,
        # then import all relationships in child docs.
        doc.relationships.append(
            spdx.Relationship(
                doc.creation_info.spdx_id,
                spdx.RelationshipType.DESCRIBES,
                spdx_i.creation_info.spdx_id,
            )
        )
        doc.relationships.extend(spdx_i.relationships)
        # NOTE(review): snippet IDs are imported without being renamed.
        doc.snippets.extend(spdx_i.snippets)
        doc.extracted_licensing_info.extend(spdx_i.extracted_licensing_info)
        doc.annotations.extend(spdx_i.annotations)

        pbar.update(task, advance=1)
        # The parent directory name is used as the row label.
        pbar.update_table((str(idx), spdx_json_path.parts[-2]))

    return license_info
122+
123+
124+
def _add_licenses(
    writer: io.TextIOBase,
    license_paths: typing.Collection[pathlib.Path],
    pbar: "rich.progress.Progress",
) -> None:
    """Concatenate license files into ``writer``, each under a banner.

    Every file is preceded by a blank line and a banner made of an
    80-asterisk rule, the name of the file's parent directory, and another
    rule.

    Args:
        writer: Text stream that receives the banners and file contents.
        license_paths: Paths of the license files to append.
        pbar: Progress display; one task is added and advanced per file.
    """
    task = pbar.add_task("Merging copyrights: ", total=len(license_paths))
    rule = "*" * 80
    for license_path in license_paths:
        pkg_name = license_path.parts[-2]
        # Always write "\n": a text-mode stream translates it to the platform
        # line ending itself, so writing os.linesep would yield "\r\r\n" on
        # Windows.
        writer.write(f"\n{rule}\n{pkg_name}\n{rule}\n")
        with open(license_path, "r") as f:
            writer.write(f.read())
        pbar.update(task, advance=1)
147+
148+
149+
def _parse_args() -> argparse.Namespace:
    """Parse the command-line arguments of ``vcpkg-sbom`` from ``sys.argv``.

    Returns:
        Namespace with ``vcpkg_root``, ``triplet``, ``project``,
        ``namespace``, ``organization``, ``email``, ``copyright`` and
        ``license``.
    """
    # Keyword arguments are required here: the second positional parameter of
    # ArgumentParser is ``usage``, not ``description``.
    parser = argparse.ArgumentParser(
        prog="vcpkg-sbom",
        description="A software bill of materials extracter and merger for `vcpkg` manifest projects.",
    )
    parser.add_argument(
        "vcpkg_root",
        type=str,
        help="Path to the `vcpkg_installed` folder of your manifest project.",
    )

    # Optional string settings: (short flag, long flag, default, help text).
    string_options = (
        ("-t", "--triplet", "x64-windows", "The `vcpkg` triplet to use."),
        (
            "-p",
            "--project",
            "project",
            "The project's name that will be used for the merged output files.",
        ),
        (
            "-n",
            "--namespace",
            "spdx.org/spdxdocs",
            "The software's namespace to use for the `spdx` file.",
        ),
        (
            "-o",
            "--organization",
            "org",
            "The organization or company name to use for the `spdx` file.",
        ),
        ("-e", "--email", "info@org.com", "The email to use for the `spdx` file."),
    )
    for short_flag, long_flag, default, help_text in string_options:
        parser.add_argument(
            short_flag, long_flag, type=str, default=default, help=help_text
        )

    parser.add_argument(
        "-c",
        "--copyright",
        action="store_true",
        help="Additionally extract and merge all copyright files in a `*.txt` file.",
    )
    parser.add_argument(
        "-l",
        "--license",
        action="store_true",
        help="Additionally extract and merge all license types in a `*.json` file.",
    )
    return parser.parse_args()
2207

3208

4209
def _collect_unique_licenses(
    license_info: typing.Mapping[str, str],
) -> typing.List[str]:
    """Return the sorted individual license IDs found in ``license_info`` values.

    Expressions containing ``NOASSERTION`` are skipped. The remaining ones
    are split on the ``AND`` / ``OR`` operators and stripped of parentheses.
    """
    # Operators must be surrounded by whitespace so that identifiers which
    # merely contain "AND"/"OR" (e.g. "TORQUE-1.1") are not torn apart.
    operator_pattern = re.compile(r"\s+(?:AND|OR)\s+")
    unique = set()
    for expression in license_info.values():
        if "NOASSERTION" in expression:
            continue
        for operand in operator_pattern.split(expression):
            identifier = re.sub("[()]", "", operand).strip()
            if identifier:
                unique.add(identifier)
    return sorted(unique)


def run():
    """Command-line entry point: merge vcpkg SPDX files into one document.

    Writes ``<project>.spdx.json`` to the current directory. With
    ``--copyright`` it also writes ``<project>_EULA.txt``, and with
    ``--license`` it writes ``<project>_license_info.json``.
    """
    args = _parse_args()
    vcpkg_triplet_path = pathlib.Path(args.vcpkg_root) / pathlib.Path(args.triplet)

    # Sorted so the merge order, and with it the ID suffixes and the table
    # rows, is the same on every run.
    spdx_json_paths = sorted(vcpkg_triplet_path.glob("**/share/**/*.spdx.json"))

    with TableProgress(table_max_rows=len(spdx_json_paths), name="Package") as pbar:
        actor = spdx.Actor(spdx.ActorType.ORGANIZATION, args.organization, args.email)
        # SPDX creation timestamps are UTC. A naive UTC value is passed on the
        # assumption that the writer appends the "Z" designator itself
        # (TODO confirm against spdx_tools).
        created = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
        merged = spdx.Document(
            spdx.CreationInfo(
                "SPDX-2.3",
                "SPDXRef-DOCUMENT",
                args.project,
                args.namespace,
                [actor],
                created,
            )
        )
        license_info = _add_vcpkg_spdx(merged, spdx_json_paths, pbar)

    console = rich.console.Console()
    with console.status(
        f"[bold green]Validating & writing `{args.project}.spdx.json` ...",
        spinner="circle",
    ):
        spdx_tools.spdx.writer.write_anything.write_file(
            merged, f"{args.project}.spdx.json", validate=True
        )

    if args.copyright:
        with rich.progress.Progress() as pbar:
            rich.print("Extracting & merging copyrights ...")
            copyright_paths = sorted(vcpkg_triplet_path.glob("**/share/**/copyright"))
            with open(f"{args.project}_EULA.txt", "w") as copyright_file:
                _add_licenses(copyright_file, copyright_paths, pbar)

    if args.license:
        with open(f"{args.project}_license_info.json", "w") as license_info_file:
            json.dump(
                {
                    "unique": _collect_unique_licenses(license_info),
                    "per_package": license_info,
                },
                license_info_file,
            )
6264

7265

8266
if __name__ == "__main__":

0 commit comments

Comments
 (0)