collect_files.py 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245
  1. #!/usr/bin/env python3
  2. #
  3. # Copyright (C) 2019 Intel Corporation. All rights reserved.
  4. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  5. #
  6. """
  7. This script operates on the following directories and files:
  8. |-- core
  9. | `-- deps
  10. | |-- emscripten
  11. |-- samples
  12. | `-- workloads
  13. | |-- include
  14. `-- test-tools
  15. |-- pick-up-emscripten_headers
  16. | |-- collect_files.py
  17. """
  18. import argparse
  19. import hashlib
  20. import logging
  21. import os
  22. import pathlib
  23. import shutil
  24. import sys
  25. import tarfile
  26. import tempfile
  27. import urllib
  28. import urllib.request
# Dedicated logger for this script; its handler and level are set up in main().
logger = logging.getLogger("pick-up-emscripten-headers")

# External source archives this script knows how to fetch, keyed by name.
#   sha256       - expected SHA-256 hex digest of the downloaded archive
#   store_dir    - directory, relative to the repository root, that receives
#                  the unpacked content
#   strip_prefix - top-level directory inside the archive whose content is
#                  moved into store_dir (the prefix itself is dropped)
#   url          - download location of the archive
external_repos = {
    "emscripten": {
        "sha256": "c5524755b785d8f4b83eb3214fdd3ac4b2e1b1a4644df4c63f06e5968f48f90e",
        "store_dir": "core/deps/emscripten",
        "strip_prefix": "emscripten-3.0.0",
        "url": "https://github.com/emscripten-core/emscripten/archive/refs/tags/3.0.0.tar.gz",
    }
}

# TODO: can we use headers from wasi-libc and clang directly ?
# (source, destination) pairs of headers to copy. Sources are relative to the
# "system" directory of the unpacked emscripten tree; destinations are
# relative to the install location.
emscripten_headers_src_dst = [
    ("include/compat/emmintrin.h", "sse/emmintrin.h"),
    ("include/compat/immintrin.h", "sse/immintrin.h"),
    ("include/compat/smmintrin.h", "sse/smmintrin.h"),
    ("include/compat/xmmintrin.h", "sse/xmmintrin.h"),
    ("lib/libc/musl/include/pthread.h", "libc/musl/pthread.h"),
    ("lib/libc/musl/include/signal.h", "libc/musl/signal.h"),
    ("lib/libc/musl/include/netdb.h", "libc/musl/netdb.h"),
    ("lib/libc/musl/include/sys/wait.h", "libc/musl/sys/wait.h"),
    ("lib/libc/musl/include/sys/socket.h", "libc/musl/sys/socket.h"),
    ("lib/libc/musl/include/setjmp.h", "libc/musl/setjmp.h"),
    ("lib/libc/musl/arch/emscripten/bits/setjmp.h", "libc/musl/bits/setjmp.h"),
]
  52. def checksum(name, local_file):
  53. sha256 = hashlib.sha256()
  54. with open(local_file, "rb") as f:
  55. bytes = f.read(4096)
  56. while bytes:
  57. sha256.update(bytes)
  58. bytes = f.read(4096)
  59. return sha256.hexdigest() == external_repos[name]["sha256"]
  60. def download(url, local_file):
  61. logger.debug(f"download from {url}")
  62. urllib.request.urlretrieve(url, local_file)
  63. return local_file.exists()
  64. def unpack(tar_file, strip_prefix, dest_dir):
  65. # extract .tar.gz to /tmp, then move back without strippred prefix directories
  66. with tempfile.TemporaryDirectory() as tmp:
  67. with tarfile.open(tar_file) as tar:
  68. logger.debug(f"extract to {tmp}")
  69. def is_within_directory(directory, target):
  70. abs_directory = os.path.abspath(directory)
  71. abs_target = os.path.abspath(target)
  72. prefix = os.path.commonprefix([abs_directory, abs_target])
  73. return prefix == abs_directory
  74. def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
  75. for member in tar.getmembers():
  76. member_path = os.path.join(path, member.name)
  77. if not is_within_directory(path, member_path):
  78. raise Exception("Attempted Path Traversal in Tar File")
  79. tar.extractall(path, members, numeric_owner=numeric_owner)
  80. safe_extract(tar, tmp)
  81. strip_prefix_dir = (
  82. pathlib.Path(tmp).joinpath(strip_prefix + os.path.sep).resolve()
  83. )
  84. if not strip_prefix_dir.exists():
  85. logger.error(f"extract {tar_file.name} failed")
  86. return False
  87. # mv /tmp/${strip_prefix} dest_dir/*
  88. logger.debug(f"move {strip_prefix_dir} to {dest_dir}")
  89. shutil.copytree(
  90. str(strip_prefix_dir),
  91. str(dest_dir),
  92. copy_function=shutil.move,
  93. dirs_exist_ok=True,
  94. )
  95. return True
  96. def download_repo(name, root):
  97. if not name in external_repos:
  98. logger.error(f"{name} is not a known repository")
  99. return False
  100. store_dir = root.joinpath(f'{external_repos[name]["store_dir"]}').resolve()
  101. download_flag = store_dir.joinpath("DOWNLOADED")
  102. if store_dir.exists() and download_flag.exists():
  103. logger.info(
  104. f"bypass downloading '{store_dir.relative_to(root)}'. Or to remove it and try again if needs a new release"
  105. )
  106. return True
  107. # download only when the target is neither existed nor broken
  108. download_dir = pathlib.Path("/tmp/pick-up-emscripten-headers/")
  109. download_dir.mkdir(exist_ok=True)
  110. tar_name = pathlib.Path(external_repos[name]["url"]).name
  111. tar_file = download_dir.joinpath(tar_name)
  112. if tar_file.exists():
  113. if checksum(name, tar_file):
  114. logger.debug(f"use pre-downloaded {tar_file}")
  115. else:
  116. logger.debug(f"{tar_file} is broken, remove it")
  117. tar_file.unlink()
  118. if not tar_file.exists():
  119. if not download(external_repos[name]["url"], tar_file) or not checksum(
  120. name, tar_file
  121. ):
  122. logger.error(f"download {name} failed")
  123. return False
  124. # unpack and removing *strip_prefix*
  125. if not unpack(tar_file, external_repos[name]["strip_prefix"], store_dir):
  126. return False
  127. # leave a FLAG
  128. download_flag.touch()
  129. # leave download files in /tmp
  130. logger.info(f"Has downloaded and stored in {store_dir.relative_to(root)}")
  131. return True
  132. def collect_headers(root, install_location):
  133. if not install_location.exists():
  134. logger.error(f"{install_location} does not found")
  135. return False
  136. install_flag = install_location.joinpath("INSTALLED").resolve()
  137. if install_flag.exists():
  138. logger.info(
  139. f"bypass downloading '{install_location}'. Or to remove it and try again if needs a new one"
  140. )
  141. return True
  142. emscripten_home = root.joinpath(
  143. f'{external_repos["emscripten"]["store_dir"]}'
  144. ).resolve()
  145. if not emscripten_home.exists():
  146. logger.error(f"{emscripten_home} does not found")
  147. return False
  148. emscripten_headers = emscripten_home.joinpath("system").resolve()
  149. for (src, dst) in emscripten_headers_src_dst:
  150. src = emscripten_headers.joinpath(src)
  151. dst = install_location.joinpath(dst)
  152. dst.parent.mkdir(parents=True, exist_ok=True)
  153. shutil.copy(src, dst)
  154. install_flag.touch()
  155. logger.info(f"Has installed in {install_location}")
  156. return True
  157. def main():
  158. parser = argparse.ArgumentParser(
  159. description="collect headers from emscripten for workload compilation"
  160. )
  161. parser.add_argument(
  162. "--install",
  163. type=str,
  164. required=True,
  165. help="identify installation location",
  166. )
  167. parser.add_argument(
  168. "--loglevel",
  169. type=str,
  170. default="INFO",
  171. choices=[
  172. "ERROR",
  173. "WARNING",
  174. "INFO",
  175. ],
  176. help="the logging level",
  177. )
  178. options = parser.parse_args()
  179. console = logging.StreamHandler()
  180. console.setFormatter(logging.Formatter("%(asctime)s - %(message)s"))
  181. logger.setLevel(getattr(logging, options.loglevel))
  182. logger.addHandler(console)
  183. logger.propagate = False
  184. # locate the root of WAMR
  185. current_file = pathlib.Path(__file__)
  186. if current_file.is_symlink():
  187. current_file = pathlib.Path(os.readlink(current_file))
  188. root = current_file.parent.joinpath("../..").resolve()
  189. logger.info(f"The root of WAMR is {root}")
  190. # download repos
  191. for repo in external_repos.keys():
  192. if not download_repo(repo, root):
  193. return False
  194. if not collect_headers(root, pathlib.Path(options.install)):
  195. return False
  196. return True
# Script entry point: exit with status 0 when main() reports success and
# with status 1 otherwise.
if __name__ == "__main__":
    sys.exit(0 if main() else 1)