#!/usr/bin/env python
# SPDX-FileCopyrightText: 2022 Espressif Systems (Shanghai) CO LTD
# SPDX-License-Identifier: Apache-2.0

# ci_fetch_submodule.py
# Internal use only, for CI.
# Downloads an archive of a single commit instead of cloning the entire submodule repository.
import argparse
import os
import re
import shutil
import subprocess
import time
from typing import Any, List, Optional

import gitlab_api
  14. SUBMODULE_PATTERN = re.compile(r"\[submodule \"([^\"]+)\"]")
  15. PATH_PATTERN = re.compile(r'path\s+=\s+(\S+)')
  16. URL_PATTERN = re.compile(r'url\s+=\s+(\S+)')
  17. SUBMODULE_ARCHIVE_TEMP_FOLDER = 'submodule_archive'
  18. # need to match the one defined in CI yaml files for caching purpose
  19. SUBMODULE_ARCHIVE_CACHE_DIR = '.cache/submodule_archives'
  20. class SubModule(object):
  21. # We don't need to support recursive submodule clone now
  22. GIT_LS_TREE_OUTPUT_PATTERN = re.compile(r'\d+\s+commit\s+([0-9a-f]+)\s+')
  23. def __init__(self, gitlab_inst: gitlab_api.Gitlab, path: str, url: str) -> None:
  24. self.path = path
  25. self.url = url
  26. self.gitlab_inst = gitlab_inst
  27. self.project_id = self._get_project_id(url)
  28. self.commit_id = self._get_commit_id(path)
  29. def _get_commit_id(self, path: str) -> str:
  30. output = subprocess.check_output(['git', 'ls-tree', 'HEAD', path]).decode()
  31. # example output: 160000 commit d88a262fbdf35e5abb372280eb08008749c3faa0 components/esp_wifi/lib
  32. match = self.GIT_LS_TREE_OUTPUT_PATTERN.search(output)
  33. return match.group(1) if match is not None else ''
  34. def _get_project_id(self, url: str) -> Any:
  35. base_name = os.path.basename(url)
  36. project_id = self.gitlab_inst.get_project_id(os.path.splitext(base_name)[0], # remove .git
  37. namespace='espressif')
  38. return project_id
  39. def download_archive(self) -> None:
  40. print('Update submodule: {}: {}'.format(self.path, self.commit_id))
  41. path_name = self.gitlab_inst.download_archive(self.commit_id, SUBMODULE_ARCHIVE_TEMP_FOLDER,
  42. self.project_id, SUBMODULE_ARCHIVE_CACHE_DIR)
  43. renamed_path = os.path.join(os.path.dirname(path_name), os.path.basename(self.path))
  44. os.rename(path_name, renamed_path)
  45. shutil.rmtree(self.path, ignore_errors=True)
  46. shutil.move(renamed_path, os.path.dirname(self.path))
  47. def update_submodule(git_module_file: str, submodules_to_update: List) -> None:
  48. gitlab_inst = gitlab_api.Gitlab()
  49. submodules = []
  50. with open(git_module_file, 'r') as f:
  51. data = f.read()
  52. match = SUBMODULE_PATTERN.search(data)
  53. if match is not None:
  54. while True:
  55. next_match = SUBMODULE_PATTERN.search(data, pos=match.end())
  56. if next_match:
  57. end_pos = next_match.start()
  58. else:
  59. end_pos = len(data)
  60. path_match = PATH_PATTERN.search(data, pos=match.end(), endpos=end_pos)
  61. url_match = URL_PATTERN.search(data, pos=match.end(), endpos=end_pos)
  62. path = path_match.group(1) if path_match is not None else ''
  63. url = url_match.group(1) if url_match is not None else ''
  64. filter_result = True
  65. if submodules_to_update:
  66. if path not in submodules_to_update:
  67. filter_result = False
  68. if filter_result:
  69. submodules.append(SubModule(gitlab_inst, path, url))
  70. match = next_match
  71. if not match:
  72. break
  73. shutil.rmtree(SUBMODULE_ARCHIVE_TEMP_FOLDER, ignore_errors=True)
  74. for submodule in submodules:
  75. submodule.download_archive()
  76. if __name__ == '__main__':
  77. start_time = time.time()
  78. parser = argparse.ArgumentParser()
  79. parser.add_argument('--repo_path', '-p', default='.', help='repo path')
  80. parser.add_argument('--submodule', '-s', default='all',
  81. help='Submodules to update. By default update all submodules. '
  82. 'For multiple submodules, separate them with `;`. '
  83. '`all` and `none` are special values that indicates we fetch all / none submodules')
  84. args = parser.parse_args()
  85. if args.submodule == 'none':
  86. print("don't need to update submodules")
  87. exit(0)
  88. if args.submodule == 'all':
  89. _submodules = []
  90. else:
  91. _submodules = args.submodule.split(';')
  92. update_submodule(os.path.join(args.repo_path, '.gitmodules'), _submodules)
  93. print('total time spent on update submodule: {:.02f}s'.format(time.time() - start_time))