gitlab_api.py

import argparse
import os
import re
import tarfile
import tempfile
import time
import zipfile
from functools import wraps

import gitlab

try:
    from typing import Any, Callable, Dict, List, Optional

    TR = Callable[..., Any]
except ImportError:
    pass


def retry(func):  # type: (TR) -> TR
    """
    This wrapper will only catch several exception types associated with
    "network issues" and retry the whole function.
    """
    @wraps(func)
    def wrapper(self, *args, **kwargs):  # type: (Gitlab, Any, Any) -> Any
        retried = 0
        while True:
            try:
                res = func(self, *args, **kwargs)
            except (IOError, EOFError, gitlab.exceptions.GitlabError) as e:
                if isinstance(e, gitlab.exceptions.GitlabError):
                    if e.response_code == 500:
                        # retry on this error
                        pass
                    elif e.response_code == 404 and os.environ.get('LOCAL_GITLAB_HTTPS_HOST', None):
                        # remove the environment variable "LOCAL_GITLAB_HTTPS_HOST" and retry
                        os.environ.pop('LOCAL_GITLAB_HTTPS_HOST', None)
                    else:
                        # other GitlabErrors aren't retried
                        raise e
                retried += 1
                if retried > self.DOWNLOAD_ERROR_MAX_RETRIES:
                    raise e  # get out of the loop
                else:
                    print('Network failure in {}, retrying ({})'.format(getattr(func, '__name__', '(unknown callable)'), retried))
                    time.sleep(2 ** retried)  # wait a bit more after each retry
                    continue
            else:
                break
        return res
    return wrapper
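

# A minimal usage sketch of @retry (the "Downloader" class below is hypothetical
# and not part of this module): the wrapper reads DOWNLOAD_ERROR_MAX_RETRIES from
# `self`, so it only fits methods of classes that define that attribute, such as
# the `Gitlab` class below.
#
#   class Downloader(object):
#       DOWNLOAD_ERROR_MAX_RETRIES = 3
#
#       @retry
#       def fetch(self, url):
#           ...  # any call that may raise IOError, EOFError or GitlabError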


class Gitlab(object):
    JOB_NAME_PATTERN = re.compile(r"(\w+)(\s+(\d+)/(\d+))?")

    DOWNLOAD_ERROR_MAX_RETRIES = 3

    def __init__(self, project_id=None):  # type: (Optional[int]) -> None
        config_data_from_env = os.getenv("PYTHON_GITLAB_CONFIG")
        if config_data_from_env:
            # prefer to load the config from the environment variable
            with tempfile.NamedTemporaryFile('w', delete=False) as temp_file:
                temp_file.write(config_data_from_env)

            config_files = [temp_file.name]  # type: Optional[List[str]]
        else:
            # otherwise try to use config files on the local filesystem
            config_files = None

        self._init_gitlab_inst(project_id, config_files)

    @retry
    def _init_gitlab_inst(self, project_id, config_files):  # type: (Optional[int], Optional[List[str]]) -> None
        gitlab_id = os.getenv('LOCAL_GITLAB_HTTPS_HOST')  # if None, will use the default gitlab server
        self.gitlab_inst = gitlab.Gitlab.from_config(gitlab_id=gitlab_id, config_files=config_files)
        self.gitlab_inst.auth()

        if project_id:
            self.project = self.gitlab_inst.projects.get(project_id)
        else:
            self.project = None

    @retry
    def get_project_id(self, name, namespace=None):  # type: (str, Optional[str]) -> int
        """
        Search for a project ID by name.

        :param name: project name
        :param namespace: namespace to match when there are multiple projects with the same name
        :return: project ID
        """
        projects = self.gitlab_inst.projects.list(search=name)
        res = []
        for project in projects:
            if namespace is None:
                if len(projects) == 1:
                    res.append(project.id)
                    break

            if project.namespace["path"] == namespace:
                if project.name == name:
                    res.insert(0, project.id)
                else:
                    res.append(project.id)

        if not res:
            raise ValueError("Can't find project")
        return int(res[0])
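
    # Usage sketch (the project and namespace names are placeholders): disambiguate
    # a project name that exists in several namespaces.
    #
    #   gitlab_inst = Gitlab()
    #   project_id = gitlab_inst.get_project_id('example_project', namespace='example_group')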

    @retry
    def download_artifacts(self, job_id, destination):  # type: (int, str) -> None
        """
        Download full job artifacts and extract them to destination.

        :param job_id: Gitlab CI job ID
        :param destination: extract artifacts to this path
        """
        job = self.project.jobs.get(job_id)

        with tempfile.NamedTemporaryFile(delete=False) as temp_file:
            job.artifacts(streamed=True, action=temp_file.write)

        with zipfile.ZipFile(temp_file.name, "r") as archive_file:
            archive_file.extractall(destination)

    @retry
    def download_artifact(self, job_id, artifact_path, destination=None):  # type: (int, List[str], Optional[str]) -> List[bytes]
        """
        Download specific paths of job artifacts and optionally save them to destination.

        :param job_id: Gitlab CI job ID
        :param artifact_path: list of paths inside the artifacts (relative to the artifact root path)
        :param destination: destination of the artifacts. Do not save to file if destination is None
        :return: a list of artifact files' raw data
        """
        job = self.project.jobs.get(job_id)

        raw_data_list = []

        for a_path in artifact_path:
            try:
                data = job.artifact(a_path)  # type: bytes
            except gitlab.GitlabGetError as e:
                print("Failed to download '{}' from job {}".format(a_path, job_id))
                raise e
            raw_data_list.append(data)
            if destination:
                file_path = os.path.join(destination, a_path)
                try:
                    os.makedirs(os.path.dirname(file_path))
                except OSError:
                    # already exists
                    pass
                with open(file_path, "wb") as f:
                    f.write(data)

        return raw_data_list
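
    # Usage sketch (the paths are placeholders; assumes an instance bound to a project):
    # fetch two files from a job's artifacts, getting the raw bytes back and also
    # writing copies under ./output.
    #
    #   raw_files = gitlab_inst.download_artifact(
    #       job_id, ['build/app.bin', 'build/flash_args'], destination='output')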

    @retry
    def find_job_id(self, job_name, pipeline_id=None, job_status='success'):  # type: (str, Optional[str], str) -> List[Dict]
        """
        Get job IDs from a job name in a specific pipeline.

        :param job_name: job name
        :param pipeline_id: pipeline ID. If None, the pipeline ID is taken from the pre-defined CI variable.
        :param job_status: status of the job. One pipeline could have multiple jobs with the same name after
            retries; job_status is used to filter these jobs.
        :return: a list of job IDs (a parallel job generates multiple jobs)
        """
        job_id_list = []
        if pipeline_id is None:
            pipeline_id = os.getenv("CI_PIPELINE_ID")
        pipeline = self.project.pipelines.get(pipeline_id)
        jobs = pipeline.jobs.list(all=True)
        for job in jobs:
            match = self.JOB_NAME_PATTERN.match(job.name)
            if match:
                if match.group(1) == job_name and job.status == job_status:
                    job_id_list.append({"id": job.id, "parallel_num": match.group(3)})
        return job_id_list
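
    # Usage sketch ("example_job" and the pipeline ID are placeholders; assumes an
    # instance bound to a project): list all parallel runs of a job in one pipeline.
    #
    #   for job in gitlab_inst.find_job_id('example_job', pipeline_id='12345'):
    #       print(job['id'], job['parallel_num'])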

    @retry
    def download_archive(self, ref, destination, project_id=None):  # type: (str, str, Optional[int]) -> str
        """
        Download the archive of a certain commit of a repository and extract it to the destination path.

        :param ref: commit SHA or branch name
        :param destination: destination path for the extracted archive
        :param project_id: download from the project of the current instance if project_id is None
        :return: root path name of the archive file
        """
        if project_id is None:
            project = self.project
        else:
            project = self.gitlab_inst.projects.get(project_id)

        with tempfile.NamedTemporaryFile(delete=False) as temp_file:
            try:
                project.repository_archive(sha=ref, streamed=True, action=temp_file.write)
            except gitlab.GitlabGetError as e:
                print("Failed to download archive from project {}".format(project_id))
                raise e

        print("archive size: {:.03f}MB".format(float(os.path.getsize(temp_file.name)) / (1024 * 1024)))

        with tarfile.open(temp_file.name, "r") as archive_file:
            root_name = archive_file.getnames()[0]
            archive_file.extractall(destination)

        return os.path.join(os.path.realpath(destination), root_name)
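
    # Usage sketch (assumes an instance bound to a project): download and extract
    # the "master" branch into ./downloads; the return value is the extracted root path.
    #
    #   root_path = gitlab_inst.download_archive('master', 'downloads')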


def main():  # type: () -> None
    parser = argparse.ArgumentParser()
    parser.add_argument("action")
    parser.add_argument("project_id", type=int)
    parser.add_argument("--pipeline_id", "-i", type=int, default=None)
    parser.add_argument("--ref", "-r", default="master")
    parser.add_argument("--job_id", "-j", type=int, default=None)
    parser.add_argument("--job_name", "-n", default=None)
    parser.add_argument("--project_name", "-m", default=None)
    parser.add_argument("--destination", "-d", default=None)
    parser.add_argument("--artifact_path", "-a", nargs="*", default=None)
    args = parser.parse_args()

    gitlab_inst = Gitlab(args.project_id)
    if args.action == "download_artifacts":
        gitlab_inst.download_artifacts(args.job_id, args.destination)
    elif args.action == "download_artifact":
        gitlab_inst.download_artifact(args.job_id, args.artifact_path, args.destination)
    elif args.action == "find_job_id":
        job_ids = gitlab_inst.find_job_id(args.job_name, args.pipeline_id)
        print(";".join([",".join([str(j["id"]), j["parallel_num"]]) for j in job_ids]))
    elif args.action == "download_archive":
        gitlab_inst.download_archive(args.ref, args.destination)
    elif args.action == "get_project_id":
        ret = gitlab_inst.get_project_id(args.project_name)
        print("project id: {}".format(ret))


if __name__ == '__main__':
    main()
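

# Example command lines (sketch; the IDs, names and paths are placeholders):
#
#   python gitlab_api.py get_project_id <project_id> --project_name example_project
#   python gitlab_api.py find_job_id <project_id> --job_name example_job --pipeline_id 12345
#   python gitlab_api.py download_artifacts <project_id> --job_id 67890 --destination output
#   python gitlab_api.py download_archive <project_id> --ref master --destination output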