#!/usr/bin/env python
# SPDX-FileCopyrightText: 2022 Espressif Systems (Shanghai) CO LTD
# SPDX-License-Identifier: Apache-2.0
# A check script that just works at the time of writing...
#
# It also builds a structure tree for further reference.
#
# The input file format must be similar to the headers generated by regtool, otherwise this script makes no sense at all.
#
# Known limitations:
# 1. comment handling is line-based and simplistic: unusual layouts such as `/* ... */ /* ... */`
#    or multiline comments sharing a line with code may be misparsed
# 2. multiple expressions within the same line are not accepted (but an INFO message is printed)
# 3. single-line struct/union definitions are not accepted
#
# Check list:
# 1. a structure should not contain bitfield members alongside nested structs/unions
# 2. the bitfield sum in a struct should be 32 (meaning it is well padded)
# 3. each bitfield type should be uint32_t
# 4. a union is expected to look like `union { struct {xxx}; uint32_t val; }`; a union without
#    a u32 `val` member is reported (but does not fail the check)
# 5. typedef volatile struct xxx{}: xxx must exist
#
# Anything else does not fail the check but produces a warning.
import os
import re
import sys
from typing import Any
from typing import Optional
  27. class MemberField:
  28. member_type = ''
  29. bitfield = None
  30. def __init__(self, m_type: str, m_bits: int=None) -> None:
  31. self.member_type = m_type
  32. self.bitfield = m_bits
  33. def __unicode__(self) -> str:
  34. return self.__str__()
  35. def __repr__(self) -> str:
  36. return self.__str__()
  37. def __str__(self) -> str:
  38. if self.bitfield is None:
  39. return '"Field type={}"'.format(self.member_type)
  40. return '"Field type={} bit={}"'.format(self.member_type, self.bitfield)
  41. class SoCStructureHeaderChecker:
  42. # capture: typedef, volatile, struct name
  43. __REGEXP_MATCH_STRUCTURE_BEGIN = r'^[\s]*(typedef)?(?:[\s]+(volatile))?[\s]+struct[\s]+([\w]+)?[\s\S]*$'
  44. # capture: typedef, volatile, union name
  45. __REGEXP_MATCH_UNION_BEGIN = r'^[\s]*(typedef)?(?:[\s]+(volatile))?[\s]+union[\s]+([\w]+)?[\s\S]*$'
  46. # capture: type_var_name
  47. __REGEXP_MATCH_STRUCT_UNION_END_NAME = r'^[\s]*}[\s]*([\w\[\]\*]*)[\s]*;[\s\S]*$'
  48. # capture: type, name, bitfield
  49. __REGEXP_MATCH_BITFIELD_MEMBER = (r'^[\s]*(?:(?:volatile[\s]+)|(?:))([\w\*]+)[\s]+([\w\*]+(?:(?:\[[\s\S]*\])|(?:)))'
  50. r'[\s]*(?:(?:[\s]*;)|(?::[\s]*([\d]+)[\s]*;))[\s\S]*$')
  51. # should be useless and can be safely deleted
  52. __REGEXP_MATCH_MULTILINE_COMMENT = r'^[\s]*[\/]{0,2}\*[\/]?[\s\S]*$'
  53. __REGEX_MATCH_SIMPLE_VAL_FIELD = r'^[\s]*(?:(?:volatile[\s]+)|(?:))([\w]+)[\s]+([\w\[\]\*]+)[\s]*;[\s]*$'
  54. # capture: type, name
  55. __REGEX_MATCH_ROOT_EXTERNAL = r'^[\s]*extern[\s]+([\w]+)[\s]+([\w]+)[\s]*;[\s]*$'
  56. __linecount = 0
  57. __fd = None # type: Any
  58. __is_eof = False
  59. # generated reference tree
  60. __ref_tree = dict() # type: dict
  61. # middle result of generated tree, shared
  62. # named typedef, or named struct/union. referd but will not delete
  63. __temp_ref_types = dict() # type: dict
  64. def __expand_type(self, member_type: str, bitfield: int=None) -> Any:
  65. if member_type == 'uint32_t':
  66. return MemberField(member_type, bitfield)
  67. if bitfield is not None:
  68. print('\033[0;31mERROR\033[0m: non-u32 type with bitfield')
  69. return None
  70. if member_type in self.__temp_ref_types:
  71. return self.__temp_ref_types[member_type]
  72. return None
  73. def __getline(self, incomment:bool=False) -> Any:
  74. rawline = self.__fd.readline()
  75. if not rawline:
  76. self.__is_eof = True
  77. return None
  78. self.__linecount += 1
  79. if incomment:
  80. pos = rawline.find('*/')
  81. if pos != -1:
  82. # set string that is behind comment
  83. rawline = rawline[pos + 2:]
  84. else:
  85. # continue multiple line
  86. return self.__getline(True)
  87. # preprocess: remove '// comment'
  88. match_obj = re.match(r'^([^(\/\/)]*)\/\/[\s\S]*$', rawline)
  89. if match_obj is not None:
  90. rawline = match_obj.groups()[0]
  91. # preprocess: remove '/* comment'
  92. match_obj = re.match(r'^([^(\/\*)]*)\/\*([\s\S]*)$', rawline)
  93. if match_obj is not None:
  94. rawline = match_obj.groups()[0]
  95. # check if multiline commit in oneline
  96. pos = match_obj.groups()[1].find('*/')
  97. if pos != -1:
  98. # apply string that is behind comment
  99. rawline = rawline + match_obj.groups()[1][pos + 2:]
  100. else:
  101. # multiple line
  102. return self.__getline(True)
  103. if re.match(r'^[\s]*$', rawline):
  104. # skip empty line
  105. return self.__getline()
  106. if rawline.count(';') > 1:
  107. print('\033[0;34mINFO\033[0m: line: {}: possibily multiple expression within same line'.format(self.__linecount))
  108. print(rawline)
  109. return rawline
  110. def __process_structure(self, name: str, is_typedef: bool, is_volatile: bool) -> Any:
  111. ret_val = 0
  112. # first check for anonymous register structs
  113. if is_typedef and is_volatile and name is None:
  114. print('\033[0;31mERROR\033[0m: line {}: annoymous struct'.format(self.__linecount))
  115. ret_val = -1
  116. node_tree = dict()
  117. bitcount = 0
  118. has_nested_struct_union = False
  119. has_non_bitfield_member = False
  120. parsed_varname = ''
  121. while not self.__is_eof:
  122. rawline = self.__getline()
  123. if rawline is None:
  124. break
  125. # check for nested structure
  126. match_obj = re.match(self.__REGEXP_MATCH_STRUCTURE_BEGIN, rawline)
  127. if match_obj is not None:
  128. has_nested_struct_union = True
  129. ret, inherited_node_tree = self.__process_structure(
  130. match_obj.groups()[2], match_obj.groups()[0] == 'typedef', match_obj.groups()[1] == 'volatile')
  131. if ret != 0:
  132. ret_val = -2
  133. if inherited_node_tree is not None:
  134. for node in inherited_node_tree:
  135. node_tree[node] = inherited_node_tree[node]
  136. continue
  137. match_obj = re.match(self.__REGEXP_MATCH_UNION_BEGIN, rawline)
  138. if match_obj is not None:
  139. has_nested_struct_union = True
  140. ret, inherited_node_tree = self.__process_union(match_obj.groups()[2], match_obj.groups()[0] == 'typedef', match_obj.groups()[1] == 'volatile')
  141. if ret != 0:
  142. ret_val = -2
  143. if inherited_node_tree is not None:
  144. for node in inherited_node_tree:
  145. node_tree[node] = inherited_node_tree[node]
  146. continue
  147. # check if end of struct
  148. match_obj = re.match(self.__REGEXP_MATCH_STRUCT_UNION_END_NAME, rawline)
  149. if match_obj is not None:
  150. # end of struct
  151. if bitcount not in (0, 32):
  152. ret_val = -2
  153. if is_typedef:
  154. print('\033[0;31mERROR\033[0m: line {}: bitfield count is {}, type {}'.format(self.__linecount, bitcount, match_obj.groups()[0]))
  155. else:
  156. print('\033[0;31mERROR\033[0m: line {}: bitfield count is {}, type {}, varname "{}"'
  157. .format(self.__linecount, bitcount, name, match_obj.groups()[0]))
  158. parsed_varname = match_obj.groups()[0]
  159. if is_typedef:
  160. # is a typedef
  161. if match_obj.groups()[0] == '' or match_obj.groups()[0].find('[') != -1:
  162. # should be c error
  163. print('\033[0;31mERROR\033[0m: line {}: C error'.format(self.__linecount))
  164. ret_val = -3
  165. if match_obj.groups()[0] in self.__temp_ref_types:
  166. # duplication, script bug: we are putting all types into same namespace
  167. print('script run into bug...')
  168. self.__temp_ref_types[match_obj.groups()[0]] = dict()
  169. for member in node_tree:
  170. self.__temp_ref_types[match_obj.groups()[0]][member] = node_tree[member]
  171. elif name is not None:
  172. # currently this kind of expression doesn't exist
  173. print('!!!!!!UNDEALED CONDITION!!!!!')
  174. elif match_obj.groups()[0] != '':
  175. # named member, wrap and overwrite
  176. if len(node_tree) == 0:
  177. node_tree = None
  178. else:
  179. array_match = re.match(r'^([\w]*)\[[\s\S]*\]$', match_obj.groups()[0])
  180. if array_match is not None:
  181. node_tree = {array_match.groups()[0] + '[]': node_tree}
  182. else:
  183. node_tree = {match_obj.groups()[0]: node_tree}
  184. else:
  185. # not a type, no member name, treat its fields as its parent's
  186. pass
  187. break
  188. # check member
  189. match_obj = re.match(self.__REGEXP_MATCH_BITFIELD_MEMBER, rawline)
  190. if match_obj is not None:
  191. field_bit = None
  192. if match_obj.groups()[2] is not None:
  193. field_bit = int(match_obj.groups()[2])
  194. bitcount += field_bit
  195. # bitfield should be u32
  196. if match_obj.groups()[0] != 'uint32_t':
  197. print('\033[0;33mWARN\033[0m: line: {}: {} has type {}'.format(self.__linecount, match_obj.groups()[1], match_obj.groups()[0]))
  198. else:
  199. has_non_bitfield_member = True
  200. # append to node tree
  201. member_node = self.__expand_type(match_obj.groups()[0], field_bit)
  202. if member_node is not None:
  203. array_match = re.match(r'^([\w]*)\[[\s\S]*\]$', match_obj.groups()[1])
  204. if array_match is not None:
  205. node_tree[array_match.groups()[0] + '[]'] = member_node
  206. else:
  207. node_tree[match_obj.groups()[1]] = member_node
  208. else:
  209. if '*' not in match_obj.groups()[0]:
  210. print('\033[0;33mWARN\033[0m: line {}: unknown type {}'.format(self.__linecount, match_obj.groups()[0]))
  211. else:
  212. print('\033[0;33mWARN\033[0m: line {}: pointer type {}'.format(self.__linecount, match_obj.groups()[0]))
  213. continue
  214. # check comments
  215. match_obj = re.match(self.__REGEXP_MATCH_MULTILINE_COMMENT, rawline)
  216. if match_obj is not None:
  217. # code comments
  218. continue
  219. # dump out unmatched condition
  220. print(('\033[0;33mWARN\033[0m: line: {}: unexpected expression: {}'.format(self.__linecount, rawline)).replace('\n', ''))
  221. if bitcount != 0 and has_nested_struct_union:
  222. print('\033[0;33mWARN\033[0m: line: {}: mixed bitfield member and nested structure/union'.format(self.__linecount))
  223. if bitcount != 0 and has_non_bitfield_member:
  224. print('\033[0;33mWARN\033[0m: line: {}: mixed bitfield member and non-bitfield member'.format(self.__linecount))
  225. if is_typedef and is_volatile and name is None:
  226. if parsed_varname != '':
  227. print('SUGGEST: {}'.format(parsed_varname.rstrip('t') + 's'))
  228. if name is not None and is_typedef and is_volatile and parsed_varname.rstrip('t') != name.rstrip('s'):
  229. print('\033[0;33mWARN\033[0m: line: {}: different type and typedef name: {} {}'.format(self.__linecount, name, parsed_varname))
  230. return ret_val, node_tree
  231. def __process_union(self, name: str, is_typedef: bool, is_volatile: bool) -> Any:
  232. ret_val = 0
  233. # first check for anonymous register structs
  234. if is_typedef and is_volatile and name is None:
  235. print('\033[0;31mERROR\033[0m: line {}: annoymous union'.format(self.__linecount))
  236. ret_val = -1
  237. node_tree = dict() # type: Any
  238. has_struct_count = 0
  239. has_val_field_count = 0
  240. while not self.__is_eof:
  241. rawline = self.__getline()
  242. if rawline is None:
  243. break
  244. # check for nested structure
  245. match_obj = re.match(self.__REGEXP_MATCH_STRUCTURE_BEGIN, rawline)
  246. if match_obj is not None:
  247. has_struct_count += 1
  248. ret, inherited_node_tree = self.__process_structure(
  249. match_obj.groups()[2], match_obj.groups()[0] == 'typedef', match_obj.groups()[1] == 'volatile')
  250. if ret != 0:
  251. ret_val = -2
  252. if inherited_node_tree is not None:
  253. for node in inherited_node_tree:
  254. node_tree[node] = inherited_node_tree[node]
  255. continue
  256. match_obj = re.match(self.__REGEXP_MATCH_UNION_BEGIN, rawline)
  257. if match_obj is not None:
  258. has_struct_count += 1
  259. ret, inherited_node_tree = self.__process_union(match_obj.groups()[2], match_obj.groups()[0] == 'typedef', match_obj.groups()[1] == 'volatile')
  260. if ret != 0:
  261. ret_val = -2
  262. if inherited_node_tree is not None:
  263. for node in inherited_node_tree:
  264. node_tree[node] = inherited_node_tree[node]
  265. continue
  266. match_obj = re.match(self.__REGEXP_MATCH_STRUCT_UNION_END_NAME, rawline)
  267. if match_obj is not None:
  268. parsed_varname = match_obj.groups()[0]
  269. # end of struct
  270. if is_typedef:
  271. # is a typedef
  272. if match_obj.groups()[0] == '':
  273. # should be c error
  274. print('\033[0;31mERROR\033[0m: line {}: C error'.format(self.__linecount))
  275. ret_val = -3
  276. if match_obj.groups()[0] in self.__temp_ref_types:
  277. # duplication, script bug: we are putting all types into same namespace
  278. print('script run into bug...')
  279. self.__temp_ref_types[match_obj.groups()[0]] = dict()
  280. for member in node_tree:
  281. self.__temp_ref_types[match_obj.groups()[0]][member] = node_tree[member]
  282. node_tree = None
  283. elif name is not None:
  284. # currently this kind of expression doesn't exist
  285. print('!!!!!!UNDEALED CONDITION!!!!!')
  286. elif match_obj.groups()[0] != '':
  287. # named member, wrap and overwrite
  288. if len(node_tree) == 0:
  289. node_tree = None
  290. else:
  291. array_match = re.match(r'^([\w]*)\[[\s\S]*\]$', match_obj.groups()[0])
  292. if array_match is not None:
  293. node_tree = {array_match.groups()[0] + '[]': node_tree}
  294. else:
  295. node_tree = {match_obj.groups()[0]: node_tree}
  296. else:
  297. # not a type, no member name, treat its fields as its parent's
  298. pass
  299. break
  300. match_obj = re.match(self.__REGEXP_MATCH_MULTILINE_COMMENT, rawline)
  301. if match_obj is not None:
  302. # code comments
  303. continue
  304. match_obj = re.match(self.__REGEX_MATCH_SIMPLE_VAL_FIELD, rawline)
  305. if match_obj is not None:
  306. # expecting to see 'uint32_t val;'
  307. if match_obj.groups()[0] != 'uint32_t' or match_obj.groups()[1] != 'val':
  308. print(('\033[0;33mWARN\033[0m: unexpected union member at {}: {}'.format(self.__linecount, rawline)).replace('\n', ''))
  309. else:
  310. has_val_field_count += 1
  311. # append to node tree
  312. member_node = self.__expand_type(match_obj.groups()[0], None)
  313. if member_node is not None:
  314. node_tree[match_obj.groups()[1]] = member_node
  315. else:
  316. if '*' not in match_obj.groups()[0]:
  317. print('\033[0;31mERROR\033[0m: line {}: unknown type {}'.format(self.__linecount, match_obj.groups()[0]))
  318. else:
  319. print('\033[0;33mWARN\033[0m: line {}: pointer type {}'.format(self.__linecount, match_obj.groups()[0]))
  320. continue
  321. # dump out unmatched condition
  322. print(('\033[0;33mWARN\033[0m: line: {}: unexpected expression: {}'.format(self.__linecount, rawline)).replace('\n', ''))
  323. if not (has_struct_count == 1 and has_val_field_count == 1):
  324. print('\033[0;34mINFO\033[0m: line: {}: not a typical union: {} nested structures, {} u32 val member'
  325. .format(self.__linecount, has_struct_count, has_val_field_count))
  326. if is_typedef and is_volatile and name is None:
  327. if parsed_varname != '':
  328. print('SUGGEST: {}'.format(parsed_varname.rstrip('t') + 's'))
  329. if name is not None and is_typedef and is_volatile and parsed_varname.rstrip('t') != name.rstrip('s'):
  330. print('\033[0;33mWARN\033[0m: line: {}: different type and typedef name: {} {}'.format(self.__linecount, name, parsed_varname))
  331. return ret_val, node_tree
  332. def __process_root(self) -> int:
  333. ret_val = 0
  334. node_tree = dict()
  335. while not self.__is_eof:
  336. rawline = self.__getline()
  337. if rawline is None:
  338. break
  339. # start checking by finding any of structure or union
  340. match_obj = re.match(self.__REGEXP_MATCH_STRUCTURE_BEGIN, rawline)
  341. if match_obj is not None:
  342. ret, inherited_node_tree = self.__process_structure(
  343. match_obj.groups()[2], match_obj.groups()[0] == 'typedef', match_obj.groups()[1] == 'volatile')
  344. if ret != 0:
  345. ret_val = -2
  346. if inherited_node_tree is not None:
  347. for node in inherited_node_tree:
  348. node_tree[node] = inherited_node_tree[node]
  349. continue
  350. match_obj = re.match(self.__REGEXP_MATCH_UNION_BEGIN, rawline)
  351. if match_obj is not None:
  352. ret, inherited_node_tree = self.__process_union(match_obj.groups()[2], match_obj.groups()[0] == 'typedef', match_obj.groups()[1] == 'volatile')
  353. if ret != 0:
  354. ret_val = -2
  355. if inherited_node_tree is not None:
  356. for node in inherited_node_tree:
  357. node_tree[node] = inherited_node_tree[node]
  358. continue
  359. # processing root level external declaration
  360. match_obj = re.match(self.__REGEX_MATCH_ROOT_EXTERNAL, rawline)
  361. if match_obj is not None:
  362. self.__ref_tree[match_obj.groups()[1]] = self.__expand_type(match_obj.groups()[0])
  363. continue
  364. return ret_val
  365. def check(self, file: str) -> int:
  366. self.__fd = open(file, 'r', encoding='utf8')
  367. self.__linecount = 0
  368. self.__is_eof = False
  369. ret_val = self.__process_root()
  370. self.__fd.close()
  371. if ret_val != 0:
  372. print('\033[0;31mCHECK FAILED\033[0m:\t{}'.format(file))
  373. else:
  374. print('\033[0;32mCHECK PASSED\033[0m:\t{}'.format(file))
  375. return ret_val
  376. def get_ref_tree(self) -> Any:
  377. return self.__ref_tree
  378. def main() -> None:
  379. ret = 0
  380. if len(sys.argv) <= 1 or not os.path.isfile(sys.argv[1]):
  381. print('file not exist')
  382. exit(-1)
  383. checker = SoCStructureHeaderChecker()
  384. print('CHECKING:\t{}'.format(sys.argv[1]))
  385. ret = checker.check(sys.argv[1])
  386. if len(sys.argv) == 3 and sys.argv[2] == 'print':
  387. print(checker.get_ref_tree())
  388. del checker
  389. sys.exit(ret)
  390. if __name__ == '__main__':
  391. main()