check_soc_struct_headers.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420
#!/usr/bin/env python
# A check script that just works at the time of writing...
#
# It also builds a structure tree for further reference.
#
# The input file format must be similar to the headers generated by regtool, or this script makes no sense at all.
#
# Known limitations:
# 1. `/* ... */ /* ... */` on one line: multiline comments may be handled badly
# 2. won't accept multiple expressions within the same line (but will print an INFO message about it)
# 3. won't accept single-line struct/union definitions
#
# Check list:
# 1. a structure should not contain bitfield members alongside a nested struct/union
# 2. the bitfield sum in a struct should be 32 (meaning it is well padded)
# 3. each bitfield type should be uint32_t
# 4. a union is expected to be `union { struct {xxx}; uint32_t val; }`; complain if it is not a u32 val (but do not fail)
# 5. typedef volatile struct xxx{}: xxx must exist
#
# Anything else does not fail the check but produces a warning
  21. import os
  22. import re
  23. import sys
  24. from typing import Any
  25. class MemberField:
  26. member_type = ''
  27. bitfield = None
  28. def __init__(self, m_type: str, m_bits: int=None) -> None:
  29. self.member_type = m_type
  30. self.bitfield = m_bits
  31. def __unicode__(self) -> str:
  32. return self.__str__()
  33. def __repr__(self) -> str:
  34. return self.__str__()
  35. def __str__(self) -> str:
  36. if self.bitfield is None:
  37. return '"Field type={}"'.format(self.member_type)
  38. return '"Field type={} bit={}"'.format(self.member_type, self.bitfield)
  39. class SoCStructureHeaderChecker:
  40. # capture: typedef, volatile, struct name
  41. __REGEXP_MATCH_STRUCTURE_BEGIN = r'^[\s]*(typedef)?(?:[\s]+(volatile))?[\s]+struct[\s]+([\w]+)?[\s\S]*$'
  42. # capture: typedef, volatile, union name
  43. __REGEXP_MATCH_UNION_BEGIN = r'^[\s]*(typedef)?(?:[\s]+(volatile))?[\s]+union[\s]+([\w]+)?[\s\S]*$'
  44. # capture: type_var_name
  45. __REGEXP_MATCH_STRUCT_UNION_END_NAME = r'^[\s]*}[\s]*([\w\[\]\*]*)[\s]*;[\s\S]*$'
  46. # capture: type, name, bitfield
  47. __REGEXP_MATCH_BITFIELD_MEMBER = (r'^[\s]*(?:(?:volatile[\s]+)|(?:))([\w\*]+)[\s]+([\w\*]+(?:(?:\[[\s\S]*\])|(?:)))'
  48. r'[\s]*(?:(?:[\s]*;)|(?::[\s]*([\d]+)[\s]*;))[\s\S]*$')
  49. # should be useless and can be safely deleted
  50. __REGEXP_MATCH_MULTILINE_COMMENT = r'^[\s]*[\/]{0,2}\*[\/]?[\s\S]*$'
  51. __REGEX_MATCH_SIMPLE_VAL_FIELD = r'^[\s]*(?:(?:volatile[\s]+)|(?:))([\w]+)[\s]+([\w\[\]\*]+)[\s]*;[\s]*$'
  52. # capture: type, name
  53. __REGEX_MATCH_ROOT_EXTERNAL = r'^[\s]*extern[\s]+([\w]+)[\s]+([\w]+)[\s]*;[\s]*$'
  54. __linecount = 0
  55. __fd = None # type: Any
  56. __is_eof = False
  57. # generated reference tree
  58. __ref_tree = dict() # type: dict
  59. # middle result of generated tree, shared
  60. # named typedef, or named struct/union. referd but will not delete
  61. __temp_ref_types = dict() # type: dict
  62. def __expand_type(self, member_type: str, bitfield: int=None) -> Any:
  63. if member_type == 'uint32_t':
  64. return MemberField(member_type, bitfield)
  65. if bitfield is not None:
  66. print('\033[0;31mERROR\033[0m: non-u32 type with bitfield')
  67. return None
  68. if member_type in self.__temp_ref_types:
  69. return self.__temp_ref_types[member_type]
  70. return None
  71. def __getline(self, incomment:bool=False) -> Any:
  72. rawline = self.__fd.readline()
  73. if not rawline:
  74. self.__is_eof = True
  75. return None
  76. self.__linecount += 1
  77. if incomment:
  78. pos = rawline.find('*/')
  79. if pos != -1:
  80. # set string that is behind comment
  81. rawline = rawline[pos + 2:]
  82. else:
  83. # continue multiple line
  84. return self.__getline(True)
  85. # preprocess: remove '// comment'
  86. match_obj = re.match(r'^([^(\/\/)]*)\/\/[\s\S]*$', rawline)
  87. if match_obj is not None:
  88. rawline = match_obj.groups()[0]
  89. # preprocess: remove '/* comment'
  90. match_obj = re.match(r'^([^(\/\*)]*)\/\*([\s\S]*)$', rawline)
  91. if match_obj is not None:
  92. rawline = match_obj.groups()[0]
  93. # check if multiline commit in oneline
  94. pos = match_obj.groups()[1].find('*/')
  95. if pos != -1:
  96. # apply string that is behind comment
  97. rawline = rawline + match_obj.groups()[1][pos + 2:]
  98. else:
  99. # multiple line
  100. return self.__getline(True)
  101. if re.match(r'^[\s]*$', rawline):
  102. # skip empty line
  103. return self.__getline()
  104. if rawline.count(';') > 1:
  105. print('\033[0;34mINFO\033[0m: line: {}: possibily multiple expression within same line'.format(self.__linecount))
  106. print(rawline)
  107. return rawline
  108. def __process_structure(self, name: str, is_typedef: bool, is_volatile: bool) -> Any:
  109. ret_val = 0
  110. # first check for anonymous register structs
  111. if is_typedef and is_volatile and name is None:
  112. print('\033[0;31mERROR\033[0m: line {}: annoymous struct'.format(self.__linecount))
  113. ret_val = -1
  114. node_tree = dict()
  115. bitcount = 0
  116. has_nested_struct_union = False
  117. has_non_bitfield_member = False
  118. parsed_varname = ''
  119. while not self.__is_eof:
  120. rawline = self.__getline()
  121. if rawline is None:
  122. break
  123. # check for nested structure
  124. match_obj = re.match(self.__REGEXP_MATCH_STRUCTURE_BEGIN, rawline)
  125. if match_obj is not None:
  126. has_nested_struct_union = True
  127. ret, inherited_node_tree = self.__process_structure(
  128. match_obj.groups()[2], match_obj.groups()[0] == 'typedef', match_obj.groups()[1] == 'volatile')
  129. if ret != 0:
  130. ret_val = -2
  131. if inherited_node_tree is not None:
  132. for node in inherited_node_tree:
  133. node_tree[node] = inherited_node_tree[node]
  134. continue
  135. match_obj = re.match(self.__REGEXP_MATCH_UNION_BEGIN, rawline)
  136. if match_obj is not None:
  137. has_nested_struct_union = True
  138. ret, inherited_node_tree = self.__process_union(match_obj.groups()[2], match_obj.groups()[0] == 'typedef', match_obj.groups()[1] == 'volatile')
  139. if ret != 0:
  140. ret_val = -2
  141. if inherited_node_tree is not None:
  142. for node in inherited_node_tree:
  143. node_tree[node] = inherited_node_tree[node]
  144. continue
  145. # check if end of struct
  146. match_obj = re.match(self.__REGEXP_MATCH_STRUCT_UNION_END_NAME, rawline)
  147. if match_obj is not None:
  148. # end of struct
  149. if bitcount not in (0, 32):
  150. ret_val = -2
  151. if is_typedef:
  152. print('\033[0;31mERROR\033[0m: line {}: bitfield count is {}, type {}'.format(self.__linecount, bitcount, match_obj.groups()[0]))
  153. else:
  154. print('\033[0;31mERROR\033[0m: line {}: bitfield count is {}, type {}, varname "{}"'
  155. .format(self.__linecount, bitcount, name, match_obj.groups()[0]))
  156. parsed_varname = match_obj.groups()[0]
  157. if is_typedef:
  158. # is a typedef
  159. if match_obj.groups()[0] == '' or match_obj.groups()[0].find('[') != -1:
  160. # should be c error
  161. print('\033[0;31mERROR\033[0m: line {}: C error'.format(self.__linecount))
  162. ret_val = -3
  163. if match_obj.groups()[0] in self.__temp_ref_types:
  164. # duplication, script bug: we are putting all types into same namespace
  165. print('script run into bug...')
  166. self.__temp_ref_types[match_obj.groups()[0]] = dict()
  167. for member in node_tree:
  168. self.__temp_ref_types[match_obj.groups()[0]][member] = node_tree[member]
  169. elif name is not None:
  170. # currently this kind of expression doesn't exist
  171. print('!!!!!!UNDEALED CONDITION!!!!!')
  172. elif match_obj.groups()[0] != '':
  173. # named member, wrap and overwrite
  174. if len(node_tree) == 0:
  175. node_tree = None
  176. else:
  177. array_match = re.match(r'^([\w]*)\[[\s\S]*\]$', match_obj.groups()[0])
  178. if array_match is not None:
  179. node_tree = {array_match.groups()[0] + '[]': node_tree}
  180. else:
  181. node_tree = {match_obj.groups()[0]: node_tree}
  182. else:
  183. # not a type, no member name, treat its fields as its parent's
  184. pass
  185. break
  186. # check member
  187. match_obj = re.match(self.__REGEXP_MATCH_BITFIELD_MEMBER, rawline)
  188. if match_obj is not None:
  189. field_bit = None
  190. if match_obj.groups()[2] is not None:
  191. field_bit = int(match_obj.groups()[2])
  192. bitcount += field_bit
  193. # bitfield should be u32
  194. if match_obj.groups()[0] != 'uint32_t':
  195. print('\033[0;33mWARN\033[0m: line: {}: {} has type {}'.format(self.__linecount, match_obj.groups()[1], match_obj.groups()[0]))
  196. else:
  197. has_non_bitfield_member = True
  198. # append to node tree
  199. member_node = self.__expand_type(match_obj.groups()[0], field_bit)
  200. if member_node is not None:
  201. array_match = re.match(r'^([\w]*)\[[\s\S]*\]$', match_obj.groups()[1])
  202. if array_match is not None:
  203. node_tree[array_match.groups()[0] + '[]'] = member_node
  204. else:
  205. node_tree[match_obj.groups()[1]] = member_node
  206. else:
  207. if '*' not in match_obj.groups()[0]:
  208. print('\033[0;33mWARN\033[0m: line {}: unknown type {}'.format(self.__linecount, match_obj.groups()[0]))
  209. else:
  210. print('\033[0;33mWARN\033[0m: line {}: pointer type {}'.format(self.__linecount, match_obj.groups()[0]))
  211. continue
  212. # check comments
  213. match_obj = re.match(self.__REGEXP_MATCH_MULTILINE_COMMENT, rawline)
  214. if match_obj is not None:
  215. # code comments
  216. continue
  217. # dump out unmatched condition
  218. print(('\033[0;33mWARN\033[0m: line: {}: unexpected expression: {}'.format(self.__linecount, rawline)).replace('\n', ''))
  219. if bitcount != 0 and has_nested_struct_union:
  220. print('\033[0;33mWARN\033[0m: line: {}: mixed bitfield member and nested structure/union'.format(self.__linecount))
  221. if bitcount != 0 and has_non_bitfield_member:
  222. print('\033[0;33mWARN\033[0m: line: {}: mixed bitfield member and non-bitfield member'.format(self.__linecount))
  223. if is_typedef and is_volatile and name is None:
  224. if parsed_varname != '':
  225. print('SUGGEST: {}'.format(parsed_varname.rstrip('t') + 's'))
  226. if name is not None and is_typedef and is_volatile and parsed_varname.rstrip('t') != name.rstrip('s'):
  227. print('\033[0;33mWARN\033[0m: line: {}: different type and typedef name: {} {}'.format(self.__linecount, name, parsed_varname))
  228. return ret_val, node_tree
  229. def __process_union(self, name: str, is_typedef: bool, is_volatile: bool) -> Any:
  230. ret_val = 0
  231. # first check for anonymous register structs
  232. if is_typedef and is_volatile and name is None:
  233. print('\033[0;31mERROR\033[0m: line {}: annoymous union'.format(self.__linecount))
  234. ret_val = -1
  235. node_tree = dict() # type: Any
  236. has_struct_count = 0
  237. has_val_field_count = 0
  238. while not self.__is_eof:
  239. rawline = self.__getline()
  240. if rawline is None:
  241. break
  242. # check for nested structure
  243. match_obj = re.match(self.__REGEXP_MATCH_STRUCTURE_BEGIN, rawline)
  244. if match_obj is not None:
  245. has_struct_count += 1
  246. ret, inherited_node_tree = self.__process_structure(
  247. match_obj.groups()[2], match_obj.groups()[0] == 'typedef', match_obj.groups()[1] == 'volatile')
  248. if ret != 0:
  249. ret_val = -2
  250. if inherited_node_tree is not None:
  251. for node in inherited_node_tree:
  252. node_tree[node] = inherited_node_tree[node]
  253. continue
  254. match_obj = re.match(self.__REGEXP_MATCH_UNION_BEGIN, rawline)
  255. if match_obj is not None:
  256. has_struct_count += 1
  257. ret, inherited_node_tree = self.__process_union(match_obj.groups()[2], match_obj.groups()[0] == 'typedef', match_obj.groups()[1] == 'volatile')
  258. if ret != 0:
  259. ret_val = -2
  260. if inherited_node_tree is not None:
  261. for node in inherited_node_tree:
  262. node_tree[node] = inherited_node_tree[node]
  263. continue
  264. match_obj = re.match(self.__REGEXP_MATCH_STRUCT_UNION_END_NAME, rawline)
  265. if match_obj is not None:
  266. parsed_varname = match_obj.groups()[0]
  267. # end of struct
  268. if is_typedef:
  269. # is a typedef
  270. if match_obj.groups()[0] == '':
  271. # should be c error
  272. print('\033[0;31mERROR\033[0m: line {}: C error'.format(self.__linecount))
  273. ret_val = -3
  274. if match_obj.groups()[0] in self.__temp_ref_types:
  275. # duplication, script bug: we are putting all types into same namespace
  276. print('script run into bug...')
  277. self.__temp_ref_types[match_obj.groups()[0]] = dict()
  278. for member in node_tree:
  279. self.__temp_ref_types[match_obj.groups()[0]][member] = node_tree[member]
  280. node_tree = None
  281. elif name is not None:
  282. # currently this kind of expression doesn't exist
  283. print('!!!!!!UNDEALED CONDITION!!!!!')
  284. elif match_obj.groups()[0] != '':
  285. # named member, wrap and overwrite
  286. if len(node_tree) == 0:
  287. node_tree = None
  288. else:
  289. array_match = re.match(r'^([\w]*)\[[\s\S]*\]$', match_obj.groups()[0])
  290. if array_match is not None:
  291. node_tree = {array_match.groups()[0] + '[]': node_tree}
  292. else:
  293. node_tree = {match_obj.groups()[0]: node_tree}
  294. else:
  295. # not a type, no member name, treat its fields as its parent's
  296. pass
  297. break
  298. match_obj = re.match(self.__REGEXP_MATCH_MULTILINE_COMMENT, rawline)
  299. if match_obj is not None:
  300. # code comments
  301. continue
  302. match_obj = re.match(self.__REGEX_MATCH_SIMPLE_VAL_FIELD, rawline)
  303. if match_obj is not None:
  304. # expecting to see 'uint32_t val;'
  305. if match_obj.groups()[0] != 'uint32_t' or match_obj.groups()[1] != 'val':
  306. print(('\033[0;33mWARN\033[0m: unexpected union member at {}: {}'.format(self.__linecount, rawline)).replace('\n', ''))
  307. else:
  308. has_val_field_count += 1
  309. # append to node tree
  310. member_node = self.__expand_type(match_obj.groups()[0], None)
  311. if member_node is not None:
  312. node_tree[match_obj.groups()[1]] = member_node
  313. else:
  314. if '*' not in match_obj.groups()[0]:
  315. print('\033[0;31mERROR\033[0m: line {}: unknown type {}'.format(self.__linecount, match_obj.groups()[0]))
  316. else:
  317. print('\033[0;33mWARN\033[0m: line {}: pointer type {}'.format(self.__linecount, match_obj.groups()[0]))
  318. continue
  319. # dump out unmatched condition
  320. print(('\033[0;33mWARN\033[0m: line: {}: unexpected expression: {}'.format(self.__linecount, rawline)).replace('\n', ''))
  321. if not (has_struct_count == 1 and has_val_field_count == 1):
  322. print('\033[0;34mINFO\033[0m: line: {}: not a typical union: {} nested structures, {} u32 val member'
  323. .format(self.__linecount, has_struct_count, has_val_field_count))
  324. if is_typedef and is_volatile and name is None:
  325. if parsed_varname != '':
  326. print('SUGGEST: {}'.format(parsed_varname.rstrip('t') + 's'))
  327. if name is not None and is_typedef and is_volatile and parsed_varname.rstrip('t') != name.rstrip('s'):
  328. print('\033[0;33mWARN\033[0m: line: {}: different type and typedef name: {} {}'.format(self.__linecount, name, parsed_varname))
  329. return ret_val, node_tree
  330. def __process_root(self) -> int:
  331. ret_val = 0
  332. node_tree = dict()
  333. while not self.__is_eof:
  334. rawline = self.__getline()
  335. if rawline is None:
  336. break
  337. # start checking by finding any of structure or union
  338. match_obj = re.match(self.__REGEXP_MATCH_STRUCTURE_BEGIN, rawline)
  339. if match_obj is not None:
  340. ret, inherited_node_tree = self.__process_structure(
  341. match_obj.groups()[2], match_obj.groups()[0] == 'typedef', match_obj.groups()[1] == 'volatile')
  342. if ret != 0:
  343. ret_val = -2
  344. if inherited_node_tree is not None:
  345. for node in inherited_node_tree:
  346. node_tree[node] = inherited_node_tree[node]
  347. continue
  348. match_obj = re.match(self.__REGEXP_MATCH_UNION_BEGIN, rawline)
  349. if match_obj is not None:
  350. ret, inherited_node_tree = self.__process_union(match_obj.groups()[2], match_obj.groups()[0] == 'typedef', match_obj.groups()[1] == 'volatile')
  351. if ret != 0:
  352. ret_val = -2
  353. if inherited_node_tree is not None:
  354. for node in inherited_node_tree:
  355. node_tree[node] = inherited_node_tree[node]
  356. continue
  357. # processing root level external declaration
  358. match_obj = re.match(self.__REGEX_MATCH_ROOT_EXTERNAL, rawline)
  359. if match_obj is not None:
  360. self.__ref_tree[match_obj.groups()[1]] = self.__expand_type(match_obj.groups()[0])
  361. continue
  362. return ret_val
  363. def check(self, file: str) -> int:
  364. self.__fd = open(file, 'r', encoding='utf8')
  365. self.__linecount = 0
  366. self.__is_eof = False
  367. ret_val = self.__process_root()
  368. self.__fd.close()
  369. if ret_val != 0:
  370. print('\033[0;31mCHECK FAILED\033[0m:\t{}'.format(file))
  371. else:
  372. print('\033[0;32mCHECK PASSED\033[0m:\t{}'.format(file))
  373. return ret_val
  374. def get_ref_tree(self) -> Any:
  375. return self.__ref_tree
  376. def main() -> None:
  377. ret = 0
  378. if len(sys.argv) <= 1 or not os.path.isfile(sys.argv[1]):
  379. print('file not exist')
  380. exit(-1)
  381. checker = SoCStructureHeaderChecker()
  382. print('CHECKING:\t{}'.format(sys.argv[1]))
  383. ret = checker.check(sys.argv[1])
  384. if len(sys.argv) == 3 and sys.argv[2] == 'print':
  385. print(checker.get_ref_tree())
  386. del checker
  387. sys.exit(ret)
  388. if __name__ == '__main__':
  389. main()