fixcid.py 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316
  1. #! /usr/bin/env python3
  2. # Perform massive identifier substitution on C source files.
  3. # This actually tokenizes the files (to some extent) so it can
  4. # avoid making substitutions inside strings or comments.
  5. # Inside strings, substitutions are never made; inside comments,
  6. # it is a user option (off by default).
  7. #
  8. # The substitutions are read from one or more files whose lines,
  9. # when not empty, after stripping comments starting with #,
  10. # must contain exactly two words separated by whitespace: the
  11. # old identifier and its replacement.
  12. #
  13. # The option -r reverses the sense of the substitutions (this may be
  14. # useful to undo a particular substitution).
  15. #
  16. # If the old identifier is prefixed with a '*' (with no intervening
  17. # whitespace), then it will not be substituted inside comments.
  18. #
  19. # Command line arguments are files or directories to be processed.
  20. # Directories are searched recursively for files whose name looks
  21. # like a C file (ends in .h or .c). The special filename '-' means
  22. # operate in filter mode: read stdin, write stdout.
  23. #
  24. # Symbolic links are always ignored (except as explicit directory
  25. # arguments).
  26. #
  27. # The original files are kept as back-up with a "~" suffix.
  28. #
  29. # Changes made are reported to stdout in a diff-like format.
  30. #
  31. # NB: by changing only the function fixline() you can turn this
  32. # into a program for different changes to C source files; by
  33. # changing the function wanted() you can make a different selection of
  34. # files.
  35. import sys
  36. import re
  37. import os
  38. from stat import *
  39. import getopt
  40. err = sys.stderr.write
  41. dbg = err
  42. rep = sys.stdout.write
  43. def usage():
  44. progname = sys.argv[0]
  45. err('Usage: ' + progname +
  46. ' [-c] [-r] [-s file] ... file-or-directory ...\n')
  47. err('\n')
  48. err('-c : substitute inside comments\n')
  49. err('-r : reverse direction for following -s options\n')
  50. err('-s substfile : add a file of substitutions\n')
  51. err('\n')
  52. err('Each non-empty non-comment line in a substitution file must\n')
  53. err('contain exactly two words: an identifier and its replacement.\n')
  54. err('Comments start with a # character and end at end of line.\n')
  55. err('If an identifier is preceded with a *, it is not substituted\n')
  56. err('inside a comment even when -c is specified.\n')
  57. def main():
  58. try:
  59. opts, args = getopt.getopt(sys.argv[1:], 'crs:')
  60. except getopt.error as msg:
  61. err('Options error: ' + str(msg) + '\n')
  62. usage()
  63. sys.exit(2)
  64. bad = 0
  65. if not args: # No arguments
  66. usage()
  67. sys.exit(2)
  68. for opt, arg in opts:
  69. if opt == '-c':
  70. setdocomments()
  71. if opt == '-r':
  72. setreverse()
  73. if opt == '-s':
  74. addsubst(arg)
  75. for arg in args:
  76. if os.path.isdir(arg):
  77. if recursedown(arg): bad = 1
  78. elif os.path.islink(arg):
  79. err(arg + ': will not process symbolic links\n')
  80. bad = 1
  81. else:
  82. if fix(arg): bad = 1
  83. sys.exit(bad)
  84. # Change this regular expression to select a different set of files
  85. Wanted = r'^[a-zA-Z0-9_]+\.[ch]$'
  86. def wanted(name):
  87. return re.match(Wanted, name)
  88. def recursedown(dirname):
  89. dbg('recursedown(%r)\n' % (dirname,))
  90. bad = 0
  91. try:
  92. names = os.listdir(dirname)
  93. except OSError as msg:
  94. err(dirname + ': cannot list directory: ' + str(msg) + '\n')
  95. return 1
  96. names.sort()
  97. subdirs = []
  98. for name in names:
  99. if name in (os.curdir, os.pardir): continue
  100. fullname = os.path.join(dirname, name)
  101. if os.path.islink(fullname): pass
  102. elif os.path.isdir(fullname):
  103. subdirs.append(fullname)
  104. elif wanted(name):
  105. if fix(fullname): bad = 1
  106. for fullname in subdirs:
  107. if recursedown(fullname): bad = 1
  108. return bad
  109. def fix(filename):
  110. ## dbg('fix(%r)\n' % (filename,))
  111. if filename == '-':
  112. # Filter mode
  113. f = sys.stdin
  114. g = sys.stdout
  115. else:
  116. # File replacement mode
  117. try:
  118. f = open(filename, 'r')
  119. except IOError as msg:
  120. err(filename + ': cannot open: ' + str(msg) + '\n')
  121. return 1
  122. head, tail = os.path.split(filename)
  123. tempname = os.path.join(head, '@' + tail)
  124. g = None
  125. # If we find a match, we rewind the file and start over but
  126. # now copy everything to a temp file.
  127. lineno = 0
  128. initfixline()
  129. while 1:
  130. line = f.readline()
  131. if not line: break
  132. lineno = lineno + 1
  133. while line[-2:] == '\\\n':
  134. nextline = f.readline()
  135. if not nextline: break
  136. line = line + nextline
  137. lineno = lineno + 1
  138. newline = fixline(line)
  139. if newline != line:
  140. if g is None:
  141. try:
  142. g = open(tempname, 'w')
  143. except IOError as msg:
  144. f.close()
  145. err(tempname+': cannot create: '+
  146. str(msg)+'\n')
  147. return 1
  148. f.seek(0)
  149. lineno = 0
  150. initfixline()
  151. rep(filename + ':\n')
  152. continue # restart from the beginning
  153. rep(repr(lineno) + '\n')
  154. rep('< ' + line)
  155. rep('> ' + newline)
  156. if g is not None:
  157. g.write(newline)
  158. # End of file
  159. if filename == '-': return 0 # Done in filter mode
  160. f.close()
  161. if not g: return 0 # No changes
  162. g.close()
  163. # Finishing touch -- move files
  164. # First copy the file's mode to the temp file
  165. try:
  166. statbuf = os.stat(filename)
  167. os.chmod(tempname, statbuf[ST_MODE] & 0o7777)
  168. except OSError as msg:
  169. err(tempname + ': warning: chmod failed (' + str(msg) + ')\n')
  170. # Then make a backup of the original file as filename~
  171. try:
  172. os.rename(filename, filename + '~')
  173. except OSError as msg:
  174. err(filename + ': warning: backup failed (' + str(msg) + ')\n')
  175. # Now move the temp file to the original file
  176. try:
  177. os.rename(tempname, filename)
  178. except OSError as msg:
  179. err(filename + ': rename failed (' + str(msg) + ')\n')
  180. return 1
  181. # Return success
  182. return 0
  183. # Tokenizing ANSI C (partly)
  184. Identifier = '(struct )?[a-zA-Z_][a-zA-Z0-9_]+'
  185. String = r'"([^\n\\"]|\\.)*"'
  186. Char = r"'([^\n\\']|\\.)*'"
  187. CommentStart = r'/\*'
  188. CommentEnd = r'\*/'
  189. Hexnumber = '0[xX][0-9a-fA-F]*[uUlL]*'
  190. Octnumber = '0[0-7]*[uUlL]*'
  191. Decnumber = '[1-9][0-9]*[uUlL]*'
  192. Intnumber = Hexnumber + '|' + Octnumber + '|' + Decnumber
  193. Exponent = '[eE][-+]?[0-9]+'
  194. Pointfloat = r'([0-9]+\.[0-9]*|\.[0-9]+)(' + Exponent + r')?'
  195. Expfloat = '[0-9]+' + Exponent
  196. Floatnumber = Pointfloat + '|' + Expfloat
  197. Number = Floatnumber + '|' + Intnumber
  198. # Anything else is an operator -- don't list this explicitly because of '/*'
  199. OutsideComment = (Identifier, Number, String, Char, CommentStart)
  200. OutsideCommentPattern = '(' + '|'.join(OutsideComment) + ')'
  201. OutsideCommentProgram = re.compile(OutsideCommentPattern)
  202. InsideComment = (Identifier, Number, CommentEnd)
  203. InsideCommentPattern = '(' + '|'.join(InsideComment) + ')'
  204. InsideCommentProgram = re.compile(InsideCommentPattern)
  205. def initfixline():
  206. global Program
  207. Program = OutsideCommentProgram
  208. def fixline(line):
  209. global Program
  210. ## print('-->', repr(line))
  211. i = 0
  212. while i < len(line):
  213. match = Program.search(line, i)
  214. if match is None: break
  215. i = match.start()
  216. found = match.group(0)
  217. ## if Program is InsideCommentProgram: print(end='... ')
  218. ## else: print(end=' ')
  219. ## print(found)
  220. if len(found) == 2:
  221. if found == '/*':
  222. Program = InsideCommentProgram
  223. elif found == '*/':
  224. Program = OutsideCommentProgram
  225. n = len(found)
  226. if found in Dict:
  227. subst = Dict[found]
  228. if Program is InsideCommentProgram:
  229. if not Docomments:
  230. print('Found in comment:', found)
  231. i = i + n
  232. continue
  233. if found in NotInComment:
  234. ## print(end='Ignored in comment: ')
  235. ## print(found, '-->', subst)
  236. ## print('Line:', line, end='')
  237. subst = found
  238. ## else:
  239. ## print(end='Substituting in comment: ')
  240. ## print(found, '-->', subst)
  241. ## print('Line:', line, end='')
  242. line = line[:i] + subst + line[i+n:]
  243. n = len(subst)
  244. i = i + n
  245. return line
  246. Docomments = 0
  247. def setdocomments():
  248. global Docomments
  249. Docomments = 1
  250. Reverse = 0
  251. def setreverse():
  252. global Reverse
  253. Reverse = (not Reverse)
  254. Dict = {}
  255. NotInComment = {}
  256. def addsubst(substfile):
  257. try:
  258. fp = open(substfile, 'r')
  259. except IOError as msg:
  260. err(substfile + ': cannot read substfile: ' + str(msg) + '\n')
  261. sys.exit(1)
  262. lineno = 0
  263. while 1:
  264. line = fp.readline()
  265. if not line: break
  266. lineno = lineno + 1
  267. try:
  268. i = line.index('#')
  269. except ValueError:
  270. i = -1 # Happens to delete trailing \n
  271. words = line[:i].split()
  272. if not words: continue
  273. if len(words) == 3 and words[0] == 'struct':
  274. words[:2] = [words[0] + ' ' + words[1]]
  275. elif len(words) != 2:
  276. err(substfile + '%s:%r: warning: bad line: %r' % (substfile, lineno, line))
  277. continue
  278. if Reverse:
  279. [value, key] = words
  280. else:
  281. [key, value] = words
  282. if value[0] == '*':
  283. value = value[1:]
  284. if key[0] == '*':
  285. key = key[1:]
  286. NotInComment[key] = value
  287. if key in Dict:
  288. err('%s:%r: warning: overriding: %r %r\n' % (substfile, lineno, key, value))
  289. err('%s:%r: warning: previous: %r\n' % (substfile, lineno, Dict[key]))
  290. Dict[key] = value
  291. fp.close()
  292. if __name__ == '__main__':
  293. main()