modulefinder.py 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633
  1. """Find modules used by a script, using introspection."""
  2. import dis
  3. import importlib._bootstrap_external
  4. import importlib.machinery
  5. import marshal
  6. import os
  7. import sys
  8. import types
  9. import warnings
  10. with warnings.catch_warnings():
  11. warnings.simplefilter('ignore', DeprecationWarning)
  12. import imp
  13. LOAD_CONST = dis.opmap['LOAD_CONST']
  14. IMPORT_NAME = dis.opmap['IMPORT_NAME']
  15. STORE_NAME = dis.opmap['STORE_NAME']
  16. STORE_GLOBAL = dis.opmap['STORE_GLOBAL']
  17. STORE_OPS = STORE_NAME, STORE_GLOBAL
  18. EXTENDED_ARG = dis.EXTENDED_ARG
# Modulefinder does a good job at simulating Python's import machinery, but
# it cannot handle __path__ modifications that packages make at runtime.
# Therefore there is a mechanism whereby you can register extra paths in
# this map for a package, and they will be honored.
# Note that this is a mapping from package names to lists of paths.
  24. packagePathMap = {}
  25. # A Public interface
  26. def AddPackagePath(packagename, path):
  27. packagePathMap.setdefault(packagename, []).append(path)
  28. replacePackageMap = {}
  29. # This ReplacePackage mechanism allows modulefinder to work around
  30. # situations in which a package injects itself under the name
  31. # of another package into sys.modules at runtime by calling
  32. # ReplacePackage("real_package_name", "faked_package_name")
  33. # before running ModuleFinder.
  34. def ReplacePackage(oldname, newname):
  35. replacePackageMap[oldname] = newname
  36. class Module:
  37. def __init__(self, name, file=None, path=None):
  38. self.__name__ = name
  39. self.__file__ = file
  40. self.__path__ = path
  41. self.__code__ = None
  42. # The set of global names that are assigned to in the module.
  43. # This includes those names imported through starimports of
  44. # Python modules.
  45. self.globalnames = {}
  46. # The set of starimports this module did that could not be
  47. # resolved, ie. a starimport from a non-Python module.
  48. self.starimports = {}
  49. def __repr__(self):
  50. s = "Module(%r" % (self.__name__,)
  51. if self.__file__ is not None:
  52. s = s + ", %r" % (self.__file__,)
  53. if self.__path__ is not None:
  54. s = s + ", %r" % (self.__path__,)
  55. s = s + ")"
  56. return s
  57. class ModuleFinder:
  58. def __init__(self, path=None, debug=0, excludes=[], replace_paths=[]):
  59. if path is None:
  60. path = sys.path
  61. self.path = path
  62. self.modules = {}
  63. self.badmodules = {}
  64. self.debug = debug
  65. self.indent = 0
  66. self.excludes = excludes
  67. self.replace_paths = replace_paths
  68. self.processed_paths = [] # Used in debugging only
  69. def msg(self, level, str, *args):
  70. if level <= self.debug:
  71. for i in range(self.indent):
  72. print(" ", end=' ')
  73. print(str, end=' ')
  74. for arg in args:
  75. print(repr(arg), end=' ')
  76. print()
  77. def msgin(self, *args):
  78. level = args[0]
  79. if level <= self.debug:
  80. self.indent = self.indent + 1
  81. self.msg(*args)
  82. def msgout(self, *args):
  83. level = args[0]
  84. if level <= self.debug:
  85. self.indent = self.indent - 1
  86. self.msg(*args)
  87. def run_script(self, pathname):
  88. self.msg(2, "run_script", pathname)
  89. with open(pathname) as fp:
  90. stuff = ("", "r", imp.PY_SOURCE)
  91. self.load_module('__main__', fp, pathname, stuff)
  92. def load_file(self, pathname):
  93. dir, name = os.path.split(pathname)
  94. name, ext = os.path.splitext(name)
  95. with open(pathname) as fp:
  96. stuff = (ext, "r", imp.PY_SOURCE)
  97. self.load_module(name, fp, pathname, stuff)
  98. def import_hook(self, name, caller=None, fromlist=None, level=-1):
  99. self.msg(3, "import_hook", name, caller, fromlist, level)
  100. parent = self.determine_parent(caller, level=level)
  101. q, tail = self.find_head_package(parent, name)
  102. m = self.load_tail(q, tail)
  103. if not fromlist:
  104. return q
  105. if m.__path__:
  106. self.ensure_fromlist(m, fromlist)
  107. return None
  108. def determine_parent(self, caller, level=-1):
  109. self.msgin(4, "determine_parent", caller, level)
  110. if not caller or level == 0:
  111. self.msgout(4, "determine_parent -> None")
  112. return None
  113. pname = caller.__name__
  114. if level >= 1: # relative import
  115. if caller.__path__:
  116. level -= 1
  117. if level == 0:
  118. parent = self.modules[pname]
  119. assert parent is caller
  120. self.msgout(4, "determine_parent ->", parent)
  121. return parent
  122. if pname.count(".") < level:
  123. raise ImportError("relative importpath too deep")
  124. pname = ".".join(pname.split(".")[:-level])
  125. parent = self.modules[pname]
  126. self.msgout(4, "determine_parent ->", parent)
  127. return parent
  128. if caller.__path__:
  129. parent = self.modules[pname]
  130. assert caller is parent
  131. self.msgout(4, "determine_parent ->", parent)
  132. return parent
  133. if '.' in pname:
  134. i = pname.rfind('.')
  135. pname = pname[:i]
  136. parent = self.modules[pname]
  137. assert parent.__name__ == pname
  138. self.msgout(4, "determine_parent ->", parent)
  139. return parent
  140. self.msgout(4, "determine_parent -> None")
  141. return None
  142. def find_head_package(self, parent, name):
  143. self.msgin(4, "find_head_package", parent, name)
  144. if '.' in name:
  145. i = name.find('.')
  146. head = name[:i]
  147. tail = name[i+1:]
  148. else:
  149. head = name
  150. tail = ""
  151. if parent:
  152. qname = "%s.%s" % (parent.__name__, head)
  153. else:
  154. qname = head
  155. q = self.import_module(head, qname, parent)
  156. if q:
  157. self.msgout(4, "find_head_package ->", (q, tail))
  158. return q, tail
  159. if parent:
  160. qname = head
  161. parent = None
  162. q = self.import_module(head, qname, parent)
  163. if q:
  164. self.msgout(4, "find_head_package ->", (q, tail))
  165. return q, tail
  166. self.msgout(4, "raise ImportError: No module named", qname)
  167. raise ImportError("No module named " + qname)
  168. def load_tail(self, q, tail):
  169. self.msgin(4, "load_tail", q, tail)
  170. m = q
  171. while tail:
  172. i = tail.find('.')
  173. if i < 0: i = len(tail)
  174. head, tail = tail[:i], tail[i+1:]
  175. mname = "%s.%s" % (m.__name__, head)
  176. m = self.import_module(head, mname, m)
  177. if not m:
  178. self.msgout(4, "raise ImportError: No module named", mname)
  179. raise ImportError("No module named " + mname)
  180. self.msgout(4, "load_tail ->", m)
  181. return m
  182. def ensure_fromlist(self, m, fromlist, recursive=0):
  183. self.msg(4, "ensure_fromlist", m, fromlist, recursive)
  184. for sub in fromlist:
  185. if sub == "*":
  186. if not recursive:
  187. all = self.find_all_submodules(m)
  188. if all:
  189. self.ensure_fromlist(m, all, 1)
  190. elif not hasattr(m, sub):
  191. subname = "%s.%s" % (m.__name__, sub)
  192. submod = self.import_module(sub, subname, m)
  193. if not submod:
  194. raise ImportError("No module named " + subname)
  195. def find_all_submodules(self, m):
  196. if not m.__path__:
  197. return
  198. modules = {}
  199. # 'suffixes' used to be a list hardcoded to [".py", ".pyc"].
  200. # But we must also collect Python extension modules - although
  201. # we cannot separate normal dlls from Python extensions.
  202. suffixes = []
  203. suffixes += importlib.machinery.EXTENSION_SUFFIXES[:]
  204. suffixes += importlib.machinery.SOURCE_SUFFIXES[:]
  205. suffixes += importlib.machinery.BYTECODE_SUFFIXES[:]
  206. for dir in m.__path__:
  207. try:
  208. names = os.listdir(dir)
  209. except OSError:
  210. self.msg(2, "can't list directory", dir)
  211. continue
  212. for name in names:
  213. mod = None
  214. for suff in suffixes:
  215. n = len(suff)
  216. if name[-n:] == suff:
  217. mod = name[:-n]
  218. break
  219. if mod and mod != "__init__":
  220. modules[mod] = mod
  221. return modules.keys()
  222. def import_module(self, partname, fqname, parent):
  223. self.msgin(3, "import_module", partname, fqname, parent)
  224. try:
  225. m = self.modules[fqname]
  226. except KeyError:
  227. pass
  228. else:
  229. self.msgout(3, "import_module ->", m)
  230. return m
  231. if fqname in self.badmodules:
  232. self.msgout(3, "import_module -> None")
  233. return None
  234. if parent and parent.__path__ is None:
  235. self.msgout(3, "import_module -> None")
  236. return None
  237. try:
  238. fp, pathname, stuff = self.find_module(partname,
  239. parent and parent.__path__, parent)
  240. except ImportError:
  241. self.msgout(3, "import_module ->", None)
  242. return None
  243. try:
  244. m = self.load_module(fqname, fp, pathname, stuff)
  245. finally:
  246. if fp:
  247. fp.close()
  248. if parent:
  249. setattr(parent, partname, m)
  250. self.msgout(3, "import_module ->", m)
  251. return m
  252. def load_module(self, fqname, fp, pathname, file_info):
  253. suffix, mode, type = file_info
  254. self.msgin(2, "load_module", fqname, fp and "fp", pathname)
  255. if type == imp.PKG_DIRECTORY:
  256. m = self.load_package(fqname, pathname)
  257. self.msgout(2, "load_module ->", m)
  258. return m
  259. if type == imp.PY_SOURCE:
  260. co = compile(fp.read()+'\n', pathname, 'exec')
  261. elif type == imp.PY_COMPILED:
  262. try:
  263. data = fp.read()
  264. importlib._bootstrap_external._classify_pyc(data, fqname, {})
  265. except ImportError as exc:
  266. self.msgout(2, "raise ImportError: " + str(exc), pathname)
  267. raise
  268. co = marshal.loads(memoryview(data)[16:])
  269. else:
  270. co = None
  271. m = self.add_module(fqname)
  272. m.__file__ = pathname
  273. if co:
  274. if self.replace_paths:
  275. co = self.replace_paths_in_code(co)
  276. m.__code__ = co
  277. self.scan_code(co, m)
  278. self.msgout(2, "load_module ->", m)
  279. return m
  280. def _add_badmodule(self, name, caller):
  281. if name not in self.badmodules:
  282. self.badmodules[name] = {}
  283. if caller:
  284. self.badmodules[name][caller.__name__] = 1
  285. else:
  286. self.badmodules[name]["-"] = 1
  287. def _safe_import_hook(self, name, caller, fromlist, level=-1):
  288. # wrapper for self.import_hook() that won't raise ImportError
  289. if name in self.badmodules:
  290. self._add_badmodule(name, caller)
  291. return
  292. try:
  293. self.import_hook(name, caller, level=level)
  294. except ImportError as msg:
  295. self.msg(2, "ImportError:", str(msg))
  296. self._add_badmodule(name, caller)
  297. else:
  298. if fromlist:
  299. for sub in fromlist:
  300. if sub in self.badmodules:
  301. self._add_badmodule(sub, caller)
  302. continue
  303. try:
  304. self.import_hook(name, caller, [sub], level=level)
  305. except ImportError as msg:
  306. self.msg(2, "ImportError:", str(msg))
  307. fullname = name + "." + sub
  308. self._add_badmodule(fullname, caller)
  309. def scan_opcodes(self, co):
  310. # Scan the code, and yield 'interesting' opcode combinations
  311. code = co.co_code
  312. names = co.co_names
  313. consts = co.co_consts
  314. opargs = [(op, arg) for _, op, arg in dis._unpack_opargs(code)
  315. if op != EXTENDED_ARG]
  316. for i, (op, oparg) in enumerate(opargs):
  317. if op in STORE_OPS:
  318. yield "store", (names[oparg],)
  319. continue
  320. if (op == IMPORT_NAME and i >= 2
  321. and opargs[i-1][0] == opargs[i-2][0] == LOAD_CONST):
  322. level = consts[opargs[i-2][1]]
  323. fromlist = consts[opargs[i-1][1]]
  324. if level == 0: # absolute import
  325. yield "absolute_import", (fromlist, names[oparg])
  326. else: # relative import
  327. yield "relative_import", (level, fromlist, names[oparg])
  328. continue
  329. def scan_code(self, co, m):
  330. code = co.co_code
  331. scanner = self.scan_opcodes
  332. for what, args in scanner(co):
  333. if what == "store":
  334. name, = args
  335. m.globalnames[name] = 1
  336. elif what == "absolute_import":
  337. fromlist, name = args
  338. have_star = 0
  339. if fromlist is not None:
  340. if "*" in fromlist:
  341. have_star = 1
  342. fromlist = [f for f in fromlist if f != "*"]
  343. self._safe_import_hook(name, m, fromlist, level=0)
  344. if have_star:
  345. # We've encountered an "import *". If it is a Python module,
  346. # the code has already been parsed and we can suck out the
  347. # global names.
  348. mm = None
  349. if m.__path__:
  350. # At this point we don't know whether 'name' is a
  351. # submodule of 'm' or a global module. Let's just try
  352. # the full name first.
  353. mm = self.modules.get(m.__name__ + "." + name)
  354. if mm is None:
  355. mm = self.modules.get(name)
  356. if mm is not None:
  357. m.globalnames.update(mm.globalnames)
  358. m.starimports.update(mm.starimports)
  359. if mm.__code__ is None:
  360. m.starimports[name] = 1
  361. else:
  362. m.starimports[name] = 1
  363. elif what == "relative_import":
  364. level, fromlist, name = args
  365. if name:
  366. self._safe_import_hook(name, m, fromlist, level=level)
  367. else:
  368. parent = self.determine_parent(m, level=level)
  369. self._safe_import_hook(parent.__name__, None, fromlist, level=0)
  370. else:
  371. # We don't expect anything else from the generator.
  372. raise RuntimeError(what)
  373. for c in co.co_consts:
  374. if isinstance(c, type(co)):
  375. self.scan_code(c, m)
  376. def load_package(self, fqname, pathname):
  377. self.msgin(2, "load_package", fqname, pathname)
  378. newname = replacePackageMap.get(fqname)
  379. if newname:
  380. fqname = newname
  381. m = self.add_module(fqname)
  382. m.__file__ = pathname
  383. m.__path__ = [pathname]
  384. # As per comment at top of file, simulate runtime __path__ additions.
  385. m.__path__ = m.__path__ + packagePathMap.get(fqname, [])
  386. fp, buf, stuff = self.find_module("__init__", m.__path__)
  387. try:
  388. self.load_module(fqname, fp, buf, stuff)
  389. self.msgout(2, "load_package ->", m)
  390. return m
  391. finally:
  392. if fp:
  393. fp.close()
  394. def add_module(self, fqname):
  395. if fqname in self.modules:
  396. return self.modules[fqname]
  397. self.modules[fqname] = m = Module(fqname)
  398. return m
  399. def find_module(self, name, path, parent=None):
  400. if parent is not None:
  401. # assert path is not None
  402. fullname = parent.__name__+'.'+name
  403. else:
  404. fullname = name
  405. if fullname in self.excludes:
  406. self.msgout(3, "find_module -> Excluded", fullname)
  407. raise ImportError(name)
  408. if path is None:
  409. if name in sys.builtin_module_names:
  410. return (None, None, ("", "", imp.C_BUILTIN))
  411. path = self.path
  412. return imp.find_module(name, path)
  413. def report(self):
  414. """Print a report to stdout, listing the found modules with their
  415. paths, as well as modules that are missing, or seem to be missing.
  416. """
  417. print()
  418. print(" %-25s %s" % ("Name", "File"))
  419. print(" %-25s %s" % ("----", "----"))
  420. # Print modules found
  421. keys = sorted(self.modules.keys())
  422. for key in keys:
  423. m = self.modules[key]
  424. if m.__path__:
  425. print("P", end=' ')
  426. else:
  427. print("m", end=' ')
  428. print("%-25s" % key, m.__file__ or "")
  429. # Print missing modules
  430. missing, maybe = self.any_missing_maybe()
  431. if missing:
  432. print()
  433. print("Missing modules:")
  434. for name in missing:
  435. mods = sorted(self.badmodules[name].keys())
  436. print("?", name, "imported from", ', '.join(mods))
  437. # Print modules that may be missing, but then again, maybe not...
  438. if maybe:
  439. print()
  440. print("Submodules that appear to be missing, but could also be", end=' ')
  441. print("global names in the parent package:")
  442. for name in maybe:
  443. mods = sorted(self.badmodules[name].keys())
  444. print("?", name, "imported from", ', '.join(mods))
  445. def any_missing(self):
  446. """Return a list of modules that appear to be missing. Use
  447. any_missing_maybe() if you want to know which modules are
  448. certain to be missing, and which *may* be missing.
  449. """
  450. missing, maybe = self.any_missing_maybe()
  451. return missing + maybe
  452. def any_missing_maybe(self):
  453. """Return two lists, one with modules that are certainly missing
  454. and one with modules that *may* be missing. The latter names could
  455. either be submodules *or* just global names in the package.
  456. The reason it can't always be determined is that it's impossible to
  457. tell which names are imported when "from module import *" is done
  458. with an extension module, short of actually importing it.
  459. """
  460. missing = []
  461. maybe = []
  462. for name in self.badmodules:
  463. if name in self.excludes:
  464. continue
  465. i = name.rfind(".")
  466. if i < 0:
  467. missing.append(name)
  468. continue
  469. subname = name[i+1:]
  470. pkgname = name[:i]
  471. pkg = self.modules.get(pkgname)
  472. if pkg is not None:
  473. if pkgname in self.badmodules[name]:
  474. # The package tried to import this module itself and
  475. # failed. It's definitely missing.
  476. missing.append(name)
  477. elif subname in pkg.globalnames:
  478. # It's a global in the package: definitely not missing.
  479. pass
  480. elif pkg.starimports:
  481. # It could be missing, but the package did an "import *"
  482. # from a non-Python module, so we simply can't be sure.
  483. maybe.append(name)
  484. else:
  485. # It's not a global in the package, the package didn't
  486. # do funny star imports, it's very likely to be missing.
  487. # The symbol could be inserted into the package from the
  488. # outside, but since that's not good style we simply list
  489. # it missing.
  490. missing.append(name)
  491. else:
  492. missing.append(name)
  493. missing.sort()
  494. maybe.sort()
  495. return missing, maybe
  496. def replace_paths_in_code(self, co):
  497. new_filename = original_filename = os.path.normpath(co.co_filename)
  498. for f, r in self.replace_paths:
  499. if original_filename.startswith(f):
  500. new_filename = r + original_filename[len(f):]
  501. break
  502. if self.debug and original_filename not in self.processed_paths:
  503. if new_filename != original_filename:
  504. self.msgout(2, "co_filename %r changed to %r" \
  505. % (original_filename,new_filename,))
  506. else:
  507. self.msgout(2, "co_filename %r remains unchanged" \
  508. % (original_filename,))
  509. self.processed_paths.append(original_filename)
  510. consts = list(co.co_consts)
  511. for i in range(len(consts)):
  512. if isinstance(consts[i], type(co)):
  513. consts[i] = self.replace_paths_in_code(consts[i])
  514. return types.CodeType(co.co_argcount, co.co_kwonlyargcount,
  515. co.co_nlocals, co.co_stacksize, co.co_flags,
  516. co.co_code, tuple(consts), co.co_names,
  517. co.co_varnames, new_filename, co.co_name,
  518. co.co_firstlineno, co.co_lnotab, co.co_freevars,
  519. co.co_cellvars)
  520. def test():
  521. # Parse command line
  522. import getopt
  523. try:
  524. opts, args = getopt.getopt(sys.argv[1:], "dmp:qx:")
  525. except getopt.error as msg:
  526. print(msg)
  527. return
  528. # Process options
  529. debug = 1
  530. domods = 0
  531. addpath = []
  532. exclude = []
  533. for o, a in opts:
  534. if o == '-d':
  535. debug = debug + 1
  536. if o == '-m':
  537. domods = 1
  538. if o == '-p':
  539. addpath = addpath + a.split(os.pathsep)
  540. if o == '-q':
  541. debug = 0
  542. if o == '-x':
  543. exclude.append(a)
  544. # Provide default arguments
  545. if not args:
  546. script = "hello.py"
  547. else:
  548. script = args[0]
  549. # Set the path based on sys.path and the script directory
  550. path = sys.path[:]
  551. path[0] = os.path.dirname(script)
  552. path = addpath + path
  553. if debug > 1:
  554. print("path:")
  555. for item in path:
  556. print(" ", repr(item))
  557. # Create the module finder and turn its crank
  558. mf = ModuleFinder(path, debug, exclude)
  559. for arg in args[1:]:
  560. if arg == '-m':
  561. domods = 1
  562. continue
  563. if domods:
  564. if arg[-2:] == '.*':
  565. mf.import_hook(arg[:-2], None, ["*"])
  566. else:
  567. mf.import_hook(arg)
  568. else:
  569. mf.load_file(arg)
  570. mf.run_script(script)
  571. mf.report()
  572. return mf # for -i debugging
  573. if __name__ == '__main__':
  574. try:
  575. mf = test()
  576. except KeyboardInterrupt:
  577. print("\n[interrupted]")