plistlib.py 29 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978
  1. r"""plistlib.py -- a tool to generate and parse MacOSX .plist files.
  2. The property list (.plist) file format is a simple XML pickle supporting
  3. basic object types, like dictionaries, lists, numbers and strings.
  4. Usually the top level object is a dictionary.
  5. To write out a plist file, use the dump(value, file)
  6. function. 'value' is the top level object, 'file' is
  7. a (writable) file object.
  8. To parse a plist from a file, use the load(file) function,
  9. with a (readable) file object as the only argument. It
  10. returns the top level object (again, usually a dictionary).
  11. To work with plist data in bytes objects, you can use loads()
  12. and dumps().
  13. Values can be strings, integers, floats, booleans, tuples, lists,
  14. dictionaries (but only with string keys), Data, bytes, bytearray, or
  15. datetime.datetime objects.
  16. Generate Plist example:
  17. pl = dict(
  18. aString = "Doodah",
  19. aList = ["A", "B", 12, 32.1, [1, 2, 3]],
  20. aFloat = 0.1,
  21. anInt = 728,
  22. aDict = dict(
  23. anotherString = "<hello & hi there!>",
  24. aUnicodeValue = "M\xe4ssig, Ma\xdf",
  25. aTrueValue = True,
  26. aFalseValue = False,
  27. ),
  28. someData = b"<binary gunk>",
  29. someMoreData = b"<lots of binary gunk>" * 10,
  30. aDate = datetime.datetime.fromtimestamp(time.mktime(time.gmtime())),
  31. )
  32. with open(fileName, 'wb') as fp:
  33. dump(pl, fp)
  34. Parse Plist example:
  35. with open(fileName, 'rb') as fp:
  36. pl = load(fp)
  37. print(pl["aKey"])
  38. """
  39. __all__ = [
  40. "readPlist", "writePlist", "readPlistFromBytes", "writePlistToBytes",
  41. "Data", "InvalidFileException", "FMT_XML", "FMT_BINARY",
  42. "load", "dump", "loads", "dumps"
  43. ]
  44. import binascii
  45. import codecs
  46. import contextlib
  47. import datetime
  48. import enum
  49. from io import BytesIO
  50. import itertools
  51. import os
  52. import re
  53. import struct
  54. from warnings import warn
  55. from xml.parsers.expat import ParserCreate
  56. PlistFormat = enum.Enum('PlistFormat', 'FMT_XML FMT_BINARY', module=__name__)
  57. globals().update(PlistFormat.__members__)
  58. #
  59. #
  60. # Deprecated functionality
  61. #
  62. #
  63. @contextlib.contextmanager
  64. def _maybe_open(pathOrFile, mode):
  65. if isinstance(pathOrFile, str):
  66. with open(pathOrFile, mode) as fp:
  67. yield fp
  68. else:
  69. yield pathOrFile
  70. def readPlist(pathOrFile):
  71. """
  72. Read a .plist from a path or file. pathOrFile should either
  73. be a file name, or a readable binary file object.
  74. This function is deprecated, use load instead.
  75. """
  76. warn("The readPlist function is deprecated, use load() instead",
  77. DeprecationWarning, 2)
  78. with _maybe_open(pathOrFile, 'rb') as fp:
  79. return load(fp, fmt=None, use_builtin_types=False)
  80. def writePlist(value, pathOrFile):
  81. """
  82. Write 'value' to a .plist file. 'pathOrFile' may either be a
  83. file name or a (writable) file object.
  84. This function is deprecated, use dump instead.
  85. """
  86. warn("The writePlist function is deprecated, use dump() instead",
  87. DeprecationWarning, 2)
  88. with _maybe_open(pathOrFile, 'wb') as fp:
  89. dump(value, fp, fmt=FMT_XML, sort_keys=True, skipkeys=False)
  90. def readPlistFromBytes(data):
  91. """
  92. Read a plist data from a bytes object. Return the root object.
  93. This function is deprecated, use loads instead.
  94. """
  95. warn("The readPlistFromBytes function is deprecated, use loads() instead",
  96. DeprecationWarning, 2)
  97. return load(BytesIO(data), fmt=None, use_builtin_types=False)
  98. def writePlistToBytes(value):
  99. """
  100. Return 'value' as a plist-formatted bytes object.
  101. This function is deprecated, use dumps instead.
  102. """
  103. warn("The writePlistToBytes function is deprecated, use dumps() instead",
  104. DeprecationWarning, 2)
  105. f = BytesIO()
  106. dump(value, f, fmt=FMT_XML, sort_keys=True, skipkeys=False)
  107. return f.getvalue()
  108. class Data:
  109. """
  110. Wrapper for binary data.
  111. This class is deprecated, use a bytes object instead.
  112. """
  113. def __init__(self, data):
  114. if not isinstance(data, bytes):
  115. raise TypeError("data must be as bytes")
  116. self.data = data
  117. @classmethod
  118. def fromBase64(cls, data):
  119. # base64.decodebytes just calls binascii.a2b_base64;
  120. # it seems overkill to use both base64 and binascii.
  121. return cls(_decode_base64(data))
  122. def asBase64(self, maxlinelength=76):
  123. return _encode_base64(self.data, maxlinelength)
  124. def __eq__(self, other):
  125. if isinstance(other, self.__class__):
  126. return self.data == other.data
  127. elif isinstance(other, bytes):
  128. return self.data == other
  129. else:
  130. return NotImplemented
  131. def __repr__(self):
  132. return "%s(%s)" % (self.__class__.__name__, repr(self.data))
  133. #
  134. #
  135. # End of deprecated functionality
  136. #
  137. #
  138. #
  139. # XML support
  140. #
  141. # XML 'header'
  142. PLISTHEADER = b"""\
  143. <?xml version="1.0" encoding="UTF-8"?>
  144. <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
  145. """
  146. # Regex to find any control chars, except for \t \n and \r
  147. _controlCharPat = re.compile(
  148. r"[\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0b\x0c\x0e\x0f"
  149. r"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f]")
  150. def _encode_base64(s, maxlinelength=76):
  151. # copied from base64.encodebytes(), with added maxlinelength argument
  152. maxbinsize = (maxlinelength//4)*3
  153. pieces = []
  154. for i in range(0, len(s), maxbinsize):
  155. chunk = s[i : i + maxbinsize]
  156. pieces.append(binascii.b2a_base64(chunk))
  157. return b''.join(pieces)
  158. def _decode_base64(s):
  159. if isinstance(s, str):
  160. return binascii.a2b_base64(s.encode("utf-8"))
  161. else:
  162. return binascii.a2b_base64(s)
  163. # Contents should conform to a subset of ISO 8601
  164. # (in particular, YYYY '-' MM '-' DD 'T' HH ':' MM ':' SS 'Z'. Smaller units
  165. # may be omitted with # a loss of precision)
  166. _dateParser = re.compile(r"(?P<year>\d\d\d\d)(?:-(?P<month>\d\d)(?:-(?P<day>\d\d)(?:T(?P<hour>\d\d)(?::(?P<minute>\d\d)(?::(?P<second>\d\d))?)?)?)?)?Z", re.ASCII)
  167. def _date_from_string(s):
  168. order = ('year', 'month', 'day', 'hour', 'minute', 'second')
  169. gd = _dateParser.match(s).groupdict()
  170. lst = []
  171. for key in order:
  172. val = gd[key]
  173. if val is None:
  174. break
  175. lst.append(int(val))
  176. return datetime.datetime(*lst)
  177. def _date_to_string(d):
  178. return '%04d-%02d-%02dT%02d:%02d:%02dZ' % (
  179. d.year, d.month, d.day,
  180. d.hour, d.minute, d.second
  181. )
  182. def _escape(text):
  183. m = _controlCharPat.search(text)
  184. if m is not None:
  185. raise ValueError("strings can't contains control characters; "
  186. "use bytes instead")
  187. text = text.replace("\r\n", "\n") # convert DOS line endings
  188. text = text.replace("\r", "\n") # convert Mac line endings
  189. text = text.replace("&", "&amp;") # escape '&'
  190. text = text.replace("<", "&lt;") # escape '<'
  191. text = text.replace(">", "&gt;") # escape '>'
  192. return text
  193. class _PlistParser:
  194. def __init__(self, use_builtin_types, dict_type):
  195. self.stack = []
  196. self.current_key = None
  197. self.root = None
  198. self._use_builtin_types = use_builtin_types
  199. self._dict_type = dict_type
  200. def parse(self, fileobj):
  201. self.parser = ParserCreate()
  202. self.parser.StartElementHandler = self.handle_begin_element
  203. self.parser.EndElementHandler = self.handle_end_element
  204. self.parser.CharacterDataHandler = self.handle_data
  205. self.parser.ParseFile(fileobj)
  206. return self.root
  207. def handle_begin_element(self, element, attrs):
  208. self.data = []
  209. handler = getattr(self, "begin_" + element, None)
  210. if handler is not None:
  211. handler(attrs)
  212. def handle_end_element(self, element):
  213. handler = getattr(self, "end_" + element, None)
  214. if handler is not None:
  215. handler()
  216. def handle_data(self, data):
  217. self.data.append(data)
  218. def add_object(self, value):
  219. if self.current_key is not None:
  220. if not isinstance(self.stack[-1], type({})):
  221. raise ValueError("unexpected element at line %d" %
  222. self.parser.CurrentLineNumber)
  223. self.stack[-1][self.current_key] = value
  224. self.current_key = None
  225. elif not self.stack:
  226. # this is the root object
  227. self.root = value
  228. else:
  229. if not isinstance(self.stack[-1], type([])):
  230. raise ValueError("unexpected element at line %d" %
  231. self.parser.CurrentLineNumber)
  232. self.stack[-1].append(value)
  233. def get_data(self):
  234. data = ''.join(self.data)
  235. self.data = []
  236. return data
  237. # element handlers
  238. def begin_dict(self, attrs):
  239. d = self._dict_type()
  240. self.add_object(d)
  241. self.stack.append(d)
  242. def end_dict(self):
  243. if self.current_key:
  244. raise ValueError("missing value for key '%s' at line %d" %
  245. (self.current_key,self.parser.CurrentLineNumber))
  246. self.stack.pop()
  247. def end_key(self):
  248. if self.current_key or not isinstance(self.stack[-1], type({})):
  249. raise ValueError("unexpected key at line %d" %
  250. self.parser.CurrentLineNumber)
  251. self.current_key = self.get_data()
  252. def begin_array(self, attrs):
  253. a = []
  254. self.add_object(a)
  255. self.stack.append(a)
  256. def end_array(self):
  257. self.stack.pop()
  258. def end_true(self):
  259. self.add_object(True)
  260. def end_false(self):
  261. self.add_object(False)
  262. def end_integer(self):
  263. self.add_object(int(self.get_data()))
  264. def end_real(self):
  265. self.add_object(float(self.get_data()))
  266. def end_string(self):
  267. self.add_object(self.get_data())
  268. def end_data(self):
  269. if self._use_builtin_types:
  270. self.add_object(_decode_base64(self.get_data()))
  271. else:
  272. self.add_object(Data.fromBase64(self.get_data()))
  273. def end_date(self):
  274. self.add_object(_date_from_string(self.get_data()))
  275. class _DumbXMLWriter:
  276. def __init__(self, file, indent_level=0, indent="\t"):
  277. self.file = file
  278. self.stack = []
  279. self._indent_level = indent_level
  280. self.indent = indent
  281. def begin_element(self, element):
  282. self.stack.append(element)
  283. self.writeln("<%s>" % element)
  284. self._indent_level += 1
  285. def end_element(self, element):
  286. assert self._indent_level > 0
  287. assert self.stack.pop() == element
  288. self._indent_level -= 1
  289. self.writeln("</%s>" % element)
  290. def simple_element(self, element, value=None):
  291. if value is not None:
  292. value = _escape(value)
  293. self.writeln("<%s>%s</%s>" % (element, value, element))
  294. else:
  295. self.writeln("<%s/>" % element)
  296. def writeln(self, line):
  297. if line:
  298. # plist has fixed encoding of utf-8
  299. # XXX: is this test needed?
  300. if isinstance(line, str):
  301. line = line.encode('utf-8')
  302. self.file.write(self._indent_level * self.indent)
  303. self.file.write(line)
  304. self.file.write(b'\n')
  305. class _PlistWriter(_DumbXMLWriter):
  306. def __init__(
  307. self, file, indent_level=0, indent=b"\t", writeHeader=1,
  308. sort_keys=True, skipkeys=False):
  309. if writeHeader:
  310. file.write(PLISTHEADER)
  311. _DumbXMLWriter.__init__(self, file, indent_level, indent)
  312. self._sort_keys = sort_keys
  313. self._skipkeys = skipkeys
  314. def write(self, value):
  315. self.writeln("<plist version=\"1.0\">")
  316. self.write_value(value)
  317. self.writeln("</plist>")
  318. def write_value(self, value):
  319. if isinstance(value, str):
  320. self.simple_element("string", value)
  321. elif value is True:
  322. self.simple_element("true")
  323. elif value is False:
  324. self.simple_element("false")
  325. elif isinstance(value, int):
  326. if -1 << 63 <= value < 1 << 64:
  327. self.simple_element("integer", "%d" % value)
  328. else:
  329. raise OverflowError(value)
  330. elif isinstance(value, float):
  331. self.simple_element("real", repr(value))
  332. elif isinstance(value, dict):
  333. self.write_dict(value)
  334. elif isinstance(value, Data):
  335. self.write_data(value)
  336. elif isinstance(value, (bytes, bytearray)):
  337. self.write_bytes(value)
  338. elif isinstance(value, datetime.datetime):
  339. self.simple_element("date", _date_to_string(value))
  340. elif isinstance(value, (tuple, list)):
  341. self.write_array(value)
  342. else:
  343. raise TypeError("unsupported type: %s" % type(value))
  344. def write_data(self, data):
  345. self.write_bytes(data.data)
  346. def write_bytes(self, data):
  347. self.begin_element("data")
  348. self._indent_level -= 1
  349. maxlinelength = max(
  350. 16,
  351. 76 - len(self.indent.replace(b"\t", b" " * 8) * self._indent_level))
  352. for line in _encode_base64(data, maxlinelength).split(b"\n"):
  353. if line:
  354. self.writeln(line)
  355. self._indent_level += 1
  356. self.end_element("data")
  357. def write_dict(self, d):
  358. if d:
  359. self.begin_element("dict")
  360. if self._sort_keys:
  361. items = sorted(d.items())
  362. else:
  363. items = d.items()
  364. for key, value in items:
  365. if not isinstance(key, str):
  366. if self._skipkeys:
  367. continue
  368. raise TypeError("keys must be strings")
  369. self.simple_element("key", key)
  370. self.write_value(value)
  371. self.end_element("dict")
  372. else:
  373. self.simple_element("dict")
  374. def write_array(self, array):
  375. if array:
  376. self.begin_element("array")
  377. for value in array:
  378. self.write_value(value)
  379. self.end_element("array")
  380. else:
  381. self.simple_element("array")
  382. def _is_fmt_xml(header):
  383. prefixes = (b'<?xml', b'<plist')
  384. for pfx in prefixes:
  385. if header.startswith(pfx):
  386. return True
  387. # Also check for alternative XML encodings, this is slightly
  388. # overkill because the Apple tools (and plistlib) will not
  389. # generate files with these encodings.
  390. for bom, encoding in (
  391. (codecs.BOM_UTF8, "utf-8"),
  392. (codecs.BOM_UTF16_BE, "utf-16-be"),
  393. (codecs.BOM_UTF16_LE, "utf-16-le"),
  394. # expat does not support utf-32
  395. #(codecs.BOM_UTF32_BE, "utf-32-be"),
  396. #(codecs.BOM_UTF32_LE, "utf-32-le"),
  397. ):
  398. if not header.startswith(bom):
  399. continue
  400. for start in prefixes:
  401. prefix = bom + start.decode('ascii').encode(encoding)
  402. if header[:len(prefix)] == prefix:
  403. return True
  404. return False
  405. #
  406. # Binary Plist
  407. #
  408. class InvalidFileException (ValueError):
  409. def __init__(self, message="Invalid file"):
  410. ValueError.__init__(self, message)
  411. _BINARY_FORMAT = {1: 'B', 2: 'H', 4: 'L', 8: 'Q'}
  412. _undefined = object()
  413. class _BinaryPlistParser:
  414. """
  415. Read or write a binary plist file, following the description of the binary
  416. format. Raise InvalidFileException in case of error, otherwise return the
  417. root object.
  418. see also: http://opensource.apple.com/source/CF/CF-744.18/CFBinaryPList.c
  419. """
  420. def __init__(self, use_builtin_types, dict_type):
  421. self._use_builtin_types = use_builtin_types
  422. self._dict_type = dict_type
  423. def parse(self, fp):
  424. try:
  425. # The basic file format:
  426. # HEADER
  427. # object...
  428. # refid->offset...
  429. # TRAILER
  430. self._fp = fp
  431. self._fp.seek(-32, os.SEEK_END)
  432. trailer = self._fp.read(32)
  433. if len(trailer) != 32:
  434. raise InvalidFileException()
  435. (
  436. offset_size, self._ref_size, num_objects, top_object,
  437. offset_table_offset
  438. ) = struct.unpack('>6xBBQQQ', trailer)
  439. self._fp.seek(offset_table_offset)
  440. self._object_offsets = self._read_ints(num_objects, offset_size)
  441. self._objects = [_undefined] * num_objects
  442. return self._read_object(top_object)
  443. except (OSError, IndexError, struct.error, OverflowError,
  444. UnicodeDecodeError):
  445. raise InvalidFileException()
  446. def _get_size(self, tokenL):
  447. """ return the size of the next object."""
  448. if tokenL == 0xF:
  449. m = self._fp.read(1)[0] & 0x3
  450. s = 1 << m
  451. f = '>' + _BINARY_FORMAT[s]
  452. return struct.unpack(f, self._fp.read(s))[0]
  453. return tokenL
  454. def _read_ints(self, n, size):
  455. data = self._fp.read(size * n)
  456. if size in _BINARY_FORMAT:
  457. return struct.unpack('>' + _BINARY_FORMAT[size] * n, data)
  458. else:
  459. if not size or len(data) != size * n:
  460. raise InvalidFileException()
  461. return tuple(int.from_bytes(data[i: i + size], 'big')
  462. for i in range(0, size * n, size))
  463. def _read_refs(self, n):
  464. return self._read_ints(n, self._ref_size)
  465. def _read_object(self, ref):
  466. """
  467. read the object by reference.
  468. May recursively read sub-objects (content of an array/dict/set)
  469. """
  470. result = self._objects[ref]
  471. if result is not _undefined:
  472. return result
  473. offset = self._object_offsets[ref]
  474. self._fp.seek(offset)
  475. token = self._fp.read(1)[0]
  476. tokenH, tokenL = token & 0xF0, token & 0x0F
  477. if token == 0x00:
  478. result = None
  479. elif token == 0x08:
  480. result = False
  481. elif token == 0x09:
  482. result = True
  483. # The referenced source code also mentions URL (0x0c, 0x0d) and
  484. # UUID (0x0e), but neither can be generated using the Cocoa libraries.
  485. elif token == 0x0f:
  486. result = b''
  487. elif tokenH == 0x10: # int
  488. result = int.from_bytes(self._fp.read(1 << tokenL),
  489. 'big', signed=tokenL >= 3)
  490. elif token == 0x22: # real
  491. result = struct.unpack('>f', self._fp.read(4))[0]
  492. elif token == 0x23: # real
  493. result = struct.unpack('>d', self._fp.read(8))[0]
  494. elif token == 0x33: # date
  495. f = struct.unpack('>d', self._fp.read(8))[0]
  496. # timestamp 0 of binary plists corresponds to 1/1/2001
  497. # (year of Mac OS X 10.0), instead of 1/1/1970.
  498. result = (datetime.datetime(2001, 1, 1) +
  499. datetime.timedelta(seconds=f))
  500. elif tokenH == 0x40: # data
  501. s = self._get_size(tokenL)
  502. if self._use_builtin_types:
  503. result = self._fp.read(s)
  504. else:
  505. result = Data(self._fp.read(s))
  506. elif tokenH == 0x50: # ascii string
  507. s = self._get_size(tokenL)
  508. result = self._fp.read(s).decode('ascii')
  509. result = result
  510. elif tokenH == 0x60: # unicode string
  511. s = self._get_size(tokenL)
  512. result = self._fp.read(s * 2).decode('utf-16be')
  513. # tokenH == 0x80 is documented as 'UID' and appears to be used for
  514. # keyed-archiving, not in plists.
  515. elif tokenH == 0xA0: # array
  516. s = self._get_size(tokenL)
  517. obj_refs = self._read_refs(s)
  518. result = []
  519. self._objects[ref] = result
  520. result.extend(self._read_object(x) for x in obj_refs)
  521. # tokenH == 0xB0 is documented as 'ordset', but is not actually
  522. # implemented in the Apple reference code.
  523. # tokenH == 0xC0 is documented as 'set', but sets cannot be used in
  524. # plists.
  525. elif tokenH == 0xD0: # dict
  526. s = self._get_size(tokenL)
  527. key_refs = self._read_refs(s)
  528. obj_refs = self._read_refs(s)
  529. result = self._dict_type()
  530. self._objects[ref] = result
  531. for k, o in zip(key_refs, obj_refs):
  532. result[self._read_object(k)] = self._read_object(o)
  533. else:
  534. raise InvalidFileException()
  535. self._objects[ref] = result
  536. return result
  537. def _count_to_size(count):
  538. if count < 1 << 8:
  539. return 1
  540. elif count < 1 << 16:
  541. return 2
  542. elif count << 1 << 32:
  543. return 4
  544. else:
  545. return 8
  546. _scalars = (str, int, float, datetime.datetime, bytes)
  547. class _BinaryPlistWriter (object):
  548. def __init__(self, fp, sort_keys, skipkeys):
  549. self._fp = fp
  550. self._sort_keys = sort_keys
  551. self._skipkeys = skipkeys
  552. def write(self, value):
  553. # Flattened object list:
  554. self._objlist = []
  555. # Mappings from object->objectid
  556. # First dict has (type(object), object) as the key,
  557. # second dict is used when object is not hashable and
  558. # has id(object) as the key.
  559. self._objtable = {}
  560. self._objidtable = {}
  561. # Create list of all objects in the plist
  562. self._flatten(value)
  563. # Size of object references in serialized containers
  564. # depends on the number of objects in the plist.
  565. num_objects = len(self._objlist)
  566. self._object_offsets = [0]*num_objects
  567. self._ref_size = _count_to_size(num_objects)
  568. self._ref_format = _BINARY_FORMAT[self._ref_size]
  569. # Write file header
  570. self._fp.write(b'bplist00')
  571. # Write object list
  572. for obj in self._objlist:
  573. self._write_object(obj)
  574. # Write refnum->object offset table
  575. top_object = self._getrefnum(value)
  576. offset_table_offset = self._fp.tell()
  577. offset_size = _count_to_size(offset_table_offset)
  578. offset_format = '>' + _BINARY_FORMAT[offset_size] * num_objects
  579. self._fp.write(struct.pack(offset_format, *self._object_offsets))
  580. # Write trailer
  581. sort_version = 0
  582. trailer = (
  583. sort_version, offset_size, self._ref_size, num_objects,
  584. top_object, offset_table_offset
  585. )
  586. self._fp.write(struct.pack('>5xBBBQQQ', *trailer))
  587. def _flatten(self, value):
  588. # First check if the object is in the object table, not used for
  589. # containers to ensure that two subcontainers with the same contents
  590. # will be serialized as distinct values.
  591. if isinstance(value, _scalars):
  592. if (type(value), value) in self._objtable:
  593. return
  594. elif isinstance(value, Data):
  595. if (type(value.data), value.data) in self._objtable:
  596. return
  597. elif id(value) in self._objidtable:
  598. return
  599. # Add to objectreference map
  600. refnum = len(self._objlist)
  601. self._objlist.append(value)
  602. if isinstance(value, _scalars):
  603. self._objtable[(type(value), value)] = refnum
  604. elif isinstance(value, Data):
  605. self._objtable[(type(value.data), value.data)] = refnum
  606. else:
  607. self._objidtable[id(value)] = refnum
  608. # And finally recurse into containers
  609. if isinstance(value, dict):
  610. keys = []
  611. values = []
  612. items = value.items()
  613. if self._sort_keys:
  614. items = sorted(items)
  615. for k, v in items:
  616. if not isinstance(k, str):
  617. if self._skipkeys:
  618. continue
  619. raise TypeError("keys must be strings")
  620. keys.append(k)
  621. values.append(v)
  622. for o in itertools.chain(keys, values):
  623. self._flatten(o)
  624. elif isinstance(value, (list, tuple)):
  625. for o in value:
  626. self._flatten(o)
  627. def _getrefnum(self, value):
  628. if isinstance(value, _scalars):
  629. return self._objtable[(type(value), value)]
  630. elif isinstance(value, Data):
  631. return self._objtable[(type(value.data), value.data)]
  632. else:
  633. return self._objidtable[id(value)]
  634. def _write_size(self, token, size):
  635. if size < 15:
  636. self._fp.write(struct.pack('>B', token | size))
  637. elif size < 1 << 8:
  638. self._fp.write(struct.pack('>BBB', token | 0xF, 0x10, size))
  639. elif size < 1 << 16:
  640. self._fp.write(struct.pack('>BBH', token | 0xF, 0x11, size))
  641. elif size < 1 << 32:
  642. self._fp.write(struct.pack('>BBL', token | 0xF, 0x12, size))
  643. else:
  644. self._fp.write(struct.pack('>BBQ', token | 0xF, 0x13, size))
  645. def _write_object(self, value):
  646. ref = self._getrefnum(value)
  647. self._object_offsets[ref] = self._fp.tell()
  648. if value is None:
  649. self._fp.write(b'\x00')
  650. elif value is False:
  651. self._fp.write(b'\x08')
  652. elif value is True:
  653. self._fp.write(b'\x09')
  654. elif isinstance(value, int):
  655. if value < 0:
  656. try:
  657. self._fp.write(struct.pack('>Bq', 0x13, value))
  658. except struct.error:
  659. raise OverflowError(value) from None
  660. elif value < 1 << 8:
  661. self._fp.write(struct.pack('>BB', 0x10, value))
  662. elif value < 1 << 16:
  663. self._fp.write(struct.pack('>BH', 0x11, value))
  664. elif value < 1 << 32:
  665. self._fp.write(struct.pack('>BL', 0x12, value))
  666. elif value < 1 << 63:
  667. self._fp.write(struct.pack('>BQ', 0x13, value))
  668. elif value < 1 << 64:
  669. self._fp.write(b'\x14' + value.to_bytes(16, 'big', signed=True))
  670. else:
  671. raise OverflowError(value)
  672. elif isinstance(value, float):
  673. self._fp.write(struct.pack('>Bd', 0x23, value))
  674. elif isinstance(value, datetime.datetime):
  675. f = (value - datetime.datetime(2001, 1, 1)).total_seconds()
  676. self._fp.write(struct.pack('>Bd', 0x33, f))
  677. elif isinstance(value, Data):
  678. self._write_size(0x40, len(value.data))
  679. self._fp.write(value.data)
  680. elif isinstance(value, (bytes, bytearray)):
  681. self._write_size(0x40, len(value))
  682. self._fp.write(value)
  683. elif isinstance(value, str):
  684. try:
  685. t = value.encode('ascii')
  686. self._write_size(0x50, len(value))
  687. except UnicodeEncodeError:
  688. t = value.encode('utf-16be')
  689. self._write_size(0x60, len(t) // 2)
  690. self._fp.write(t)
  691. elif isinstance(value, (list, tuple)):
  692. refs = [self._getrefnum(o) for o in value]
  693. s = len(refs)
  694. self._write_size(0xA0, s)
  695. self._fp.write(struct.pack('>' + self._ref_format * s, *refs))
  696. elif isinstance(value, dict):
  697. keyRefs, valRefs = [], []
  698. if self._sort_keys:
  699. rootItems = sorted(value.items())
  700. else:
  701. rootItems = value.items()
  702. for k, v in rootItems:
  703. if not isinstance(k, str):
  704. if self._skipkeys:
  705. continue
  706. raise TypeError("keys must be strings")
  707. keyRefs.append(self._getrefnum(k))
  708. valRefs.append(self._getrefnum(v))
  709. s = len(keyRefs)
  710. self._write_size(0xD0, s)
  711. self._fp.write(struct.pack('>' + self._ref_format * s, *keyRefs))
  712. self._fp.write(struct.pack('>' + self._ref_format * s, *valRefs))
  713. else:
  714. raise TypeError(value)
  715. def _is_fmt_binary(header):
  716. return header[:8] == b'bplist00'
  717. #
  718. # Generic bits
  719. #
  720. _FORMATS={
  721. FMT_XML: dict(
  722. detect=_is_fmt_xml,
  723. parser=_PlistParser,
  724. writer=_PlistWriter,
  725. ),
  726. FMT_BINARY: dict(
  727. detect=_is_fmt_binary,
  728. parser=_BinaryPlistParser,
  729. writer=_BinaryPlistWriter,
  730. )
  731. }
  732. def load(fp, *, fmt=None, use_builtin_types=True, dict_type=dict):
  733. """Read a .plist file. 'fp' should be a readable and binary file object.
  734. Return the unpacked root object (which usually is a dictionary).
  735. """
  736. if fmt is None:
  737. header = fp.read(32)
  738. fp.seek(0)
  739. for info in _FORMATS.values():
  740. if info['detect'](header):
  741. P = info['parser']
  742. break
  743. else:
  744. raise InvalidFileException()
  745. else:
  746. P = _FORMATS[fmt]['parser']
  747. p = P(use_builtin_types=use_builtin_types, dict_type=dict_type)
  748. return p.parse(fp)
  749. def loads(value, *, fmt=None, use_builtin_types=True, dict_type=dict):
  750. """Read a .plist file from a bytes object.
  751. Return the unpacked root object (which usually is a dictionary).
  752. """
  753. fp = BytesIO(value)
  754. return load(
  755. fp, fmt=fmt, use_builtin_types=use_builtin_types, dict_type=dict_type)
  756. def dump(value, fp, *, fmt=FMT_XML, sort_keys=True, skipkeys=False):
  757. """Write 'value' to a .plist file. 'fp' should be a writable,
  758. binary file object.
  759. """
  760. if fmt not in _FORMATS:
  761. raise ValueError("Unsupported format: %r"%(fmt,))
  762. writer = _FORMATS[fmt]["writer"](fp, sort_keys=sort_keys, skipkeys=skipkeys)
  763. writer.write(value)
  764. def dumps(value, *, fmt=FMT_XML, skipkeys=False, sort_keys=True):
  765. """Return a bytes object with the contents for a .plist file.
  766. """
  767. fp = BytesIO()
  768. dump(value, fp, fmt=fmt, skipkeys=skipkeys, sort_keys=sort_keys)
  769. return fp.getvalue()