ziguard.awk 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386
  1. # Convert tzdata source into vanguard or rearguard form.
  2. # Contributed by Paul Eggert. This file is in the public domain.
  3. # This is not a general-purpose converter; it is designed for current tzdata.
  4. # It just converts from current source to main, vanguard, and rearguard forms.
  5. # Although it might be nice for it to be idempotent, or to be useful
  6. # for converting back and forth between vanguard and rearguard formats,
  7. # it does not do these nonessential tasks now.
  8. #
  9. # Although main and vanguard forms are currently equivalent,
  10. # this need not always be the case. When the two forms differ,
  11. # this script can convert either from main to vanguard form (needed then),
  12. # or from vanguard to main form (this conversion would be needed later,
  13. # after main became rearguard and vanguard became main).
  14. # There is no need to convert rearguard to other forms.
  15. #
  16. # When converting to vanguard form, the output can use the line
  17. # "Zone GMT 0 - GMT" which TZUpdater 2.3.2 mistakenly rejects.
  18. #
  19. # When converting to vanguard form, the output can use negative SAVE
  20. # values.
  21. #
  22. # When converting to rearguard form, the output uses only nonnegative
  23. # SAVE values. The idea is for the output data to simulate the behavior
  24. # of the input data as best it can within the constraints of the
  25. # rearguard format.
  26. # Given a FIELD like "-0:30", return a minute count like -30.
  27. function get_minutes(field, \
  28. sign, hours, minutes)
  29. {
  30. sign = field ~ /^-/ ? -1 : 1
  31. hours = +field
  32. if (field ~ /:/) {
  33. minutes = field
  34. sub(/[^:]*:/, "", minutes)
  35. }
  36. return 60 * hours + sign * minutes
  37. }
  38. # Given an OFFSET, which is a minute count like 300 or 330,
  39. # return a %z-style abbreviation like "+05" or "+0530".
  40. function offset_abbr(offset, \
  41. hours, minutes, sign)
  42. {
  43. hours = int(offset / 60)
  44. minutes = offset % 60
  45. if (minutes) {
  46. return sprintf("%+.4d", hours * 100 + minutes);
  47. } else {
  48. return sprintf("%+.2d", hours)
  49. }
  50. }
  51. # Round TIMESTAMP (a +-hh:mm:ss.dddd string) to the nearest second.
  52. function round_to_second(timestamp, \
  53. hh, mm, ss, seconds, dot_dddd, subseconds)
  54. {
  55. dot_dddd = timestamp
  56. if (!sub(/^[+-]?[0-9]+:[0-9]+:[0-9]+\./, ".", dot_dddd))
  57. return timestamp
  58. hh = mm = ss = timestamp
  59. sub(/^[-+]?[0-9]+:[0-9]+:/, "", ss)
  60. sub(/^[-+]?[0-9]+:/, "", mm)
  61. sub(/^[-+]?/, "", hh)
  62. seconds = 3600 * hh + 60 * mm + ss
  63. subseconds = +dot_dddd
  64. seconds += 0.5 < subseconds || ((subseconds == 0.5) && (seconds % 2));
  65. return sprintf("%s%d:%.2d:%.2d", timestamp ~ /^-/ ? "-" : "", \
  66. seconds / 3600, seconds / 60 % 60, seconds % 60)
  67. }
  68. BEGIN {
  69. dataform_type["vanguard"] = 1
  70. dataform_type["main"] = 1
  71. dataform_type["rearguard"] = 1
  72. if (PACKRATLIST) {
  73. while (getline <PACKRATLIST) {
  74. if ($0 ~ /^#/) continue
  75. packratlist[$3] = 1
  76. }
  77. }
  78. # The command line should set DATAFORM.
  79. if (!dataform_type[DATAFORM]) exit 1
  80. }
  81. $1 == "#PACKRATLIST" && $2 == PACKRATLIST {
  82. sub(/^#PACKRATLIST[\t ]+[^\t ]+[\t ]+/, "")
  83. }
  84. /^Zone/ { zone = $2 }
  85. DATAFORM != "main" {
  86. in_comment = $0 ~ /^#/
  87. uncomment = comment_out = 0
  88. # If this line should differ due to Czechoslovakia using negative SAVE values,
  89. # uncomment the desired version and comment out the undesired one.
  90. if (zone == "Europe/Prague" && $0 ~ /^#?[\t ]+[01]:00[\t ]/ \
  91. && $0 ~ /1947 Feb 23/) {
  92. if (($(in_comment + 2) != "-") == (DATAFORM != "rearguard")) {
  93. uncomment = in_comment
  94. } else {
  95. comment_out = !in_comment
  96. }
  97. }
  98. # If this line should differ due to Ireland using negative SAVE values,
  99. # uncomment the desired version and comment out the undesired one.
  100. Rule_Eire = $0 ~ /^#?Rule[\t ]+Eire[\t ]/
  101. Zone_Dublin_post_1968 \
  102. = (zone == "Europe/Dublin" && $0 ~ /^#?[\t ]+[01]:00[\t ]/ \
  103. && (!$(in_comment + 4) || 1968 < $(in_comment + 4)))
  104. if (Rule_Eire || Zone_Dublin_post_1968) {
  105. if ((Rule_Eire \
  106. || (Zone_Dublin_post_1968 && $(in_comment + 3) == "IST/GMT")) \
  107. == (DATAFORM != "rearguard")) {
  108. uncomment = in_comment
  109. } else {
  110. comment_out = !in_comment
  111. }
  112. }
  113. # If this line should differ due to Namibia using negative SAVE values,
  114. # uncomment the desired version and comment out the undesired one.
  115. Rule_Namibia = $0 ~ /^#?Rule[\t ]+Namibia[\t ]/
  116. Zone_using_Namibia_rule \
  117. = (zone == "Africa/Windhoek" && $0 ~ /^#?[\t ]+[12]:00[\t ]/ \
  118. && ($(in_comment + 2) == "Namibia" \
  119. || ($(in_comment + 2) == "-" && $(in_comment + 3) == "CAT" \
  120. && ((1994 <= $(in_comment + 4) && $(in_comment + 4) <= 2017) \
  121. || in_comment + 3 == NF))))
  122. if (Rule_Namibia || Zone_using_Namibia_rule) {
  123. if ((Rule_Namibia \
  124. ? ($9 ~ /^-/ || ($9 == 0 && $10 == "CAT")) \
  125. : $(in_comment + 1) == "2:00" && $(in_comment + 2) == "Namibia") \
  126. == (DATAFORM != "rearguard")) {
  127. uncomment = in_comment
  128. } else {
  129. comment_out = !in_comment
  130. }
  131. }
  132. # If this line should differ due to Portugal benefiting from %z if supported,
  133. # uncomment the desired version and comment out the undesired one.
  134. if ($0 ~ /^#?[\t ]+-[12]:00[\t ]+Port[\t ]+[%+-]/) {
  135. if (($0 ~ /%z/) == (DATAFORM == "vanguard")) {
  136. uncomment = in_comment
  137. } else {
  138. comment_out = !in_comment
  139. }
  140. }
  141. # In vanguard form, use the line "Zone GMT 0 - GMT" instead of
  142. # "Zone Etc/GMT 0 - GMT" and adjust Link lines accordingly.
  143. # This works around a bug in TZUpdater 2.3.2.
  144. if (/^#?(Zone|Link)[\t ]+(Etc\/)?GMT[\t ]/) {
  145. if (($2 == "GMT") == (DATAFORM == "vanguard")) {
  146. uncomment = in_comment
  147. } else {
  148. comment_out = !in_comment
  149. }
  150. }
  151. if (uncomment) {
  152. sub(/^#/, "")
  153. }
  154. if (comment_out) {
  155. sub(/^/, "#")
  156. }
  157. # Prefer %z in vanguard form, explicit abbreviations otherwise.
  158. if (DATAFORM == "vanguard") {
  159. sub(/^(Zone[\t ]+[^\t ]+)?[\t ]+[^\t ]+[\t ]+[^\t ]+[\t ]+[-+][^\t ]+/, \
  160. "&CHANGE-TO-%z")
  161. sub(/-00CHANGE-TO-%z/, "-00")
  162. sub(/[-+][^\t ]+CHANGE-TO-/, "")
  163. } else {
  164. if ($0 ~ /^[^#]*%z/) {
  165. stdoff_column = 2 * ($0 ~ /^Zone/) + 1
  166. rules_column = stdoff_column + 1
  167. stdoff = get_minutes($stdoff_column)
  168. rules = $rules_column
  169. stdabbr = offset_abbr(stdoff)
  170. if (rules == "-") {
  171. abbr = stdabbr
  172. } else {
  173. dstabbr_only = rules ~ /^[+0-9-]/
  174. if (dstabbr_only) {
  175. dstoff = get_minutes(rules)
  176. } else {
  177. # The DST offset is normally an hour, but there are special cases.
  178. if (rules == "Morocco" && NF == 3) {
  179. dstoff = -60
  180. } else if (rules == "NBorneo") {
  181. dstoff = 20
  182. } else if (((rules == "Cook" || rules == "LH") && NF == 3) \
  183. || (rules == "Uruguay" \
  184. && $0 ~ /[\t ](1942 Dec 14|1960|1970|1974 Dec 22)$/)) {
  185. dstoff = 30
  186. } else if (rules == "Uruguay" && $0 ~ /[\t ]1974 Mar 10$/) {
  187. dstoff = 90
  188. } else {
  189. dstoff = 60
  190. }
  191. }
  192. dstabbr = offset_abbr(stdoff + dstoff)
  193. if (dstabbr_only) {
  194. abbr = dstabbr
  195. } else {
  196. abbr = stdabbr "/" dstabbr
  197. }
  198. }
  199. sub(/%z/, abbr)
  200. }
  201. }
  202. # Normally, prefer whole seconds. However, prefer subseconds
  203. # if generating vanguard form and the otherwise-undocumented
  204. # VANGUARD_SUBSECONDS environment variable is set.
  205. # This relies on #STDOFF comment lines in the data.
  206. # It is for hypothetical clients that support UT offsets that are
  207. # not integer multiples of one second (e.g., Europe/Lisbon, 1884 to 1912).
  208. # No known clients need this currently, and this experimental
  209. # feature may be changed or withdrawn in future releases.
  210. if ($1 == "#STDOFF") {
  211. stdoff = $2
  212. rounded_stdoff = round_to_second(stdoff)
  213. if (DATAFORM == "vanguard" && ENVIRON["VANGUARD_SUBSECONDS"]) {
  214. stdoff_subst[0] = rounded_stdoff
  215. stdoff_subst[1] = stdoff
  216. } else {
  217. stdoff_subst[0] = stdoff
  218. stdoff_subst[1] = rounded_stdoff
  219. }
  220. } else if (stdoff_subst[0]) {
  221. stdoff_column = 2 * ($0 ~ /^Zone/) + 1
  222. stdoff_column_val = $stdoff_column
  223. if (stdoff_column_val == stdoff_subst[0]) {
  224. sub(stdoff_subst[0], stdoff_subst[1])
  225. } else if (stdoff_column_val != stdoff_subst[1]) {
  226. stdoff_subst[0] = 0
  227. }
  228. }
  229. # In rearguard form, change the Japan rule line with "Sat>=8 25:00"
  230. # to "Sun>=9 1:00", to cater to zic before 2007 and to older Java.
  231. if ($0 ~ /^Rule/ && $2 == "Japan") {
  232. if (DATAFORM == "rearguard") {
  233. if ($7 == "Sat>=8" && $8 == "25:00") {
  234. sub(/Sat>=8/, "Sun>=9")
  235. sub(/25:00/, " 1:00")
  236. }
  237. } else {
  238. if ($7 == "Sun>=9" && $8 == "1:00") {
  239. sub(/Sun>=9/, "Sat>=8")
  240. sub(/ 1:00/, "25:00")
  241. }
  242. }
  243. }
  244. # In rearguard form, change the Morocco lines with negative SAVE values
  245. # to use positive SAVE values.
  246. if ($2 == "Morocco") {
  247. if ($0 ~ /^Rule/) {
  248. if ($4 ~ /^201[78]$/ && $6 == "Oct") {
  249. if (DATAFORM == "rearguard") {
  250. sub(/\t2018\t/, "\t2017\t")
  251. } else {
  252. sub(/\t2017\t/, "\t2018\t")
  253. }
  254. }
  255. if (2019 <= $3) {
  256. if ($8 == "2:00") {
  257. if (DATAFORM == "rearguard") {
  258. sub(/\t0\t/, "\t1:00\t")
  259. } else {
  260. sub(/\t1:00\t/, "\t0\t")
  261. }
  262. } else {
  263. if (DATAFORM == "rearguard") {
  264. sub(/\t-1:00\t/, "\t0\t")
  265. } else {
  266. sub(/\t0\t/, "\t-1:00\t")
  267. }
  268. }
  269. }
  270. }
  271. if ($1 ~ /^[+0-9-]/ && NF == 3) {
  272. if (DATAFORM == "rearguard") {
  273. sub(/1:00\tMorocco/, "0:00\tMorocco")
  274. sub(/\t\+01\/\+00$/, "\t+00/+01")
  275. } else {
  276. sub(/0:00\tMorocco/, "1:00\tMorocco")
  277. sub(/\t\+00\/+01$/, "\t+01/+00")
  278. }
  279. }
  280. }
  281. }
  282. /^Zone/ {
  283. packrat_ignored = FILENAME == PACKRATDATA && PACKRATLIST && !packratlist[$2];
  284. }
  285. {
  286. if (packrat_ignored && $0 !~ /^Rule/) {
  287. sub(/^/, "#")
  288. }
  289. }
  290. # Return a link line resulting by changing OLDLINE to link to TARGET
  291. # from LINKNAME, instead of linking to OLDTARGET from LINKNAME.
  292. # Align data columns the same as they were in OLDLINE.
  293. # Also, replace any existing white space followed by comment with COMMENT.
  294. function make_linkline(oldline, target, linkname, oldtarget, comment, \
  295. oldprefix, oldprefixlen, oldtargettabs, \
  296. replsuffix, targettabs)
  297. {
  298. oldprefix = "Link\t" oldtarget "\t"
  299. oldprefixlen = length(oldprefix)
  300. if (substr(oldline, 1, oldprefixlen) == oldprefix) {
  301. # Use tab stops to preserve LINKNAME's column.
  302. replsuffix = substr(oldline, oldprefixlen + 1)
  303. sub(/[\t ]*#.*/, "", replsuffix)
  304. oldtargettabs = int(length(oldtarget) / 8) + 1
  305. targettabs = int(length(target) / 8) + 1
  306. for (; targettabs < oldtargettabs; targettabs++) {
  307. replsuffix = "\t" replsuffix
  308. }
  309. for (; oldtargettabs < targettabs && replsuffix ~ /^\t/; targettabs--) {
  310. replsuffix = substr(replsuffix, 2)
  311. }
  312. } else {
  313. # Odd format line; don't bother lining up its replacement nicely.
  314. replsuffix = linkname
  315. }
  316. return "Link\t" target "\t" replsuffix comment
  317. }
  318. /^Link/ && $4 == "#=" && DATAFORM == "vanguard" {
  319. $0 = make_linkline($0, $5, $3, $2)
  320. }
  321. # If a Link line is followed by a Link or Zone line for the same data, comment
  322. # out the Link line. This can happen if backzone overrides a Link
  323. # with a Zone or a different Link.
  324. /^Zone/ {
  325. sub(/^Link/, "#Link", line[linkline[$2]])
  326. }
  327. /^Link/ {
  328. sub(/^Link/, "#Link", line[linkline[$3]])
  329. linkline[$3] = NR
  330. linktarget[$3] = $2
  331. }
  332. { line[NR] = $0 }
  333. function cut_link_chains_short( \
  334. l, linkname, t, target)
  335. {
  336. for (linkname in linktarget) {
  337. target = linktarget[linkname]
  338. t = linktarget[target]
  339. if (t) {
  340. # TARGET is itself a link name. Replace the line "Link TARGET LINKNAME"
  341. # with "Link T LINKNAME #= TARGET", where T is at the end of the chain
  342. # of links that LINKNAME points to.
  343. while ((u = linktarget[t])) {
  344. t = u
  345. }
  346. l = linkline[linkname]
  347. line[l] = make_linkline(line[l], t, linkname, target, "\t#= " target)
  348. }
  349. }
  350. }
  351. END {
  352. if (DATAFORM != "vanguard") {
  353. cut_link_chains_short()
  354. }
  355. for (i = 1; i <= NR; i++)
  356. print line[i]
  357. }