runresult_diff.py 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221
  1. #!/usr/bin/env python3
  2. from nsdk_utils import *
  3. from functools import total_ordering
  4. import os
  5. from posixpath import split
  6. import sys
  7. import time
  8. import copy
  9. import glob
  10. import argparse
  11. import shutil
  12. import subprocess
  13. import csv
  14. import json
  15. import re
  16. import datetime
  17. import operator
  18. import collections
  19. from openpyxl import load_workbook
  20. from openpyxl.utils import get_column_letter, column_index_from_string
  21. SCRIPTDIR = sys.path[0]
  22. # key name as a line,value is two-level nested list, generated csv file is where json file is
  23. def dump_jsonfile2csv(jsonfile, csvfile):
  24. valid, csvtable = load_json(jsonfile)
  25. csvsummary = []
  26. csvlist = []
  27. if len(csvtable) == 0:
  28. print("No data found, no need to generate csv!")
  29. return False
  30. for key in csvtable:
  31. # must has one row title and at least one row content
  32. if len(csvtable[key]) > 1:
  33. csvsummary.append([key])
  34. csvsummary.extend(csvtable[key])
  35. # turn into csv format
  36. for value in csvsummary:
  37. csvlist.append(",".join(map(str, value)))
  38. print(">>> Dumping %s into %s\n" %(jsonfile, csvfile))
  39. save_csv(csvfile, csvlist, display=False)
  40. return True
  41. # Merge barebench different dir's "runresult.xlsx.csv" into one
  42. def merge_csvs(srcdir: str, srcfilename: str, mergedcsv: str):
  43. with open(mergedcsv, "w+") as merged_file:
  44. subdirlist = os.listdir(srcdir)
  45. subdirlist.sort()
  46. for subdirname in subdirlist:
  47. bare_rstfile = os.path.join(srcdir, subdirname, srcfilename)
  48. if not os.path.isfile(bare_rstfile):
  49. continue
  50. print(">>> Merge %s into %s\n" %(bare_rstfile, mergedcsv))
  51. with open(bare_rstfile, "r") as bf:
  52. lineslist = bf.readlines()
  53. lineslist.insert(0, "Case type: " + subdirname + "\n")
  54. merged_file.writelines(lineslist)
  55. def performance_diff_bare(base_value: str, opt_value: str, nounit = True):
  56. # filter: not a number
  57. if re.compile("_*[A-Za-z]+[0-9]*|-").search(base_value) or \
  58. re.compile("_*[A-Za-z]+[0-9]*|-").search(opt_value):
  59. # todo: check base and new in case of wrong item
  60. return "%s" %(base_value)
  61. opt_value_str = opt_value.strip()
  62. base_value_str = base_value.strip()
  63. # get unit
  64. unit = re.compile("[A-Za-z/%]+").findall(opt_value)[-1] if not nounit else " "
  65. # get value
  66. base_value_num = float(base_value_str.split(unit)[0].strip())
  67. opt_value_num = float(opt_value_str.split(unit)[0].strip())
  68. diff_value = float(opt_value_num - base_value_num)
  69. # big is better
  70. #percent = "%f%%" %(abs(diff_value/base_value_num)*100) if base_value != 0 else "%s vs %s" %(diff_value, base_value)
  71. # small is better (opt-base)/opt
  72. percent = "%f%%" %(abs(diff_value/opt_value_num)*100) if base_value != 0 else "%s vs %s" %(diff_value, base_value)
  73. # small means performance increase (↑)
  74. diff_sign = '↑' if 0 > diff_value else '↓'
  75. diff_sign = "Δ=" if 0 == diff_value else diff_sign
  76. diff_content = "%s %f %s %s %s" %(diff_sign, abs(diff_value), unit, diff_sign, percent)
  77. return diff_content
  78. # two-level nested list
  79. def perf_diff_list_bare(base_value_list: list, opt_value_list: list, nounit = True, start_idx = 0):
  80. writecontentlist = []
  81. for baselist, newlist in zip(base_value_list, opt_value_list):
  82. writecontent = []
  83. for basevalue, newvalue in zip(baselist[start_idx:], newlist[start_idx:]):
  84. writecontent.append(performance_diff_bare(str(basevalue).strip(), str(newvalue).strip(), nounit))
  85. if start_idx > 0:
  86. temp = []
  87. temp = baselist[:start_idx]
  88. temp.extend(writecontent)
  89. writecontent = temp
  90. writecontentlist.append(writecontent)
  91. # todo: return[] if only one list, not [[]]
  92. return writecontentlist
  93. def generate_diff_json(base_value_list: list, opt_value_list: list, nounit, start_idx):
  94. diff_list = perf_diff_list_bare(base_value_list, opt_value_list, nounit, start_idx)
  95. return diff_list
  96. def diff_with(base_resultjson: dict, new_resultjson: dict, diff_resultname: str):
  97. baseresult_jsondata_sorted, newresult_jsondata_sorted = sort_dict_netstedlist(base_resultjson, new_resultjson, \
  98. sortfrom_listindex = 1)
  99. if baseresult_jsondata_sorted is None or newresult_jsondata_sorted is None:
  100. print("Error: Empty dict!")
  101. return False
  102. diff_case_results = dict()
  103. for key in newresult_jsondata_sorted:
  104. if key in baseresult_jsondata_sorted:
  105. case_result_new = get_specific_key_value(newresult_jsondata_sorted, key)
  106. case_result_base = get_specific_key_value(baseresult_jsondata_sorted, key)
  107. diff_case_result = generate_diff_json(case_result_base, case_result_new, nounit = True, start_idx = 0)
  108. # generate diff result json from compared jsons
  109. diff_case_results.update({key: diff_case_result})
  110. print("Compare done, start generating report!\n")
  111. diff_resultcsv = os.path.join(SCRIPTDIR, diff_resultname)
  112. diff_resultjson = diff_resultname.rstrip(".csv") + ".json"
  113. save_json(diff_resultjson, diff_case_results)
  114. ret = dump_jsonfile2csv(diff_resultjson, diff_resultcsv)
  115. return ret, diff_resultcsv
  116. # todo: may need to get overlap_set 1st-level keys of two dicts later, now suppose at least 1st-level keys are same(ordered dict)
  117. # two-level nest list, like nmsis_dsp_tests in runresult.xlsx.csvtable.json
  118. def sort_dict_netstedlist(basedict: dict, newdict: dict, sortfrom_listindex: int):
  119. for targetkey in basedict:
  120. if targetkey not in basedict or targetkey not in newdict:
  121. continue
  122. rest_list_base = basedict[targetkey][:sortfrom_listindex]
  123. rest_list_new = newdict[targetkey][:sortfrom_listindex]
  124. if len(basedict[targetkey]) > 1 and len(newdict[targetkey]) > 1:
  125. resultlist_base = basedict[targetkey][sortfrom_listindex:]
  126. resultlist_new = newdict[targetkey][sortfrom_listindex:]
  127. resultlist_sorted_base, resultlist_sorted_new = sort_twolist_by_commonkey(resultlist_base, resultlist_new)
  128. rest_list_base.extend(resultlist_sorted_base)
  129. rest_list_new.extend(resultlist_sorted_new)
  130. basedict.update({targetkey: rest_list_base})
  131. newdict.update({targetkey: rest_list_new})
  132. return basedict, newdict
  133. # two-level nest list, like nmsis_dsp_tests in runresult.xlsx.csvtable.json
  134. # base has the fewer elements, sort by the first two element(0: sub-type, 1: sub-function)
  135. def sort_list_by_baseone_key(base: list, tobe_sorted: list):
  136. new_sortedlist = []
  137. index_to_remove = []
  138. base_keys = [value[0] for value in base]
  139. tobe_sorted_keys = [value[0] for value in tobe_sorted]
  140. for sublist_index, sublist_value in enumerate(base):
  141. found = False
  142. for sublist in tobe_sorted:
  143. if sublist_value[0] == sublist[0] and sublist_value[1] == sublist[1]:
  144. found = True
  145. new_sortedlist.append(sublist)
  146. if not found:
  147. index_to_remove.append(sublist_index)
  148. # remove the element which still doesn't exist in the list "tobe_sorted"
  149. for index in reversed(index_to_remove):
  150. base.pop(index)
  151. union_set = set(tobe_sorted_keys) | set(base_keys)
  152. overlap_set = set(tobe_sorted_keys) & set(base_keys)
  153. if union_set - overlap_set:
  154. print("Warning: these functions as below belongs to no overlap_set, will not show in the diff report: %s\n"
  155. %(union_set - overlap_set))
  156. return base, new_sortedlist
  157. def sort_twolist_by_commonkey(list_1: list, list_2: list):
  158. if len(list_1) < len(list_2):
  159. base, new_sortedlist = sort_list_by_baseone_key(list_1, list_2)
  160. return base, new_sortedlist
  161. else:
  162. base, new_sortedlist = sort_list_by_baseone_key(list_2, list_1)
  163. # corresponding to order of input
  164. return new_sortedlist, base
  165. if __name__ == '__main__':
  166. parser = argparse.ArgumentParser(description = "Compare two runresult.xlsx.csvtable.json to get benchmark result difference")
  167. parser.add_argument('--base', required = True, help = "Path to base one, suffix with .csvtable.json")
  168. parser.add_argument('--new', required = True, help = "Path to new one, suffix with .csvtable.json")
  169. parser.add_argument('--name', default = "diff_runresult", help = "Name of the diff result without suffix")
  170. args = parser.parse_args()
  171. base_json = args.base
  172. new_json = args.new
  173. diff_resultname = "%s.csv" %(args.name.rstrip(".csv"))
  174. valid, baseresult_jsondata = load_json(base_json)
  175. if JSON_OK != valid:
  176. print("Invalid json file %s, please check!" % (base_json))
  177. sys.exit(1)
  178. valid, newresult_jsondata = load_json(new_json)
  179. if JSON_OK != valid:
  180. print("Invalid json file %s, please check!" % (new_json))
  181. sys.exit(1)
  182. print("Start to compare %s with %s\n" %(new_json, base_json))
  183. ret, diff_result = diff_with(baseresult_jsondata, newresult_jsondata, diff_resultname)
  184. if False == ret:
  185. print("Diff failed!\n")
  186. sys.exit(1)
  187. else:
  188. print("Diff succeed! See report %s" %(diff_result))
  189. sys.exit(0)