7 months ago
2.0 kB
"""Visualize the key distribution stored in a binary key file.

File layout, as read by this script: the first 8 bytes hold an unsigned
64-bit key count, followed by the keys themselves as unsigned 64-bit
integers (``struct`` format ``"Q"``, native byte order).

Two figures are written:

* ``--output``: a curve of randomly sampled keys, sorted, relative to the
  first key in the file;
* ``<output stem>_local.pdf``: a scatter plot of a short run of consecutive
  keys taken at 60% of the way through the file, relative to the first key
  of that run.
"""
import argparse
import os
import struct

import numpy as np

HEADER_SIZE = 8  # bytes occupied by the leading key count
KEY_SIZE = 8     # bytes per key
LOCAL_FRACTION = 0.6  # relative position of the local window; adjust as needed
LOCAL_COUNT = 100     # number of consecutive keys plotted in the local window


def get_args():
    """Parse and return the command-line arguments."""
    parser = argparse.ArgumentParser(description='Visualize the binary file')
    parser.add_argument('--file', type=str, default='osm',
                        help='binary file path')
    parser.add_argument('--output', type=str, default='osm.pdf',
                        help='output image path')
    parser.add_argument('--nsample', type=int, default=10000,
                        help='how many samples to draw')
    return parser.parse_args()


def index_to_offset(x):
    """Return the byte offset of the key with index ``x`` (skips the header)."""
    return HEADER_SIZE + x * KEY_SIZE


def read_u64(f):
    """Read one unsigned 64-bit integer from ``f`` at its current position.

    Raises:
        struct.error: if fewer than 8 bytes remain in ``f``.
    """
    return struct.unpack("Q", f.read(KEY_SIZE))[0]


def sample_keys(f, key_num, nsample):
    """Randomly sample ``nsample`` keys (with replacement) from ``f``.

    Indices are drawn uniformly from ``[0, key_num)`` so that no read can
    land beyond the last key, whatever the size of the data set.

    Args:
        f: seekable binary file object positioned anywhere.
        key_num: number of keys stored in the file (value of the header).
        nsample: number of keys to draw.

    Returns:
        A sorted list of ``key - first_key`` for every sampled key, where
        ``first_key`` is the key at index 0.

    Raises:
        ValueError: if ``key_num`` is not positive.
    """
    if key_num <= 0:
        raise ValueError('the file contains no keys to sample')

    f.seek(index_to_offset(0))
    first_key = read_u64(f)

    indices = np.random.randint(0, key_num, size=nsample, dtype=np.int64)
    # Visiting the indices in ascending order keeps the seeks monotonic; the
    # result is sorted before returning, so the read order does not matter.
    indices.sort()

    deltas = []
    for idx in indices:
        f.seek(index_to_offset(int(idx)))
        deltas.append(read_u64(f) - first_key)
    deltas.sort()
    return deltas


def read_local_keys(f, key_num, fraction=LOCAL_FRACTION, count=LOCAL_COUNT):
    """Read a run of consecutive keys starting at ``fraction`` of the file.

    The key at index ``int(key_num * fraction)`` is the base of the window;
    up to ``count`` keys following it are read. The count is clamped to the
    number of keys actually available after the base, so small files do not
    cause a read past the end.

    Returns:
        A list of ``key - base_key`` for the keys following the base key.

    Raises:
        ValueError: if ``key_num`` is not positive.
    """
    if key_num <= 0:
        raise ValueError('the file contains no keys to sample')

    base_index = min(int(key_num * fraction), key_num - 1)
    available = key_num - base_index - 1
    n = max(0, min(count, available))

    f.seek(index_to_offset(base_index))
    base_key = read_u64(f)
    return [read_u64(f) - base_key for _ in range(n)]


def local_output_path(output):
    """Derive the local-plot path by replacing the extension of ``output``.

    Only the final extension is stripped, so dots in directory names
    (``./out/osm.pdf``) are preserved.
    """
    return os.path.splitext(output)[0] + '_local.pdf'


def main():
    """Read the key file and write the global and local distribution plots."""
    args = get_args()

    with open(args.file, 'rb') as f:
        key_num = read_u64(f)  # the first 8 bytes hold the number of keys
        print(key_num)
        global_keys = sample_keys(f, key_num, args.nsample)
        local_keys = read_local_keys(f, key_num)

    # Imported here so that argument parsing and the reading helpers above
    # do not require a plotting stack.
    import matplotlib.pyplot as plt

    # Global view: sampled keys across the whole file.
    plt.figure(figsize=(10, 5))
    plt.plot(range(len(global_keys)), global_keys)
    plt.savefig(args.output)

    # Local view: consecutive keys around LOCAL_FRACTION of the file.
    plt.figure(figsize=(10, 5))
    plt.scatter(range(len(local_keys)), local_keys,
                s=10, c='b', marker='o', label='local')
    plt.savefig(local_output_path(args.output))


if __name__ == '__main__':
    main()
Leave a Comment