Untitled
unknown
python
2 years ago
2.0 kB
16
Indexable
import argparse
import os
import struct

import matplotlib.pyplot as plt
import numpy as np
def get_args(argv=None):
    """Parse the command-line options of the visualisation script.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default)
            argparse falls back to ``sys.argv[1:]``, which is what the script
            entry point relies on.

    Returns:
        An ``argparse.Namespace`` with the attributes ``file`` (input binary
        path), ``output`` (path of the global plot) and ``nsample`` (number
        of keys to sample for the global plot).
    """
    parser = argparse.ArgumentParser(description='Visualize the binary file')
    parser.add_argument('--file', type=str, default='osm',
                        help='binary file path')
    parser.add_argument('--output', type=str, default='osm.pdf',
                        help='output image path')
    parser.add_argument('--nsample', type=int, default=10000,
                        help='how many samples to draw')
    return parser.parse_args(argv)
def index_to_offset(x):
    """Return the byte offset in the data file of the key at index *x*.

    The offset is ``8 + 8 * x``: the reader in this file treats the first
    8 bytes as a header holding the key count and every key as an 8-byte
    value, so key 0 starts right after the header.

    Args:
        x: Zero-based key index.

    Returns:
        The offset, in bytes from the start of the file, of key *x*.

    Raises:
        ValueError: If *x* is negative. A negative index would otherwise
            resolve to the header (``x == -1``) or to a negative offset.
    """
    if x < 0:
        raise ValueError('key index must be non-negative, got %r' % (x,))
    return 8 + x * 8
def _read_u64(f):
    """Read one unsigned 64-bit integer (struct format ``"Q"``) from *f*.

    Raises:
        ValueError: If fewer than 8 bytes are left in the file.
    """
    raw = f.read(8)
    if len(raw) != 8:
        raise ValueError('unexpected end of file while reading a 64-bit key')
    return struct.unpack("Q", raw)[0]


def _local_output_path(output):
    """Return the path of the local plot: *output* with ``_local`` before its extension."""
    root, ext = os.path.splitext(output)
    # Fall back to PDF when the global output path carries no extension.
    return root + '_local' + (ext or '.pdf')


def main():
    """Plot a global and a local view of the keys stored in a binary file.

    The file is read as an 8-byte key count followed by 8-byte unsigned keys.
    Two figures are written:

    * ``args.output``: ``args.nsample`` keys sampled uniformly at random
      over the whole file, sorted, plotted relative to the first key.
    * ``<output>_local<ext>``: up to 100 consecutive keys following the key
      at 60% of the file, plotted relative to that key.

    Raises:
        ValueError: If the file declares zero keys or ends before a key that
            is needed has been read.
    """
    args = get_args()
    with open(args.file, 'rb') as f:
        # The first 8 bytes hold the number of keys in the data set.
        key_num = _read_u64(f)
        print(key_num)
        if key_num == 0:
            raise ValueError('data file declares zero keys; nothing to plot')

        # The first key is the baseline for the global plot.
        k0 = _read_u64(f)

        # Sample indices from the real key range rather than a fixed upper
        # bound, so files of any size work. Visiting them in ascending order
        # keeps the seeks moving forward; the plotted data is unaffected
        # because the keys are sorted afterwards anyway.
        sample = np.sort(
            np.random.randint(0, key_num, size=args.nsample, dtype=np.int64)
        )
        keys = []
        for idx in sample:
            f.seek(index_to_offset(int(idx)))
            keys.append(_read_u64(f) - k0)
        keys.sort()

        # Global sample plot.
        plt.figure(figsize=(10, 5))
        plt.plot(range(len(keys)), keys)
        plt.savefig(args.output)
        plt.close()

        # Local sample taken at 60% of the data set; adjust as needed.
        sample_pos = int(key_num * 0.6)
        f.seek(index_to_offset(sample_pos))
        k00 = _read_u64(f)  # baseline key of the local window
        # Read the keys that directly follow the baseline, but never past
        # the last key the header says the file contains.
        local_count = min(100, key_num - sample_pos - 1)
        local_keys = [_read_u64(f) - k00 for _ in range(local_count)]

    # Local sample plot.
    plt.figure(figsize=(10, 5))
    xs = range(len(local_keys))
    plt.scatter(xs, local_keys, s=10, c='b', marker='o', label='local')
    plt.savefig(_local_output_path(args.output))
    plt.close()
# Run the visualisation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
Editor is loading...
Leave a Comment