Untitled

mail@pastecode.io avatar
unknown
python
7 months ago
2.0 kB
3
Indexable
Never
import matplotlib.pyplot as plt
import argparse
import numpy as np
import struct

def get_args():
    """Parse the command-line options of the visualizer.

    Returns:
        argparse.Namespace with three attributes:
        ``file`` (str, default ``'osm'``), ``output`` (str, default
        ``'osm.pdf'``) and ``nsample`` (int, default ``10000``).
    """
    cli = argparse.ArgumentParser(description='Visualize the binary file')
    # (flag, type, default, help) for every supported option.
    option_table = (
        ('--file', str, 'osm', 'binary file path'),
        ('--output', str, 'osm.pdf', 'output image path'),
        ('--nsample', int, 10000, 'how many samples to draw'),
    )
    for flag, value_type, fallback, description in option_table:
        cli.add_argument(flag, type=value_type, default=fallback, help=description)
    return cli.parse_args()

def index_to_offset(x):
    """Convert a key index into a byte offset inside the binary file.

    The offset is 8 bytes (skipped at the start of the file) plus 8 bytes
    for every key that precedes index ``x``.
    """
    header_bytes = 8
    key_bytes = 8
    return header_bytes + x * key_bytes

def _read_u64(f):
    """Read one unsigned 64-bit integer (struct format "Q") at f's current position.

    Raises:
        EOFError: if fewer than 8 bytes are left in the file.
    """
    raw = f.read(8)
    if len(raw) != 8:
        raise EOFError('expected 8 bytes at offset {}, got {}'.format(
            f.tell() - len(raw), len(raw)))
    return struct.unpack("Q", raw)[0]


def main():
    """Plot a global random sample and a local window of the keys in a binary file.

    File layout as read here: one 8-byte unsigned integer holding the number
    of keys, followed by the keys themselves as 8-byte unsigned integers
    (struct format "Q", i.e. native byte order).

    Two figures are written:
      * ``args.output``: ``args.nsample`` randomly chosen keys, sorted and
        plotted relative to the first key of the file.
      * ``<output without extension>_local.pdf``: a scatter plot of up to 100
        consecutive keys following the key at 60% of the file, plotted
        relative to that key.

    Raises:
        ValueError: if the header says the file holds no keys.
        EOFError: if the file is shorter than its header claims.
    """
    import os  # local import: only needed to derive the second output path

    args = get_args()

    with open(args.file, 'rb') as f:
        key_num = _read_u64(f)  # the first 8 bytes hold the number of keys in the dataset
        print(key_num)
        if key_num == 0:
            raise ValueError('{} contains no keys'.format(args.file))
        k0 = _read_u64(f)  # first key, used as the baseline of the global plot

        # Global sampling: draw nsample indices from the keys that actually
        # exist (instead of a hard-coded upper bound) so no seek lands past EOF.
        sample = np.random.randint(0, key_num, size=args.nsample, dtype=np.int64)
        # The sampled keys are sorted before plotting anyway, so visiting the
        # indices in ascending order changes nothing but keeps reads sequential.
        sample.sort()
        keys = []
        for idx in sample.tolist():
            f.seek(index_to_offset(idx))
            keys.append(_read_u64(f) - k0)
        keys.sort()
        plt.figure(figsize=(10, 5))
        plt.plot(range(len(keys)), keys)  # global sample plot
        plt.savefig(args.output)

        # Local sampling at 60% of the key range; adjust the ratio as needed.
        plt.figure(figsize=(10, 5))
        sample_pos = int(key_num * 0.6)
        f.seek(index_to_offset(sample_pos))
        k00 = _read_u64(f)  # baseline key; together with it up to 101 consecutive keys are read
        # Never read more keys than remain after sample_pos.
        local_count = min(100, key_num - sample_pos - 1)
        local_keys = [_read_u64(f) - k00 for _ in range(local_count)]
        xs = range(len(local_keys))
        plt.scatter(xs, local_keys, s=10, c='b', marker='o', label='local')

        # splitext strips only the final extension, so outputs such as
        # './plots/osm.pdf' keep their directory and base name.
        local_output = os.path.splitext(args.output)[0] + '_local.pdf'
        plt.savefig(local_output)
        


# Run the visualizer only when this file is executed as a script.
if __name__ == '__main__':
    main()
Leave a Comment