0920_onnx
unknown
python
10 months ago
2.5 kB
9
Indexable
"""Run a BERT-style ONNX model through onnxruntime, preferring CUDA.

Loads the model, tokenizes a sample sentence with a Hugging Face
tokenizer, feeds it through the session, and prints diagnostic
information about providers, model I/O signatures, and the first output.
"""

import onnxruntime as ort
import numpy as np
from transformers import AutoTokenizer

MODEL_PATH = "auto_Opset16.onnx"
TOKENIZER_NAME = "bert-base-uncased"  # adjust if the model was exported from another checkpoint
MAX_LENGTH = 128

# Map ONNX element-type strings (NodeArg.type) to numpy dtypes so every
# feed is cast to what the model actually declares. BERT exports usually
# declare int64 for input_ids/attention_mask/token_type_ids, so the old
# blanket float32 cast for non-input_ids feeds would raise a dtype
# mismatch at run time for such models.
_ONNX_TO_NUMPY = {
    "tensor(int64)": np.int64,
    "tensor(int32)": np.int32,
    "tensor(float)": np.float32,
    "tensor(float16)": np.float16,
    "tensor(double)": np.float64,
    "tensor(bool)": np.bool_,
}


def _create_session(model_path):
    """Create an InferenceSession that tries CUDA first, then CPU.

    Prints the available/selected providers for debugging.
    """
    print("Available providers:", ort.get_available_providers())
    print(ort.__version__)

    providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
    cuda_options = {
        'device_id': 0,
        'arena_extend_strategy': 'kNextPowerOfTwo',
        'gpu_mem_limit': 2 * 1024 * 1024 * 1024,  # 2 GB
        'cudnn_conv_algo_search': 'EXHAUSTIVE',
        'do_copy_in_default_stream': True,
    }
    # provider_options is positional: one options dict per provider
    # (empty dict for the CPU fallback).
    session = ort.InferenceSession(
        model_path,
        providers=providers,
        provider_options=[cuda_options, {}],
    )

    # Verify which provider actually got selected.
    print("Using provider:", session.get_providers())
    print(f"Using device: {ort.get_device()}")
    return session


def _build_feed(session, encoded):
    """Build the name -> ndarray feed dict for session.run.

    Only tokenizer outputs the model actually declares as inputs are
    included, each cast to the model's declared dtype (default int64 for
    any unrecognized type string).
    """
    feed = {}
    for model_input in session.get_inputs():
        name = model_input.name
        if name in encoded:
            dtype = _ONNX_TO_NUMPY.get(model_input.type, np.int64)
            feed[name] = np.asarray(encoded[name]).astype(dtype)
    return feed


def _print_model_io(session):
    """Print the model's declared input/output names, shapes, and types."""
    print("\nModel inputs:")
    for model_input in session.get_inputs():
        print(f"  Name: {model_input.name}, Shape: {model_input.shape}, Type: {model_input.type}")

    print("\nModel outputs:")
    for model_output in session.get_outputs():
        print(f"  Name: {model_output.name}, Shape: {model_output.shape}, Type: {model_output.type}")


def main():
    session = _create_session(MODEL_PATH)

    tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_NAME)

    # Pad/truncate to a fixed length so the feed shape is predictable.
    text = "Hello, world! This is a test."
    encoded = tokenizer(
        text,
        return_tensors="np",
        padding="max_length",
        max_length=MAX_LENGTH,
        truncation=True,
    )

    feed = _build_feed(session, encoded)

    _print_model_io(session)

    # Echo each feed tensor so dtype/shape mismatches are easy to spot.
    for name, value in feed.items():
        print(f"\nInput '{name}':")
        print(f"  Shape: {value.shape}")
        print(f"  Type: {value.dtype}")
        print(f"  First few elements: {value[0, :5]}")

    # Run inference; None requests all model outputs.
    outputs = session.run(None, feed)

    # Process outputs (adjust based on your model's specific output).
    print("\nOutput:")
    print(f"  Shape: {outputs[0].shape}")
    print(f"  Type: {outputs[0].dtype}")
    print(f"  First few elements: {outputs[0][:5]}")


if __name__ == "__main__":
    main()
Editor is loading...
Leave a Comment