# 0920_onnx — ONNX Runtime inference demo (BERT tokenizer feeding an ONNX model)
import onnxruntime as ort
import numpy as np
from transformers import AutoTokenizer
# Report what this onnxruntime build can run on, and its version.
available_providers = ort.get_available_providers()
print("Available providers:", available_providers)
print(ort.__version__)
# Create an inference session that prefers CUDA and falls back to CPU.
MODEL_PATH = "auto_Opset16.onnx"
# Options for the CUDAExecutionProvider entry; the CPU entry gets no options ({}).
cuda_provider_options = {
    'device_id': 0,
    'arena_extend_strategy': 'kNextPowerOfTwo',
    'gpu_mem_limit': 2 * 1024 * 1024 * 1024,  # cap the CUDA memory arena at 2 GB
    'cudnn_conv_algo_search': 'EXHAUSTIVE',
    'do_copy_in_default_stream': True,
}
session = ort.InferenceSession(
    MODEL_PATH,
    providers=['CUDAExecutionProvider', 'CPUExecutionProvider'],
    provider_options=[cuda_provider_options, {}],
)
# Show which provider/device the session actually selected.
print("Using provider:", session.get_providers())
print(f"Using device: {ort.get_device()}")
# Tokenize a sample sentence into fixed-length (128-token) NumPy arrays.
# NOTE(review): tokenizer name is assumed to match the exported model — confirm.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
sample_text = "Hello, world! This is a test."
inputs = tokenizer(
    sample_text,
    return_tensors="np",
    padding="max_length",
    max_length=128,
    truncation=True,
)
# Force int64 token ids (presumably what the ONNX export declares — verify).
inputs['input_ids'] = inputs['input_ids'].astype(np.int64)
# Build the feed dict for session.run, casting each tokenizer array to the
# dtype the model itself declares for that input.  The previous version
# hard-coded float32 for everything except input_ids, which fails on models
# whose attention_mask / token_type_ids are declared int64 (the common case
# for BERT exports).  Unmapped ONNX type strings fall back to float32,
# preserving the old behavior.
_ONNX_TO_NUMPY = {
    'tensor(int64)': np.int64,
    'tensor(int32)': np.int32,
    'tensor(float)': np.float32,
    'tensor(double)': np.float64,
    'tensor(bool)': np.bool_,
}
# Avoid shadowing the builtin `input` (the original used it as a loop variable).
input_names = [node.name for node in session.get_inputs()]
input_dict = {}
for node in session.get_inputs():
    if node.name in inputs:
        target_dtype = _ONNX_TO_NUMPY.get(node.type, np.float32)
        input_dict[node.name] = inputs[node.name].astype(target_dtype)
# Echo the model's declared I/O signature ...
print("\nModel inputs:")
for meta in session.get_inputs():
    print(f" Name: {meta.name}, Shape: {meta.shape}, Type: {meta.type}")
print("\nModel outputs:")
for meta in session.get_outputs():
    print(f" Name: {meta.name}, Shape: {meta.shape}, Type: {meta.type}")
# ... and the actual tensors we are about to feed, so any shape/dtype
# mismatch is visible before session.run raises.
for name, array in input_dict.items():
    print(f"\nInput '{name}':")
    print(f" Shape: {array.shape}")
    print(f" Type: {array.dtype}")
    print(f" First few elements: {array[0, :5]}")
# Execute the model; passing None asks for every declared output.
outputs = session.run(None, input_dict)
# Summarize the first output tensor (layout depends on the specific model).
first_output = outputs[0]
print("\nOutput:")
print(f" Shape: {first_output.shape}")
print(f" Type: {first_output.dtype}")
print(f" First few elements: {first_output[:5]}")
# You may need to add more processing based on your model's specific output.