# 0920_onnx — ONNX Runtime inference demo (BERT tokenizer feeding an ONNX model)
import onnxruntime as ort
import numpy as np
from transformers import AutoTokenizer
# Report what this onnxruntime build can run on, and its version.
available_providers = ort.get_available_providers()
print("Available providers:", available_providers)
print(ort.__version__)
# Create an inference session that prefers CUDA and falls back to CPU.
MODEL_PATH = "auto_Opset16.onnx"
# Options for the CUDAExecutionProvider entry; the CPU entry gets no options ({}).
cuda_provider_options = {
    'device_id': 0,
    'arena_extend_strategy': 'kNextPowerOfTwo',
    'gpu_mem_limit': 2 * 1024 * 1024 * 1024,  # cap the CUDA memory arena at 2 GB
    'cudnn_conv_algo_search': 'EXHAUSTIVE',
    'do_copy_in_default_stream': True,
}
session = ort.InferenceSession(
    MODEL_PATH,
    providers=['CUDAExecutionProvider', 'CPUExecutionProvider'],
    provider_options=[cuda_provider_options, {}],
)
# Show which provider/device the session actually selected.
print("Using provider:", session.get_providers())
print(f"Using device: {ort.get_device()}")
# Tokenize a sample sentence into fixed-length (128-token) NumPy arrays.
# NOTE(review): tokenizer name is assumed to match the exported model — confirm.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
sample_text = "Hello, world! This is a test."
inputs = tokenizer(
    sample_text,
    return_tensors="np",
    padding="max_length",
    max_length=128,
    truncation=True,
)
# Force int64 token ids (presumably what the ONNX export declares — verify).
inputs['input_ids'] = inputs['input_ids'].astype(np.int64)
# Build the feed dict for session.run, casting each tokenizer array to the
# dtype the model itself declares for that input.  The previous version
# hard-coded float32 for everything except input_ids, which fails on models
# whose attention_mask / token_type_ids are declared int64 (the common case
# for BERT exports).  Unmapped ONNX type strings fall back to float32,
# preserving the old behavior.
_ONNX_TO_NUMPY = {
    'tensor(int64)': np.int64,
    'tensor(int32)': np.int32,
    'tensor(float)': np.float32,
    'tensor(double)': np.float64,
    'tensor(bool)': np.bool_,
}
# Avoid shadowing the builtin `input` (the original used it as a loop variable).
input_names = [node.name for node in session.get_inputs()]
input_dict = {}
for node in session.get_inputs():
    if node.name in inputs:
        target_dtype = _ONNX_TO_NUMPY.get(node.type, np.float32)
        input_dict[node.name] = inputs[node.name].astype(target_dtype)
# Echo the model's declared I/O signature ...
print("\nModel inputs:")
for meta in session.get_inputs():
    print(f" Name: {meta.name}, Shape: {meta.shape}, Type: {meta.type}")
print("\nModel outputs:")
for meta in session.get_outputs():
    print(f" Name: {meta.name}, Shape: {meta.shape}, Type: {meta.type}")
# ... and the actual tensors we are about to feed, so any shape/dtype
# mismatch is visible before session.run raises.
for name, array in input_dict.items():
    print(f"\nInput '{name}':")
    print(f" Shape: {array.shape}")
    print(f" Type: {array.dtype}")
    print(f" First few elements: {array[0, :5]}")
# Execute the model; passing None asks for every declared output.
outputs = session.run(None, input_dict)
# Summarize the first output tensor (layout depends on the specific model).
first_output = outputs[0]
print("\nOutput:")
print(f" Shape: {first_output.shape}")
print(f" Type: {first_output.dtype}")
print(f" First few elements: {first_output[:5]}")
# You may need to add more processing based on your model's specific output.