import requests
import sys
import json
import time
# Credentials and target endpoint. The literal "***" values are placeholders
# as they appear in this file; real values must be supplied before running.
bearer_token = "***"
endpoint_id = "***"

# The user's question. The leading and trailing newlines are part of the value.
prompt = "\nList me all of the US presidents?\n"

# Synchronous-run URL for the configured endpoint.
url = f"https://api.runpod.ai/v2/{endpoint_id}/runsync"

# HTTP headers sent with every request: JSON body plus bearer authentication.
headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {bearer_token}",
}

system_message = "You are a helpful, respectful and honest assistant and chatbot."

# Instruction-style prompt: the system message wrapped in <<SYS>> markers,
# followed by the user prompt, all inside a single [INST] ... [/INST] pair.
prompt_template = "\n".join(
    [
        "[INST] <<SYS>>",
        system_message,
        "<</SYS>>",
        f"{prompt} [/INST]",
    ]
)
# Request body.
#
# Earlier payload shape, kept for reference. It sent the templated prompt with
# flat generation parameters instead of a nested "sampling_params" object:
# payload = json.dumps({
#     "input": {
#         "prompt": prompt_template,
#         "max_new_tokens": 4096,
#         "temperature": 0.7,
#         "top_k": 50,
#         "top_p": 0.7,
#         "repetition_penalty": 1.2,
#         "batch_size": 8,
#         "stop": ["</s>"]
#     }
# })

# Active payload. NOTE: this sends the raw `prompt`, not `prompt_template`.
_sampling_params = {
    "max_tokens": 2000,
    "n": 1,
    "presence_penalty": 0.2,
    "frequency_penalty": 0.7,
    "temperature": 0.3,
}
_request_input = {
    "prompt": prompt,
    "sampling_params": _sampling_params,
}
payload = json.dumps({"input": _request_input})
# Submit the job, then query its status and stream endpoints.
#
# Result: `model_response` is the list of streamed outputs on success, or the
# fallback string "Can you retry please?" when the submission is not HTTP 200.
# Network failures and timeouts raise `requests` exceptions and are not caught.

# Without a timeout, `requests` waits indefinitely on a stalled connection.
# NOTE(review): 300 s is a generous guess for a blocking /runsync call; tune it
# to the endpoint's expected generation time.
REQUEST_TIMEOUT_SECONDS = 300

# Make the POST request
response = requests.post(
    url, headers=headers, data=payload, timeout=REQUEST_TIMEOUT_SECONDS
)

statuses = []

# Check the HTTP status *before* touching the body, so a failed submission
# reaches the fallback below instead of raising on a missing 'id' key.
if response.status_code == 200:
    response_json = json.loads(response.text)
    json_response = response_json  # same parsed body; kept under both names
    task_id = response_json['id']  # use this to find the task we want to stream

    # One-shot status query for the submitted job.
    status_url = f"https://api.runpod.ai/v2/{endpoint_id}/status/{task_id}"
    get_status = requests.get(
        status_url, headers=headers, timeout=REQUEST_TIMEOUT_SECONDS
    )
    print(get_status.text)
    status_id = json.loads(get_status.text).get('id')

    # One-shot stream query for the same job.
    stream_url = f"https://api.runpod.ai/v2/{endpoint_id}/stream/{task_id}"
    get_status = requests.get(
        stream_url, headers=headers, timeout=REQUEST_TIMEOUT_SECONDS
    )
    print(get_status.text)

    # Collect each streamed chunk's output. The {"stream": [{"output": ...}]}
    # shape is taken from the parsing code that was previously commented out
    # here -- TODO confirm against the API documentation.
    try:
        stream_body = json.loads(get_status.text)
    except ValueError:
        stream_body = {}
    stream_chunks = stream_body.get("stream") if isinstance(stream_body, dict) else None
    for chunk in stream_chunks or []:
        if isinstance(chunk, dict) and "output" in chunk:
            statuses.append(chunk["output"])

    # model_response = json_response["output"]
    model_response = statuses
else:
    model_response = "Can you retry please?"