POST /v1/responses — OpenAI Responses format
Endpoint
POST /v1/responses
POST /v1/responses/compact
Overview
The Responses API is the new OpenAI format that supports stateful conversations and built-in tools.
/v1/responses/compact returns a compacted response omitting intermediate reasoning steps.
Request Body
| Name | Type | Required | Default | Description |
|---|---|---|---|---|
| model | string | yes | — | Model ID |
| input | string or array | yes | — | Input content — a string or an array of message objects |
| instructions | string | no | — | System-level instructions for the model |
| stream | boolean | no | false | Enable streaming output |
| max_output_tokens | integer | no | — | Maximum number of output tokens |
| temperature | number | no | 1 | Sampling temperature (0–2) |
| previous_response_id | string | no | — | ID of a previous response, for stateful continuation |
Example
cURL
# POST a single-turn request to the Responses endpoint and print the JSON reply.
curl --header "Authorization: Bearer YOUR_API_KEY" \
  --header "Content-Type: application/json" \
  --data '{
"model": "o4-mini",
"input": "Explain quantum computing in simple terms"
}' \
  https://api.nbility.dev/v1/responses
Python
from openai import OpenAI
# Point the OpenAI SDK client at the nbility base URL instead of the default one.
client = OpenAI(api_key="YOUR_API_KEY", base_url="https://api.nbility.dev/v1")

# Request parameters: the model ID and a plain-string input.
request_args = {
    "model": "o4-mini",
    "input": "Explain quantum computing in simple terms",
}
response = client.responses.create(**request_args)

# Print the text of the model's reply.
print(response.output_text)
Go
package main
import (
"bytes"
"encoding/json"
"fmt"
"io"
"net/http"
)
// main sends one non-streaming request to the Responses endpoint and prints
// the raw JSON response body to stdout. Any failure along the way aborts the
// program with the underlying error.
func main() {
	body, err := json.Marshal(map[string]any{
		"model": "o4-mini",
		"input": "Explain quantum computing in simple terms",
	})
	if err != nil {
		panic(err)
	}

	req, err := http.NewRequest(http.MethodPost, "https://api.nbility.dev/v1/responses", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	req.Header.Set("Authorization", "Bearer YOUR_API_KEY")
	req.Header.Set("Content-Type", "application/json")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		// On a transport error resp is nil, so stop before touching resp.Body.
		panic(err)
	}
	defer resp.Body.Close()

	data, err := io.ReadAll(resp.Body)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(data))
}
Java
import java.net.http.*;
import java.net.URI;
/**
 * Minimal example: POSTs a JSON request to the Responses endpoint using the
 * JDK HTTP client and prints the response body to standard output.
 */
public class Main {
    private static final String ENDPOINT = "https://api.nbility.dev/v1/responses";

    /** Builds the authenticated JSON POST request carrying the given payload. */
    private static HttpRequest buildRequest(String jsonPayload) {
        return HttpRequest.newBuilder()
                .uri(URI.create(ENDPOINT))
                .header("Authorization", "Bearer YOUR_API_KEY")
                .header("Content-Type", "application/json")
                .POST(HttpRequest.BodyPublishers.ofString(jsonPayload))
                .build();
    }

    public static void main(String[] args) throws Exception {
        // Text block: evaluates to the JSON object followed by a single newline.
        String jsonPayload = """
                {"model":"o4-mini","input":"Explain quantum computing in simple terms"}
                """;

        HttpClient httpClient = HttpClient.newHttpClient();
        HttpResponse<String> reply =
                httpClient.send(buildRequest(jsonPayload), HttpResponse.BodyHandlers.ofString());

        System.out.println(reply.body());
    }
}
Response
{
"id": "resp_abc123",
"object": "response",
"created_at": 1700000000,
"model": "o4-mini",
"output": [
{
"type": "message",
"role": "assistant",
"content": [
{
"type": "output_text",
"text": "Quantum computing uses quantum mechanics..."
}
]
}
],
"usage": {
"input_tokens": 10,
"output_tokens": 50,
"total_tokens": 60
}
}