import anthropic
client = anthropic.Anthropic()
# First request with the web search tool (no cache breakpoint yet)
messages = [
    {
        "role": "user",
        "content": "What's the current weather in San Francisco today?"
    }
]
response1 = client.messages.create(
    model="claude-opus-4-1-20250805",
    max_tokens=1024,
    messages=messages,
    tools=[{
        "type": "web_search_20250305",
        "name": "web_search",
        "user_location": {
            "type": "approximate",
            "city": "San Francisco",
            "region": "California",
            "country": "US",
            "timezone": "America/Los_Angeles"
        }
    }]
)
# Add Claude's response to the conversation
messages.append({
    "role": "assistant",
    "content": response1.content
})
# Second request with a cache breakpoint after the search results;
# cache_control goes on a content block, not on the message itself
messages.append({
    "role": "user",
    "content": [{
        "type": "text",
        "text": "Should I expect rain later this week?",
        "cache_control": {"type": "ephemeral"}  # Cache the prefix up to this point
    }]
})
response2 = client.messages.create(
    model="claude-opus-4-1-20250805",
    max_tokens=1024,
    messages=messages,
    tools=[{
        "type": "web_search_20250305",
        "name": "web_search",
        "user_location": {
            "type": "approximate",
            "city": "San Francisco",
            "region": "California",
            "country": "US",
            "timezone": "America/Los_Angeles"
        }
    }]
)
# This request writes the conversation, including the search results, to the
# cache; later requests that reuse the same prefix read it back while still
# being able to perform new searches if needed
print(f"Cache creation tokens: {response2.usage.cache_creation_input_tokens}")
print(f"Cache read tokens: {response2.usage.cache_read_input_tokens}")