# Define a single tool the model may call: a weather lookup keyed by location.
weather_tool = {
    "name": "get_weather",
    "description": "Get current weather for a location",
    "input_schema": {
        "type": "object",
        "properties": {
            "location": {"type": "string"}
        },
        "required": ["location"]
    }
}

# First request - Claude responds with thinking and tool request.
# `client` is an anthropic.Anthropic() instance created earlier in the docs.
response = client.messages.create(
    model="claude-sonnet-4-20250514",
    max_tokens=16000,
    thinking={
        "type": "enabled",
        "budget_tokens": 10000
    },
    tools=[weather_tool],
    messages=[
        {"role": "user", "content": "What's the weather in Paris?"}
    ]
)
APIレスポンスには思考、テキスト、およびツール使用ブロックが含まれます:
Copy
{
  "content": [
    {
      "type": "thinking",
      "thinking": "The user wants to know the current weather in Paris. I have access to a function `get_weather`...",
      "signature": "BDaL4VrbR2Oj0hO4XpJxT28J5TILnCrrUXoKiiNBZW9P+nr8XSj1zuZzAl4egiCCpQNvfyUuFFJP5CncdYZEQPPmLxYsNrcs...."
    },
    {
      "type": "text",
      "text": "I can help you get the current weather information for Paris. Let me check that for you"
    },
    {
      "type": "tool_use",
      "id": "toolu_01CswdEQBMshySk6Y9DFKrfq",
      "name": "get_weather",
      "input": {"location": "Paris"}
    }
  ]
}
それでは会話を続けてツールを使用しましょう
Copy
# Extract the thinking block and the tool use block from the first response.
# next(..., None) returns None if no block of that type is present.
thinking_block = next((block for block in response.content if block.type == 'thinking'), None)
tool_use_block = next((block for block in response.content if block.type == 'tool_use'), None)

# Call your actual weather API, here is where your actual API call would go.
# Let's pretend this is what we get back.
weather_data = {"temperature": 88}

# Second request - Include thinking block and tool result.
# No new thinking blocks will be generated in the response.
continuation = client.messages.create(
    model="claude-sonnet-4-20250514",
    max_tokens=16000,
    thinking={
        "type": "enabled",
        "budget_tokens": 10000
    },
    tools=[weather_tool],
    messages=[
        {"role": "user", "content": "What's the weather in Paris?"},
        # Notice that the thinking_block is passed in as well as the tool_use_block;
        # if this is not passed in, an error is raised.
        {"role": "assistant", "content": [thinking_block, tool_use_block]},
        {"role": "user", "content": [{
            "type": "tool_result",
            "tool_use_id": tool_use_block.id,
            "content": f"Current temperature: {weather_data['temperature']}°F"
        }]}
    ]
)
APIレスポンスにはテキストのみが含まれるようになります
Copy
{
  "content": [
    {
      "type": "text",
      "text": "Currently in Paris, the temperature is 88°F (31°C)"
    }
  ]
}
import anthropic

client = anthropic.Anthropic()

# Define tools
calculator_tool = {
    "name": "calculator",
    "description": "Perform mathematical calculations",
    "input_schema": {
        "type": "object",
        "properties": {
            "expression": {
                "type": "string",
                "description": "Mathematical expression to evaluate"
            }
        },
        "required": ["expression"]
    }
}

database_tool = {
    "name": "database_query",
    "description": "Query product database",
    "input_schema": {
        "type": "object",
        "properties": {
            "query": {
                "type": "string",
                "description": "SQL query to execute"
            }
        },
        "required": ["query"]
    }
}

# First request - Claude thinks once before all tool calls
response = client.messages.create(
    model="claude-sonnet-4-20250514",
    max_tokens=16000,
    thinking={
        "type": "enabled",
        "budget_tokens": 10000
    },
    tools=[calculator_tool, database_tool],
    messages=[{
        "role": "user",
        "content": "What's the total revenue if we sold 150 units of product A at $50 each, and how does this compare to our average monthly revenue from the database?"
    }]
)

# Response includes thinking followed by tool uses.
# Note: Claude thinks once at the beginning, then makes all tool decisions.
print("First response:")
for block in response.content:
    if block.type == "thinking":
        print(f"Thinking (summarized): {block.thinking}")
    elif block.type == "tool_use":
        print(f"Tool use: {block.name} with input {block.input}")
    elif block.type == "text":
        print(f"Text: {block.text}")

# You would execute the tools and return results...
# After getting both tool results back, Claude directly responds without additional thinking.
このインターリーブ思考なしの例では:
Claudeはタスクを理解するために最初に一度だけ考えます
すべてのツール使用の決定を前もって行います
ツール結果が返されると、Claudeは追加の思考なしで即座に応答を提供します
インターリーブ思考を伴うツール使用
Copy
import anthropic

client = anthropic.Anthropic()

# Same tool definitions as before
calculator_tool = {
    "name": "calculator",
    "description": "Perform mathematical calculations",
    "input_schema": {
        "type": "object",
        "properties": {
            "expression": {
                "type": "string",
                "description": "Mathematical expression to evaluate"
            }
        },
        "required": ["expression"]
    }
}

database_tool = {
    "name": "database_query",
    "description": "Query product database",
    "input_schema": {
        "type": "object",
        "properties": {
            "query": {
                "type": "string",
                "description": "SQL query to execute"
            }
        },
        "required": ["query"]
    }
}

# First request with interleaved thinking enabled
response = client.messages.create(
    model="claude-sonnet-4-20250514",
    max_tokens=16000,
    thinking={
        "type": "enabled",
        "budget_tokens": 10000
    },
    tools=[calculator_tool, database_tool],
    # Enable interleaved thinking with beta header
    extra_headers={
        "anthropic-beta": "interleaved-thinking-2025-05-14"
    },
    messages=[{
        "role": "user",
        "content": "What's the total revenue if we sold 150 units of product A at $50 each, and how does this compare to our average monthly revenue from the database?"
    }]
)

print("Initial response:")
# Collect thinking and tool_use blocks so they can be replayed verbatim
# in the follow-up requests below.
thinking_blocks = []
tool_use_blocks = []
for block in response.content:
    if block.type == "thinking":
        thinking_blocks.append(block)
        print(f"Thinking: {block.thinking}")
    elif block.type == "tool_use":
        tool_use_blocks.append(block)
        print(f"Tool use: {block.name} with input {block.input}")
    elif block.type == "text":
        print(f"Text: {block.text}")

# First tool result (calculator)
calculator_result = "7500"  # 150 * 50

# Continue with first tool result
response2 = client.messages.create(
    model="claude-sonnet-4-20250514",
    max_tokens=16000,
    thinking={
        "type": "enabled",
        "budget_tokens": 10000
    },
    tools=[calculator_tool, database_tool],
    extra_headers={
        "anthropic-beta": "interleaved-thinking-2025-05-14"
    },
    messages=[
        {
            "role": "user",
            "content": "What's the total revenue if we sold 150 units of product A at $50 each, and how does this compare to our average monthly revenue from the database?"
        },
        {
            "role": "assistant",
            "content": [thinking_blocks[0], tool_use_blocks[0]]
        },
        {
            "role": "user",
            "content": [{
                "type": "tool_result",
                "tool_use_id": tool_use_blocks[0].id,
                "content": calculator_result
            }]
        }
    ]
)

print("\nAfter calculator result:")
# With interleaved thinking, Claude can think about the calculator result
# before deciding to query the database.
for block in response2.content:
    if block.type == "thinking":
        thinking_blocks.append(block)
        print(f"Interleaved thinking: {block.thinking}")
    elif block.type == "tool_use":
        tool_use_blocks.append(block)
        print(f"Tool use: {block.name} with input {block.input}")

# Second tool result (database)
database_result = "5200"  # Example average monthly revenue

# Continue with second tool result
response3 = client.messages.create(
    model="claude-sonnet-4-20250514",
    max_tokens=16000,
    thinking={
        "type": "enabled",
        "budget_tokens": 10000
    },
    tools=[calculator_tool, database_tool],
    extra_headers={
        "anthropic-beta": "interleaved-thinking-2025-05-14"
    },
    messages=[
        {
            "role": "user",
            "content": "What's the total revenue if we sold 150 units of product A at $50 each, and how does this compare to our average monthly revenue from the database?"
        },
        {
            "role": "assistant",
            "content": [thinking_blocks[0], tool_use_blocks[0]]
        },
        {
            "role": "user",
            "content": [{
                "type": "tool_result",
                "tool_use_id": tool_use_blocks[0].id,
                "content": calculator_result
            }]
        },
        # Replay the interleaved thinking and any further tool uses from response2.
        {
            "role": "assistant",
            "content": thinking_blocks[1:] + tool_use_blocks[1:]
        },
        {
            "role": "user",
            "content": [{
                "type": "tool_result",
                "tool_use_id": tool_use_blocks[1].id,
                "content": database_result
            }]
        }
    ]
)

print("\nAfter database result:")
# With interleaved thinking, Claude can think about both results
# before formulating the final response.
for block in response3.content:
    if block.type == "thinking":
        print(f"Final thinking: {block.thinking}")
    elif block.type == "text":
        print(f"Final response: {block.text}")
from anthropic import Anthropic
import requests
from bs4 import BeautifulSoup

client = Anthropic()


def fetch_article_content(url):
    """Fetch *url* and return its visible text, one chunk per line."""
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    # Remove script and style elements
    for script in soup(["script", "style"]):
        script.decompose()
    # Get text
    text = soup.get_text()
    # Break into lines and remove leading and trailing space on each
    lines = (line.strip() for line in text.splitlines())
    # Break multi-headlines into a line each.
    # NOTE(review): split on a double space, per the standard BeautifulSoup
    # recipe — splitting on a single space would put every word on its own line.
    chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
    # Drop blank lines
    text = '\n'.join(chunk for chunk in chunks if chunk)
    return text


# Fetch the content of the article
book_url = "https://www.gutenberg.org/cache/epub/1342/pg1342.txt"
book_content = fetch_article_content(book_url)

# Use just enough text for caching (first few chapters)
LARGE_TEXT = book_content[:5000]

SYSTEM_PROMPT = [
    {
        "type": "text",
        "text": "You are an AI assistant that is tasked with literary analysis. Analyze the following text carefully.",
    },
    {
        "type": "text",
        "text": LARGE_TEXT,
        # Mark the large text block as cacheable.
        "cache_control": {"type": "ephemeral"}
    }
]

MESSAGES = [
    {
        "role": "user",
        "content": "Analyze the tone of this passage."
    }
]

# First request - establish cache
print("First request - establishing cache")
response1 = client.messages.create(
    model="claude-sonnet-4-20250514",
    max_tokens=20000,
    thinking={
        "type": "enabled",
        "budget_tokens": 4000
    },
    system=SYSTEM_PROMPT,
    messages=MESSAGES
)
print(f"First response usage: {response1.usage}")

MESSAGES.append({
    "role": "assistant",
    "content": response1.content
})
MESSAGES.append({
    "role": "user",
    "content": "Analyze the characters in this passage."
})

# Second request - same thinking parameters (cache hit expected)
print("\nSecond request - same thinking parameters (cache hit expected)")
response2 = client.messages.create(
    model="claude-sonnet-4-20250514",
    max_tokens=20000,
    thinking={
        "type": "enabled",
        "budget_tokens": 4000
    },
    system=SYSTEM_PROMPT,
    messages=MESSAGES
)
print(f"Second response usage: {response2.usage}")

# Third request - different thinking parameters (cache miss for messages)
print("\nThird request - different thinking parameters (cache miss for messages)")
response3 = client.messages.create(
    model="claude-sonnet-4-20250514",
    max_tokens=20000,
    thinking={
        "type": "enabled",
        "budget_tokens": 8000  # Changed thinking budget
    },
    system=SYSTEM_PROMPT,  # System prompt remains cached
    messages=MESSAGES  # Messages cache is invalidated
)
print(f"Third response usage: {response3.usage}")
メッセージのキャッシング(思考が変更されると無効化される)
Copy
from anthropic import Anthropic
import requests
from bs4 import BeautifulSoup

client = Anthropic()


def fetch_article_content(url):
    """Fetch *url* and return its visible text, one chunk per line."""
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    # Remove script and style elements
    for script in soup(["script", "style"]):
        script.decompose()
    # Get text
    text = soup.get_text()
    # Break into lines and remove leading and trailing space on each
    lines = (line.strip() for line in text.splitlines())
    # Break multi-headlines into a line each.
    # NOTE(review): split on a double space, per the standard BeautifulSoup
    # recipe — splitting on a single space would put every word on its own line.
    chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
    # Drop blank lines
    text = '\n'.join(chunk for chunk in chunks if chunk)
    return text


# Fetch the content of the article
book_url = "https://www.gutenberg.org/cache/epub/1342/pg1342.txt"
book_content = fetch_article_content(book_url)

# Use just enough text for caching (first few chapters)
LARGE_TEXT = book_content[:5000]

# No system prompt - caching in messages instead
MESSAGES = [
    {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": LARGE_TEXT,
                "cache_control": {"type": "ephemeral"},
            },
            {
                "type": "text",
                "text": "Analyze the tone of this passage."
            }
        ]
    }
]

# First request - establish cache
print("First request - establishing cache")
response1 = client.messages.create(
    model="claude-sonnet-4-20250514",
    max_tokens=20000,
    thinking={
        "type": "enabled",
        "budget_tokens": 4000
    },
    messages=MESSAGES
)
print(f"First response usage: {response1.usage}")

MESSAGES.append({
    "role": "assistant",
    "content": response1.content
})
MESSAGES.append({
    "role": "user",
    "content": "Analyze the characters in this passage."
})

# Second request - same thinking parameters (cache hit expected)
print("\nSecond request - same thinking parameters (cache hit expected)")
response2 = client.messages.create(
    model="claude-sonnet-4-20250514",
    max_tokens=20000,
    thinking={
        "type": "enabled",
        "budget_tokens": 4000  # Same thinking budget
    },
    messages=MESSAGES
)
print(f"Second response usage: {response2.usage}")

MESSAGES.append({
    "role": "assistant",
    "content": response2.content
})
MESSAGES.append({
    "role": "user",
    "content": "Analyze the setting in this passage."
})

# Third request - different thinking budget (cache miss expected)
print("\nThird request - different thinking budget (cache miss expected)")
response3 = client.messages.create(
    model="claude-sonnet-4-20250514",
    max_tokens=20000,
    thinking={
        "type": "enabled",
        "budget_tokens": 8000  # Different thinking budget breaks cache
    },
    messages=MESSAGES
)
print(f"Third response usage: {response3.usage}")
import anthropic

client = anthropic.Anthropic()

# Using a special prompt that triggers redacted thinking (for demonstration purposes only)
response = client.messages.create(
    model="claude-3-7-sonnet-20250219",
    max_tokens=16000,
    thinking={
        "type": "enabled",
        "budget_tokens": 10000
    },
    messages=[{
        "role": "user",
        "content": "ANTHROPIC_MAGIC_STRING_TRIGGER_REDACTED_THINKING_46C9A13E193C177646C7398A98432ECCCE4C1253D5E2D82641AC0E52CC2876CB"
    }]
)

# Identify redacted thinking blocks
has_redacted_thinking = any(
    block.type == "redacted_thinking" for block in response.content
)

if has_redacted_thinking:
    print("Response contains redacted thinking blocks")
    # These blocks are still usable in subsequent requests.
    # Extract all blocks (both redacted and non-redacted).
    all_thinking_blocks = [
        block for block in response.content
        if block.type in ["thinking", "redacted_thinking"]
    ]
    # When passing to subsequent requests, include all blocks without modification.
    # This preserves the integrity of Claude's reasoning.
    print(f"Found {len(all_thinking_blocks)} thinking blocks total")
    print(f"These blocks are still billable as output tokens")