Cho đến giờ, chúng ta đã học về LLMs như passive responders - nhận prompt, trả lời, xong. Nhưng AI Agents khác - chúng có thể reason, plan, use tools, và take actions để đạt được goals phức tạp.
Tưởng tượng thay vì chỉ hỏi "What's the weather?", bạn nói "Book me a flight to Tokyo next week when weather is nice" - agent sẽ tự check weather forecast, search flights, compare prices, và book cho bạn. Đó là agentic behavior.
┌──────────────────────────────────────┐
│ AI Agent │
├──────────────────────────────────────┤
│ │
│ ┌────────────────────────────────┐ │
│ │ 1. Perceive (Input) │ │
│ │ - User goal │ │
│ │ - Environment state │ │
│ └────────────────────────────────┘ │
│ ↓ │
│ ┌────────────────────────────────┐ │
│ │ 2. Reason & Plan │ │
│ │ - Break down goal │ │
│ │ - Choose strategy │ │
│ └────────────────────────────────┘ │
│ ↓ │
│ ┌────────────────────────────────┐ │
│ │ 3. Act (Tool Use) │ │
│ │ - Search │ │
│ │ - Calculator │ │
│ │ - Code execution │ │
│ │ - API calls │ │
│ └────────────────────────────────┘ │
│ ↓ │
│ ┌────────────────────────────────┐ │
│ │ 4. Observe (Feedback) │ │
│ │ - Tool results │ │
│ │ - Success/failure │ │
│ └────────────────────────────────┘ │
│ ↓ │
│ Loop until goal achieved │
│ │
└──────────────────────────────────────┘
ReAct là pattern kết hợp reasoning (suy luận) và acting (hành động).
Thought: [Reasoning về next step]
Action: [Tool to use]
Observation: [Tool result]
Thought: [Reasoning based on observation]
Action: [Next tool]
Observation: [Result]
... (repeat until solved)
Thought: I now know the answer
Answer: [Final answer]
Question: What is the square root of the population of France?
Thought: I need to find the population of France first
Action: search("population of France")
Observation: The population of France is approximately 67.75 million
Thought: Now I need to calculate the square root of 67,750,000
Action: calculator(sqrt(67750000))
Observation: 8231.57
Thought: I have the answer
Answer: The square root of France's population is approximately 8,231.57
from langchain.agents import Tool, AgentExecutor, create_react_agent
from langchain.prompts import PromptTemplate
from langchain.llms import OpenAI
# Define tools
def search(query):
    """Stub search tool: echoes the query back.

    A real implementation would call a web-search API and return its
    results; this placeholder only confirms what was asked.
    """
    prefix = "Search results for: "
    return f"{prefix}{query}"
def calculator(expression):
    """Safely evaluate a mathematical expression string.

    Supports numeric literals, the operators ``+ - * / // % **``, unary
    ``+``/``-``, and functions/constants from the ``math`` module (e.g.
    ``sqrt(67750000)``, ``pi``). Bug fix: the original used bare ``eval``,
    which executes arbitrary Python (a code-injection hole for an
    LLM-supplied string) and could not even run the article's own
    ``sqrt(...)`` example because ``sqrt`` was not in scope.

    Args:
        expression: Math expression, e.g. ``"sqrt(2) * 3"``.

    Returns:
        int | float: The evaluated result.

    Raises:
        ValueError: If the expression contains anything other than pure math.
    """
    import ast
    import math
    import operator

    binops = {
        ast.Add: operator.add, ast.Sub: operator.sub,
        ast.Mult: operator.mul, ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv, ast.Mod: operator.mod,
        ast.Pow: operator.pow,
    }
    unops = {ast.UAdd: operator.pos, ast.USub: operator.neg}

    def _eval(node):
        # Recursively evaluate only whitelisted AST node types.
        if isinstance(node, ast.Expression):
            return _eval(node.body)
        if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
            return node.value
        if isinstance(node, ast.BinOp) and type(node.op) in binops:
            return binops[type(node.op)](_eval(node.left), _eval(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in unops:
            return unops[type(node.op)](_eval(node.operand))
        if isinstance(node, ast.Name):
            value = getattr(math, node.id, None)
            if isinstance(value, (int, float)):
                return value
            raise ValueError(f"Unknown constant: {node.id}")
        if isinstance(node, ast.Call) and isinstance(node.func, ast.Name) and not node.keywords:
            func = getattr(math, node.func.id, None)
            if callable(func):
                return func(*[_eval(arg) for arg in node.args])
            raise ValueError(f"Unknown function: {node.func.id}")
        raise ValueError(f"Unsupported expression: {expression!r}")

    return _eval(ast.parse(expression, mode="eval"))
# Tool registry handed to the agent. The description text is what the
# LLM reads when deciding which tool fits the current Thought.
tools = [
    Tool(name=tool_name, func=tool_fn, description=tool_desc)
    for tool_name, tool_fn, tool_desc in (
        ("Search", search, "Useful for finding factual information"),
        ("Calculator", calculator, "Useful for mathematical calculations"),
    )
]
# ReAct prompt template: {input} receives the user question and
# {agent_scratchpad} is where LangChain appends the accumulated
# Thought/Action/Observation history on every iteration of the loop.
react_prompt = PromptTemplate.from_template("""
Answer the following question using this format:
Thought: [Your reasoning]
Action: [Tool name]
Action Input: [Tool input]
Observation: [Tool result will appear here]
... (repeat Thought/Action/Observation as needed)
Thought: I now know the final answer
Final Answer: [Your answer]
Question: {input}
{agent_scratchpad}
""")
# Create agent: the LLM fills in the Thought/Action lines of the prompt,
# and LangChain parses them to run the matching tool.
llm = OpenAI(temperature=0)  # temperature=0 for deterministic tool choices
agent = create_react_agent(llm, tools, react_prompt)
# Execute
agent_executor = AgentExecutor(
agent=agent,
tools=tools,
verbose=True,  # log each Thought/Action/Observation step
max_iterations=5  # safety cap against runaway Thought/Action loops
)
result = agent_executor.invoke({
"input": "What is the square root of the population of France?"
})
Reasoning benefits: giúp model phân tách vấn đề thành các bước nhỏ, theo dõi tiến độ và điều chỉnh kế hoạch khi có thông tin mới.
Acting benefits: cho phép truy cập thông tin real-time và thực hiện các tác vụ vượt ngoài kiến thức tĩnh của LLM.
Together: reasoning quyết định action nào cần thực hiện, và observation từ action lại cải thiện bước reasoning tiếp theo — vòng lặp này mạnh hơn hẳn việc chỉ reasoning hoặc chỉ acting riêng lẻ.
Modern LLMs (GPT-4, Claude) có built-in function calling.
from openai import OpenAI
# Shared OpenAI client used by all the function-calling examples below.
client = OpenAI()
# JSON-schema declarations of the callable tools. The model never runs
# these itself: it reads name/description/parameters and replies with a
# structured request when one of them should be invoked.
_GET_WEATHER_FN = {
    "name": "get_weather",
    "description": "Get current weather for a location",
    "parameters": {
        "type": "object",
        "properties": {
            "location": {
                "type": "string",
                "description": "City name, e.g. San Francisco",
            },
            "unit": {
                "type": "string",
                "enum": ["celsius", "fahrenheit"],
            },
        },
        "required": ["location"],
    },
}

_SEARCH_WEB_FN = {
    "name": "search_web",
    "description": "Search the internet",
    "parameters": {
        "type": "object",
        "properties": {
            "query": {
                "type": "string",
                "description": "Search query",
            },
        },
        "required": ["query"],
    },
}

functions = [_GET_WEATHER_FN, _SEARCH_WEB_FN]
# User query
messages = [
{"role": "user", "content": "What's the weather like in Tokyo?"}
]
# Ask the model; with function_call="auto" it decides whether to answer
# directly in text or to request one of the declared functions.
response = client.chat.completions.create(
model="gpt-4",
messages=messages,
functions=functions,
function_call="auto" # Let model decide
)
# If the model wants a tool, it returns a structured function_call
# (function name + JSON-encoded arguments) instead of text content.
if response.choices[0].message.function_call:
function_call = response.choices[0].message.function_call
print(f"Function: {function_call.name}")
print(f"Arguments: {function_call.arguments}")
# Output:
# Function: get_weather
# Arguments: {"location": "Tokyo", "unit": "celsius"}
import json
def execute_function(function_name, arguments):
    """Route a model-requested function call to its Python implementation.

    Args:
        function_name: Name the model asked for (matches a schema entry).
        arguments: JSON-encoded keyword arguments supplied by the model.

    Returns:
        The tool's result, or ``{"error": "Unknown function"}`` for names
        with no implementation.
    """
    kwargs = json.loads(arguments)
    if function_name == "get_weather":
        return get_weather(**kwargs)
    if function_name == "search_web":
        return search_web(**kwargs)
    return {"error": "Unknown function"}
# Execute the function the model requested (the model never executes
# code itself; our application does, then reports back).
function_result = execute_function(
function_call.name,
function_call.arguments
)
# Send result back to LLM: first the assistant's function-call message,
# then a role="function" message carrying the tool's JSON result.
messages.append(response.choices[0].message) # Function call
messages.append({
"role": "function",
"name": function_call.name,
"content": json.dumps(function_result)
})
# Get final response: the model turns the tool output into natural language.
final_response = client.chat.completions.create(
model="gpt-4",
messages=messages
)
print(final_response.choices[0].message.content)
# "The weather in Tokyo is currently 18°C with clear skies."
def agent_loop(user_query, max_iterations=5):
    """Run a tool-using conversation loop until the model answers directly.

    Each iteration asks the model what to do next: a function-call reply is
    executed and its observation appended to the conversation, while a plain
    text reply is treated as the final answer.

    Args:
        user_query: The user's goal or question.
        max_iterations: Hard cap on LLM round-trips (loop-safety limit).

    Returns:
        The model's final text answer, or "Max iterations reached" if the
        cap is hit first.
    """
    conversation = [{"role": "user", "content": user_query}]

    for _ in range(max_iterations):
        reply = client.chat.completions.create(
            model="gpt-4",
            messages=conversation,
            functions=functions,
            function_call="auto"
        ).choices[0].message

        # No tool requested -> the model has produced its final answer.
        if not reply.function_call:
            return reply.content

        # Execute the requested tool and feed the observation back in.
        observation = execute_function(
            reply.function_call.name,
            reply.function_call.arguments
        )
        conversation.append(reply)
        conversation.append({
            "role": "function",
            "name": reply.function_call.name,
            "content": json.dumps(observation)
        })

    return "Max iterations reached"
# Example: Complex query requiring multiple tools
result = agent_loop(
"Find the cheapest flight to the city with the best weather in Europe next week"
)
# Expected tool sequence (illustrative):
# 1. search_web("Europe weather forecast next week")
# 2. search_flights("cheapest flights to [best weather city]")
# 3. Synthesize answer
# NOTE(review): search_flights is not declared in `functions` above — a
# real run could only use the get_weather/search_web tools as defined.
# GPT-4 can request several function calls in one assistant turn, but only
# through the newer "tools" API: the legacy functions/function_call
# parameters return at most ONE message.function_call.
response = client.chat.completions.create(
    model="gpt-4",
    messages=[{
        "role": "user",
        "content": "What's the weather in Tokyo and New York?"
    }],
    tools=[{"type": "function", "function": f} for f in functions],
    tool_choice="auto"
)
# All parallel calls arrive on ONE choice, as message.tool_calls.
# (Bug fix: the original read a non-existent `message.function_calls`
# attribute and iterated over response.choices, which holds alternative
# completions, not parallel calls.)
message = response.choices[0].message
results = []
for call in (message.tool_calls or []):
    # Each entry carries the function name and its JSON-encoded arguments.
    results.append(execute_function(call.function.name, call.function.arguments))
Agent creates full plan before executing.
def plan_and_solve(goal):
    """Plan-and-Execute agent: draft a complete plan up front, run it, synthesize.

    Unlike ReAct (which interleaves thinking and acting), the whole plan is
    produced first, then every step is executed, then results are combined.
    """
    # Step 1: ask the LLM for a numbered plan.
    plan_prompt = f"""Create a step-by-step plan to: {goal}
Format:
1. [First step]
2. [Second step]
...
"""
    plan = llm.generate(plan_prompt)

    # Step 2: execute every planned step, collecting outcomes in order.
    step_results = [execute_step(step) for step in parse_steps(plan)]

    # Step 3: merge the per-step outcomes into one final answer.
    return synthesize_results(step_results)
Example:
Goal: "Organize a team meeting next Tuesday"
Plan:
1. Check team calendars for availability
2. Find available meeting rooms
3. Send calendar invites
4. Book meeting room
5. Prepare agenda
Execute each step with appropriate tools
Agent evaluates own performance and improves.
def reflexion_agent(task, max_attempts=3):
    """Reflexion pattern: attempt, self-evaluate, reflect on failure, retry.

    Args:
        task: Task description; reflections are appended to it between tries.
        max_attempts: Maximum number of solve attempts.

    Returns:
        The first solution judged correct, or the last attempt's solution if
        every attempt fails (None when max_attempts <= 0).
    """
    solution = None  # guards against UnboundLocalError when max_attempts <= 0
    for _ in range(max_attempts):
        # Try to solve.
        solution = agent_attempt(task)

        # Self-evaluate the attempt.
        evaluation = llm.generate(f"""
Task: {task}
Your solution: {solution}
Is this solution correct? If not, what's wrong?
""")

        # Bug fix: the original tested `"correct" in evaluation.lower()`,
        # which also matches "incorrect" / "not correct" and so accepted
        # failed evaluations. Require an explicitly positive verdict.
        verdict = evaluation.lower()
        if ("correct" in verdict
                and "incorrect" not in verdict
                and "not correct" not in verdict):
            return solution

        # Reflect on the failure and fold the lesson into the next attempt.
        reflection = llm.generate(f"""
Previous attempt failed: {evaluation}
What should you do differently next time?
""")
        task = f"{task}\n\nPrevious reflection: {reflection}"

    return solution
Explore multiple reasoning paths simultaneously.
def tree_of_thoughts(problem):
    """Tree-of-Thoughts: branch into several approaches, expand each one,
    score the paths, and return the best expansion.

    Args:
        problem: Problem statement to solve.

    Returns:
        The continuation (next steps) of the highest-rated approach.
    """
    import re  # local import: used only to parse numeric scores

    # Generate multiple initial thoughts (the breadth of the tree).
    thoughts = [
        llm.generate(f"Approach {i} to solve: {problem}") for i in (1, 2, 3)
    ]

    # Expand each thought one level deeper.
    expanded = []
    for thought in thoughts:
        next_steps = llm.generate(f"Continue this approach: {thought}")
        expanded.append((thought, next_steps))

    def _to_score(reply):
        # Pull the first number out of the model's free-text rating;
        # default to 0 when no number is present.
        match = re.search(r"\d+(?:\.\d+)?", reply)
        return float(match.group()) if match else 0.0

    # Evaluate each path numerically.
    # Bug fix: the original kept the raw reply *string* and compared
    # strings in max(), so "9" ranked above "10" lexicographically.
    evaluations = []
    for thought, steps in expanded:
        rating = llm.generate(f"Rate this approach (0-10): {thought} -> {steps}")
        evaluations.append((thought, steps, _to_score(rating)))

    # Choose the best-scoring path and return its expansion.
    best = max(evaluations, key=lambda entry: entry[2])
    return best[1]
Multiple specialized agents working together.
class OrchestratorAgent:
    """Orchestrator-workers pattern: decompose a task, route each subtask
    to a specialist agent, then synthesize the collected results."""

    def __init__(self):
        # Specialist workers, keyed by role.
        self.workers = {
            "researcher": ResearchAgent(),
            "writer": WriterAgent(),
            "coder": CoderAgent()
        }

    def solve(self, task):
        """Decompose `task`, dispatch subtasks to workers, merge results.

        NOTE(review): decompose() and synthesize() are assumed to be
        provided elsewhere — they are not defined in this snippet.
        """
        subtasks = self.decompose(task)
        results = []
        for subtask in subtasks:
            worker = self.choose_worker(subtask)
            results.append(worker.execute(subtask))
        return self.synthesize(results)

    def choose_worker(self, subtask):
        """Pick the specialist whose keyword appears in the subtask.

        Bug fix: the original fell off the end and returned None (crashing
        the caller with AttributeError) when no keyword matched; fall back
        to the researcher as the most general-purpose worker instead.
        """
        text = subtask.lower()
        if "research" in text:
            return self.workers["researcher"]
        if "write" in text:
            return self.workers["writer"]
        if "code" in text:
            return self.workers["coder"]
        return self.workers["researcher"]
# Example
orchestrator = OrchestratorAgent()
result = orchestrator.solve(
"Create a blog post about recent AI advances with code examples"
)
# Expected orchestration (illustrative):
# 1. researcher.execute("Find recent AI advances")
# 2. writer.execute("Write blog post about [research results]")
# 3. coder.execute("Create code examples for [blog topics]")
# 4. Combine into final blog post
Agents debate to reach better conclusions.
def multi_agent_debate(question, num_rounds=3):
    """Two agents with opposing stances debate; a judge synthesizes the answer.

    Each round, agent A rebuts B's latest position, then B rebuts A's
    (already updated) position, so positions evolve within a round.

    Args:
        question: The question under debate.
        num_rounds: Number of rebuttal rounds after the opening statements.

    Returns:
        The judge's synthesis of both final positions.
    """
    agent_a = LLM(system_prompt="You are a skeptical critic")
    agent_b = LLM(system_prompt="You are an optimistic advocate")

    # Opening statements.
    positions = {}
    positions["a"] = agent_a.generate(question)
    positions["b"] = agent_b.generate(question)

    # Rebuttal rounds.
    for _ in range(num_rounds):
        rebuttal_a = agent_a.generate(
            f"{question}\n\nOpponent says: {positions['b']}\n\nYour response:"
        )
        positions["a"] = rebuttal_a
        rebuttal_b = agent_b.generate(
            f"{question}\n\nOpponent says: {positions['a']}\n\nYour response:"
        )
        positions["b"] = rebuttal_b

    # An impartial judge merges both perspectives into one answer.
    judge = LLM(system_prompt="You are an impartial judge")
    return judge.generate(f"""
Question: {question}
Position A: {positions['a']}
Position B: {positions['b']}
Synthesize the best answer considering both perspectives.
""")
class CollaborativeCodingSystem:
    """Pipeline of three LLM roles: architect -> coder -> reviewer.

    Behavior-identical restyle of the original: same prompts, same single
    revision pass when the reviewer asks for one.
    """

    def __init__(self):
        self.architect = LLM(system_prompt="System architect")
        self.coder = LLM(system_prompt="Software engineer")
        self.reviewer = LLM(system_prompt="Code reviewer")

    def develop(self, requirements):
        """Produce code for `requirements` via design, implementation, review."""
        # Stage 1: high-level design.
        architecture = self.architect.generate(
            f"Design architecture for: {requirements}"
        )

        # Stage 2: implement the design.
        code = self.coder.generate(
            f"Implement this architecture:\n{architecture}"
        )

        # Stage 3: review, with at most one revision round when the
        # reviewer's reply contains the phrase "needs revision".
        review = self.reviewer.generate(
            f"Review this code:\n{code}\n\nSuggest improvements."
        )
        needs_rework = "needs revision" in review.lower()
        if needs_rework:
            code = self.coder.generate(
                f"Revise code based on review:\n{review}\n\nOriginal:\n{code}"
            )
        return code
# Example: three-role pipeline producing reviewed code from requirements.
system = CollaborativeCodingSystem()
code = system.develop("Build a REST API for user management")
# ❌ Vague: the model cannot tell when this tool applies or what input
# format it expects.
Tool(
name="search",
description="Search stuff"
)
# ✅ Clear: states purpose, when to use it, the input format, and what
# comes back — everything the model needs to pick the right tool.
Tool(
name="web_search",
description="Search the internet for current information. Use this when you need real-time data or recent events. Input should be a search query string. Returns top 5 web results."
)
def robust_tool_execution(tool, input):
    """Run `tool` on `input`, wrapping the outcome in a uniform envelope.

    Returns {"success": True, "result": ...} on success. On ANY exception
    returns {"success": False, "error": ..., "suggestion": ...} so the
    agent can observe the failure and try something else instead of
    crashing the loop.
    """
    try:
        outcome = tool(input)
    except Exception as exc:
        return {
            "success": False,
            "error": str(exc),
            "suggestion": "Try a different approach or tool",
        }
    return {"success": True, "result": outcome}
# Prevent infinite loops: cap the ReAct cycle, and still produce an answer
# from whatever has been gathered when the cap is reached.
agent_executor = AgentExecutor(
agent=agent,
tools=tools,
max_iterations=10, # Safety limit
early_stopping_method="generate" # Stop and generate answer
)
# System prompt for a tool-using agent; {tool_descriptions} is filled in
# at runtime with the registered tools' name/description strings.
system_prompt = """You are a helpful AI agent with access to tools.
Guidelines:
1. Think step-by-step about the problem
2. Use tools when you need external information or actions
3. If a tool fails, try a different approach
4. Keep track of what you've tried
5. When you have enough information, provide a clear final answer
Available tools:
{tool_descriptions}
"""
class MonitoredAgent:
    """Wrapper that logs every task the wrapped agent executes.

    Records task, outcome, duration, and a success flag so that success
    rate and latency can be inspected via get_stats().
    """

    def __init__(self, agent):
        # The wrapped agent must expose `execute(task)`.
        self.agent = agent
        self.logs = []  # one dict per executed task

    def execute(self, task):
        """Run `task` on the wrapped agent, logging outcome and duration.

        Re-raises any exception from the agent after logging it, so callers
        still see the original failure.
        """
        import time  # local import: the surrounding snippet never imports time

        start_time = time.time()
        try:
            result = self.agent.execute(task)
        except Exception as e:
            self.logs.append({
                "task": task,
                "error": str(e),
                "duration": time.time() - start_time,
                "success": False
            })
            raise
        self.logs.append({
            "task": task,
            "result": result,
            "duration": time.time() - start_time,
            "success": True
        })
        return result

    def get_stats(self):
        """Summarize logged executions.

        Returns:
            dict with total task count, success rate, and average duration.
            Bug fix: the original divided by len(self.logs) unconditionally
            and raised ZeroDivisionError before any task had run; an empty
            log now yields zeros.
        """
        total = len(self.logs)
        if total == 0:
            return {"total_tasks": 0, "success_rate": 0.0, "avg_duration": 0.0}
        return {
            "total_tasks": total,
            "success_rate": sum(l["success"] for l in self.logs) / total,
            "avg_duration": sum(l["duration"] for l in self.logs) / total
        }
Trong các bài tiếp theo, chúng ta sẽ chuyển sang Part IV: Production & System Design - containerization, model serving, MLOps, và scalability.
Bài viết thuộc series "From Zero to AI Engineer" - Module 8: Advanced GenAI Topics