Files
connpy/connpy/ai.py
T
fluzzi32 dba7e24dda feat(copilot): stabilize concurrency and enhance terminal context management
- AI Concurrency:
  - Implemented a dedicated background event loop (ConnpyAILoop) in a separate thread for AI tasks to ensure thread safety and event loop affinity.
  - Added 'run_ai_async' utility to funnel all LiteLLM calls through the dedicated loop.
  - Implemented global 'cleanup()' for safe closure of sync/async LiteLLM sessions.

- gRPC & Remote Sessions:
  - Enhanced 'NodeServicer' to identify command blocks within the terminal buffer using prompt regex/byte tracking.
  - Added support for selective context retrieval via 'context_start_pos' in the gRPC Interact stream.
  - Synchronized remote Copilot behavior by enriching questions with session history (last 5 queries) in 'NodeStub'.
  - Optimized token usage by cleaning 'node_info' metadata before AI transmission.

- Terminal Context & Core:
  - Modified 'node.connect' to always initialize 'mylog' (BytesIO) buffer regardless of disk logging configuration, ensuring Copilot context availability.
  - Integrated 'ai.cleanup()' in CLI (connapp) and Server (api) exit points for graceful shutdowns.
  - Suppressed LiteLLM internal streaming coroutine warnings during task cancellation.
2026-05-11 12:30:43 -03:00

1560 lines
80 KiB
Python
Executable File

import os
import sys
import json
import re
import datetime
import threading
import asyncio
from textwrap import dedent
from .core import nodes
_litellm_initialized = False


def _init_litellm():
    """Import LiteLLM on first use and turn off its diagnostic output.

    The import is deferred to the first call; later calls return
    immediately once the module-level flag has been set.
    """
    global _litellm_initialized
    if _litellm_initialized:
        return

    import litellm

    # Silence LiteLLM's own feedback/debug output.
    litellm.suppress_debug_info = True
    litellm.set_verbose = False
    _litellm_initialized = True
def completion(*args, **kwargs):
    """Lazy proxy for ``litellm.completion``.

    Runs the one-time LiteLLM setup, then forwards every positional and
    keyword argument unchanged and returns whatever LiteLLM returns.
    """
    _init_litellm()
    import litellm

    return litellm.completion(*args, **kwargs)
def stream_chunk_builder(*args, **kwargs):
    """Lazy proxy for ``litellm.stream_chunk_builder``.

    Runs the one-time LiteLLM setup, then forwards every positional and
    keyword argument unchanged and returns whatever LiteLLM returns.
    """
    _init_litellm()
    import litellm

    return litellm.stream_chunk_builder(*args, **kwargs)
from .hooks import ClassHook, MethodHook
from . import printer
from rich.markdown import Markdown
from rich.panel import Panel
from rich.text import Text
from rich.console import Group
from rich.rule import Rule
console = printer.console
_ai_loop = None
_ai_thread = None
_ai_lock = threading.Lock()


def _get_ai_loop():
    """Return the shared AI event loop, creating it on first use.

    On the first call a new asyncio event loop is created and started with
    ``run_forever`` in a daemon thread named ``ConnpyAILoop``. Creation is
    serialised by ``_ai_lock``; later calls return the same loop object.
    """
    global _ai_loop, _ai_thread
    with _ai_lock:
        if _ai_loop is not None:
            return _ai_loop
        _ai_loop = asyncio.new_event_loop()
        _ai_thread = threading.Thread(
            target=_ai_loop.run_forever,
            name="ConnpyAILoop",
            daemon=True,
        )
        _ai_thread.start()
        return _ai_loop
def run_ai_async(coro):
    """Schedule ``coro`` on the dedicated AI background loop.

    Returns the future produced by ``asyncio.run_coroutine_threadsafe``;
    the caller decides whether and how long to wait on it.
    """
    return asyncio.run_coroutine_threadsafe(coro, _get_ai_loop())
def cleanup():
    """Safely close any global LiteLLM sessions in the dedicated AI loop.

    Best-effort shutdown helper: does nothing when the AI loop was never
    created, waits at most 5 seconds for the session cleanup to finish and
    never raises for ordinary failures. KeyboardInterrupt / SystemExit are
    deliberately NOT swallowed so the process can still be stopped while
    cleanup is in progress.
    """
    loop = _ai_loop
    if not loop:
        return

    coro = _async_cleanup()
    try:
        future = asyncio.run_coroutine_threadsafe(coro, loop)
    except Exception:
        # Scheduling failed (e.g. the loop is already closed): close the
        # coroutine so it does not trigger a "never awaited" warning.
        coro.close()
        return

    try:
        future.result(timeout=5)
    except Exception:
        # Timed out or failed: ask the loop to cancel the task instead of
        # leaving it running in the background.
        future.cancel()
async def _async_cleanup():
    """Internal async cleanup for LiteLLM's module-level HTTP sessions.

    Closes ``litellm.client_session`` and ``litellm.aclient_session`` when
    they are set and resets both attributes to ``None``. Returns silently
    when LiteLLM is not installed. Errors raised while closing a session
    are ignored (best effort), but task cancellation is allowed to
    propagate.
    """
    try:
        import litellm
    except ImportError:
        return

    async def _close(session, method_names):
        # Call the first available close method; await it only when it
        # actually returned a coroutine (sync clients return None).
        for method_name in method_names:
            closer = getattr(session, method_name, None)
            if closer is None:
                continue
            result = closer()
            if asyncio.iscoroutine(result):
                await result
            return

    # 1. Close synchronous session
    sync_session = getattr(litellm, "client_session", None)
    if sync_session:
        try:
            await _close(sync_session, ("close",))
        except Exception:
            pass
        litellm.client_session = None

    # 2. Close asynchronous session
    async_session = getattr(litellm, "aclient_session", None)
    if async_session:
        # Detach first so nothing picks up a half-closed session.
        litellm.aclient_session = None
        try:
            # httpx-style async clients expose aclose(); aiohttp-style
            # sessions expose an awaitable close().
            await _close(async_session, ("aclose", "close"))
        except Exception:
            pass
@ClassHook
class ai:
"""Hybrid Multi-Agent System: Selective Escalation with Role Persistence."""
# Built-in allow-list of regular expressions for commands that may run
# without asking the user for confirmation. __init__ copies this list into
# self.safe_commands (plus any "trusted_commands" from the config) and
# _is_safe_command matches each pattern case-insensitively against the
# start of the stripped command.
# NOTE(review): patterns ending in `\s*` (ls, df, free, ps) and `^uname`
# match any command that merely shares the prefix (e.g. `^ps\s*` also
# matches `psql ...`), and nothing here rejects chained commands such as
# `show version; reload` - confirm this is the intended trust boundary.
SAFE_COMMANDS = [
r'^show\s+', r'^ls\s*', r'^cat\s+', r'^ip\s+', r'^pwd$', r'^hostname$', r'^uname',
r'^df\s*', r'^free\s*', r'^ps\s*', r'^ping\s+', r'^traceroute\s+', r'^whois\s+',
r'^kubectl\s+(get|describe|version|logs|top|explain|cluster-info|api-resources|api-versions)\s+',
r'^systemctl\s+status\s+', r'^journalctl\s+'
]
def __init__(self, config, org=None, api_key=None, engineer_model=None, architect_model=None, engineer_api_key=None, architect_api_key=None, console=None, confirm_handler=None, trust=False):
    """Initialise the agent from the connpy configuration.

    Args:
        config: Configuration object. This method reads ``config.config``
            (its ``"ai"`` section) and ``config.defaultdir``.
        org: Accepted for compatibility; not used by this method.
        api_key: Accepted for compatibility; not used by this method.
        engineer_model: Engineer model id (argument -> config -> default).
        architect_model: Architect model id (argument -> config -> default).
        engineer_api_key: Engineer API key (argument -> config).
        architect_api_key: Architect API key (argument -> config). When no
            key is available the engineer prompt states that the Architect
            is unavailable.
        console: Console used for output; defaults to ``printer.console``.
        confirm_handler: Callable ``(prompt, default="n") -> str`` used to
            confirm unsafe commands; defaults to the local rich prompt.
        trust: When true, the whole session starts in trust mode.

    Side effects:
        Reads ``<defaultdir>/ai_memory.md`` if it exists and creates the
        ``<defaultdir>/ai_sessions`` directory.
    """
    self.config = config
    self.console = console or printer.console
    self.confirm_handler = confirm_handler or self._local_confirm_handler
    self.trusted_session = trust  # Trust mode for the entire session
    self.interrupted = False

    # 1. Load the generic AI configuration
    aiconfig = self.config.config.get("ai", {})

    # Models (priority: argument -> config -> default)
    self.engineer_model = engineer_model or aiconfig.get("engineer_model") or "gemini/gemini-3.1-flash-lite-preview"
    self.architect_model = architect_model or aiconfig.get("architect_model") or "anthropic/claude-sonnet-4-6"

    # API keys (priority: argument -> config)
    self.engineer_key = engineer_api_key or aiconfig.get("engineer_api_key")
    self.architect_key = architect_api_key or aiconfig.get("architect_api_key")

    # Custom trusted command regexes (comma-separated string or list)
    custom_trusted = aiconfig.get("trusted_commands", [])
    if isinstance(custom_trusted, str):
        custom_trusted = [c.strip() for c in custom_trusted.split(",") if c.strip()]
    self.safe_commands = list(self.SAFE_COMMANDS) + (custom_trusted if isinstance(custom_trusted, list) else [])

    # Limits
    self.max_history = 30
    self.max_truncate = 50000
    self.soft_limit_iterations = 20  # Show warning and suggest Ctrl+C
    self.hard_limit_iterations = 50  # Force stop

    # External tool registry (populated by plugins via ClassHook.modify)
    self.external_engineer_tools = []     # Tool defs for Engineer LLM
    self.external_architect_tools = []    # Tool defs for Architect LLM
    self.external_tool_handlers = {}      # {"tool_name": handler_callable}
    self.tool_status_formatters = {}      # {"tool_name": formatter_callable}
    self.engineer_prompt_extensions = []  # Extra text for engineer prompt
    self.architect_prompt_extensions = []  # Extra text for architect prompt

    # Long-term memory
    self.memory_path = os.path.join(self.config.defaultdir, "ai_memory.md")
    self.long_term_memory = ""
    if os.path.exists(self.memory_path):
        try:
            with open(self.memory_path, "r") as f:
                self.long_term_memory = f.read()
        except FileNotFoundError:
            # The file vanished between the existence check and the open.
            self.long_term_memory = ""
        except PermissionError as e:
            self.console.print(f"[warning]Warning: Cannot read AI memory file: {e}[/warning]")
        except Exception as e:
            self.console.print(f"[warning]Warning: Failed to load AI memory: {e}[/warning]")

    # Session management
    self.sessions_dir = os.path.join(self.config.defaultdir, "ai_sessions")
    os.makedirs(self.sessions_dir, exist_ok=True)
    self.session_id = None
    self.session_path = None

    # Vendor-agnostic base prompts. Each static template is dedented BEFORE
    # dynamic text is inserted, so multi-line values (architect
    # instructions, memory file content) cannot defeat the dedent.
    if self.architect_key:
        architect_instructions = dedent("""
            CRITICAL - CONSULT vs ESCALATE:
            - ALWAYS use 'consult_architect' for: Configuration planning, design decisions, complex troubleshooting.
              Examples: "consultalo con el arquitecto", "preguntale al arquitecto", "que opina el arquitecto"
              You stay in control and present the advice to the user.
            - ONLY use 'escalate_to_architect' when user EXPLICITLY asks to TALK to the Architect:
              Examples: "quiero hablar con el arquitecto", "pasame con el arquitecto", "que me atienda el arquitecto"
              After escalation, you hand over control completely.
            - DEFAULT: When in doubt, use 'consult_architect'. Escalation is rare.
        """)
    else:
        architect_instructions = dedent("""
            CRITICAL - ARCHITECT UNAVAILABLE:
            - The Strategic Reasoning Engine (Architect) is currently UNAVAILABLE because its API key is not configured.
            - DO NOT attempt to consult or escalate to the architect.
            - If the user asks to consult the architect, inform them that the Architect is offline and offer to help them directly to the best of your abilities.
        """)

    memory_context = self.long_term_memory if self.long_term_memory else "Empty."

    engineer_rules = dedent("""
        Role: TECHNICAL EXECUTION ENGINE.
        Expertise: Universal Networking (Cisco, Nokia, Juniper, 6wind, etc.).
        Rules:
        - BE FAST AND EXTREMELY CONCISE: Provide direct answers. No filler words, no decorative language, no polite pleasantries. Save output tokens at all costs.
        - KNOWLEDGE FIRST: For general networking questions (AS numbers, protocol details, standards, generic commands), use your internal knowledge. ONLY use tools when the user's specific infrastructure data is required.
        - INVENTORY ONLY: 'run_commands', 'list_nodes', and 'get_node_info' are ONLY for interacting with the user's inventory.
        - BROADCAST RESTRICTION: Avoid using filter '.*' in 'run_commands' unless the user explicitly requests a global action. Try to target specific nodes or groups based on the conversation.
        - AUTONOMY: Proactively use iterative tool calls to find the root cause of infrastructure issues.
        - BATCH OPERATIONS: When working on multiple devices, call tools in parallel.
        - COMPLETE MISSIONS: Execute ALL steps of a mission before reporting back.
        - DIAGRAM: Use ASCII art or Unicode box-drawing characters directly in your responses to visualize topologies or paths when helpful.
        - EVIDENCE: Include 'Key Snippets' from tool outputs. Be token-efficient.
        - NO WANDERING: Do not speculate. If stuck, report attempts.
        - SAFETY: When you use 'run_commands' with configuration commands, the system automatically prompts the user for confirmation. Just execute - don't ask permission first.
    """).strip()
    # The memory text is interpolated here; the previous escaped braces
    # put the literal Python expression into the prompt instead.
    self._engineer_base_prompt = (
        f"{engineer_rules}\n{architect_instructions}\nNetwork Context: {memory_context}"
    ).strip()

    architect_rules = dedent("""
        Role: STRATEGIC REASONING ENGINE.
        Expertise: Network Architecture, Complex Troubleshooting, and Design Validation.
        Rules:
        - CONCISENESS IS MANDATORY: Strip out fluff, decorative language, and filler words. Provide direct, tactical instructions and analysis to save output tokens.
        - STRATEGY: Define technical missions for the Engineer.
        - DIAGRAM: Use ASCII art or Unicode box-drawing characters in your responses to visualize topologies, traffic paths, or logic flows.
        - ENGINEER CAPABILITIES: Your Engineer can:
          * Filter nodes (list_nodes), Run CLI commands (run_commands), Get metadata (get_node_info).
        - ANALYSIS: Review technical findings to identify patterns or design failures.
        - MEMORY: Update long-term facts ONLY when the user explicitly requests it.
        CRITICAL - EFFICIENT DELEGATION:
        - Plan ALL tasks upfront before delegating.
        - Delegate ONCE with a complete, detailed mission including ALL steps.
        - Example: "List all routers matching 'border.*', then run 'show ip bgp summary' and 'show ip route' on each, then analyze the outputs."
        - DO NOT delegate multiple times for the same goal. Batch everything into ONE mission.
        - Wait for Engineer's complete report before responding to user.
        CRITICAL - RETURNING CONTROL:
        - When your strategic analysis is complete and no further architectural decisions are needed, use 'return_to_engineer' to hand control back.
        - The Engineer is better suited for ongoing technical execution and troubleshooting.
        - Only stay in control if the user explicitly needs strategic oversight for multiple interactions.
    """).strip()
    self._architect_base_prompt = (
        f"{architect_rules}\nNetwork Context: {memory_context}"
    ).strip()
def _local_confirm_handler(self, prompt, default="n"):
    """Ask the user on the local terminal through rich's ``Prompt``.

    Returns the text entered by the user; ``default`` is handed to
    ``Prompt.ask`` unchanged.
    """
    from rich import prompt as rich_prompt

    return rich_prompt.Prompt.ask(prompt, default=default)
@property
def engineer_system_prompt(self):
    """Engineer system prompt, including any plugin-provided extensions.

    Without registered extensions this is the base prompt as-is; otherwise
    the extensions are appended, one per line, under a
    "Plugin Capabilities" heading.
    """
    if not self.engineer_prompt_extensions:
        return self._engineer_base_prompt
    plugin_text = "\n".join(self.engineer_prompt_extensions)
    return self._engineer_base_prompt + "\n\nPlugin Capabilities:\n" + plugin_text
@property
def architect_system_prompt(self):
    """Architect system prompt, including any plugin-provided extensions.

    Without registered extensions this is the base prompt as-is; otherwise
    the extensions are appended, one per line, under a
    "Plugin Capabilities" heading.
    """
    if not self.architect_prompt_extensions:
        return self._architect_base_prompt
    plugin_text = "\n".join(self.architect_prompt_extensions)
    return self._architect_base_prompt + "\n\nPlugin Capabilities:\n" + plugin_text
def register_ai_tool(self, tool_definition, handler, target="engineer", engineer_prompt=None, architect_prompt=None, status_formatter=None):
    """Register a plugin-provided tool with the AI system.

    Args:
        tool_definition (dict): OpenAI-compatible tool definition; its
            ``["function"]["name"]`` is used as the registration key.
        handler (callable): Function(ai_instance, **tool_args) -> str.
        target (str): 'engineer', 'architect', or 'both'.
        engineer_prompt (str): Extra text for the engineer system prompt.
        architect_prompt (str): Extra text for the architect system prompt.
        status_formatter (callable): Function(args_dict) -> status string.

    A tool definition is added at most once per registry (matched by
    name); the handler for the name is always (re)assigned.
    """
    tool_name = tool_definition["function"]["name"]

    def _add_once(registry):
        # Skip the append when a tool with the same name is already listed.
        if all(entry["function"]["name"] != tool_name for entry in registry):
            registry.append(tool_definition)

    if target == "engineer" or target == "both":
        _add_once(self.external_engineer_tools)
    if target == "architect" or target == "both":
        _add_once(self.external_architect_tools)

    self.external_tool_handlers[tool_name] = handler

    prompt_targets = (
        (engineer_prompt, self.engineer_prompt_extensions),
        (architect_prompt, self.architect_prompt_extensions),
    )
    for extra_text, extensions in prompt_targets:
        if extra_text and extra_text not in extensions:
            extensions.append(extra_text)

    if status_formatter:
        self.tool_status_formatters[tool_name] = status_formatter
def _stream_completion(self, model, messages, tools, api_key, status=None, label="", debug=False, chunk_callback=None, **kwargs):
    """Stream a completion call, rendering styled Markdown in real time.

    Args:
        model, messages, tools, api_key: Forwarded to ``completion`` together
            with ``stream=True`` and any extra ``**kwargs``.
        status: Optional spinner-like object; stopped (best effort) before
            the first text is rendered locally.
        label: Used only to pick the "architect" or "engineer" styling.
        debug: Accepted for signature compatibility; not used here.
        chunk_callback: When given, every text fragment is passed to it
            instead of being rendered on the local console.

    Returns:
        (response, streamed) where:
        - response: reconstructed ModelResponse (same as non-streaming), or
          a minimal stand-in object if LiteLLM cannot rebuild it.
        - streamed: True if text was rendered to the console during
          streaming and no tool calls were seen.

    Raises:
        KeyboardInterrupt: if ``self.interrupted`` is set while fragments are
            being delivered to ``chunk_callback``.
        Exception: errors raised before any chunk was received are
            re-raised; later errors end the stream and whatever was
            received so far is returned.
    """
    from rich.live import Live

    stream_resp = completion(model=model, messages=messages, tools=tools, api_key=api_key, stream=True, **kwargs)
    chunks = []
    full_content = ""
    is_streaming_text = False
    has_tool_calls = False
    live_display = None

    # Determine styling based on current brain
    is_architect = "architect" in label.lower()
    role_label = "Network Architect" if is_architect else "Network Engineer"
    alias = "architect" if is_architect else "engineer"
    title = f"[bold {alias}]{role_label}[/bold {alias}]"
    border = alias

    def _render_panel():
        # Panel showing everything received so far.
        return Panel(Markdown(full_content), title=title, border_style=border, expand=False)

    try:
        for chunk in stream_resp:
            chunks.append(chunk)

            # Some providers emit chunks with no choices (e.g. a trailing
            # usage-only chunk). Keep them for the rebuild, but skip delta
            # handling; indexing choices[0] would raise and end the stream.
            if not getattr(chunk, "choices", None):
                continue
            delta = chunk.choices[0].delta

            # Detect tool calls
            if getattr(delta, "tool_calls", None):
                has_tool_calls = True

            # Stream text content with styled rendering
            text = getattr(delta, "content", None)
            if not text:
                continue
            full_content += text

            if chunk_callback:
                if chunk:
                    # Check for remote interruption during streaming
                    if getattr(self, "interrupted", False):
                        raise KeyboardInterrupt
                    chunk_callback(text)
                continue

            if not is_streaming_text:
                # Stop spinner definitively
                if status:
                    try:
                        status.stop()
                    except Exception:
                        pass
                # Create a stable, direct Console to bypass _ConsoleProxy recreation bugs
                from rich.console import Console as RichConsole
                from .printer import connpy_theme, get_original_stdout
                stable_console = RichConsole(theme=connpy_theme, file=get_original_stdout())
                live_display = Live(
                    _render_panel(),
                    console=stable_console,
                    refresh_per_second=8,
                    transient=False,
                )
                live_display.start()
                is_streaming_text = True
            else:
                live_display.update(_render_panel())
    except Exception:
        # Nothing usable was received: surface the error. Otherwise keep
        # the partial stream and return what we have.
        if not chunks:
            raise
    finally:
        if live_display:
            # Render final state with complete content
            try:
                live_display.update(_render_panel())
            except Exception:
                pass
            try:
                live_display.stop()
            except Exception:
                pass

    # Rebuild complete response from chunks
    try:
        response = stream_chunk_builder(chunks, messages=messages)
    except Exception:
        # Fallback: manual reconstruction if stream_chunk_builder fails
        full_content_rebuilt = ""
        tool_calls_map = {}
        for c in chunks:
            if not getattr(c, "choices", None):
                continue
            d = c.choices[0].delta
            if getattr(d, "content", None):
                full_content_rebuilt += d.content
            for tc in (getattr(d, "tool_calls", None) or []):
                idx = tc.index
                if idx not in tool_calls_map:
                    tool_calls_map[idx] = {
                        "id": tc.id or "",
                        "type": "function",
                        "function": {
                            "name": getattr(tc.function, 'name', '') or '',
                            "arguments": getattr(tc.function, 'arguments', '') or '',
                        },
                    }
                else:
                    entry = tool_calls_map[idx]
                    if tc.id:
                        entry["id"] = tc.id
                    if tc.function:
                        if tc.function.name:
                            entry["function"]["name"] = tc.function.name
                        if tc.function.arguments:
                            # Arguments arrive as string fragments.
                            entry["function"]["arguments"] += tc.function.arguments

        # Build a minimal response-like object
        class FakeFunc:
            def __init__(self, name, arguments):
                self.name = name
                self.arguments = arguments

        class FakeTC:
            def __init__(self, d):
                self.id = d["id"]
                self.function = FakeFunc(d["function"]["name"], d["function"]["arguments"])

            def model_dump(self, **kw):
                return {"id": self.id, "type": "function", "function": {"name": self.function.name, "arguments": self.function.arguments}}

        class FakeMsg:
            def __init__(self, content, tcs):
                self.content = content or None
                self.tool_calls = tcs if tcs else None
                self.role = "assistant"

            def model_dump(self, **kw):
                d = {"role": "assistant", "content": self.content}
                if self.tool_calls:
                    d["tool_calls"] = [tc.model_dump() for tc in self.tool_calls]
                return d

        class FakeChoice:
            def __init__(self, msg):
                self.message = msg

        class FakeResp:
            def __init__(self, choice):
                self.choices = [choice]
                self.usage = None

        tcs = [FakeTC(tool_calls_map[i]) for i in sorted(tool_calls_map)] if tool_calls_map else None
        response = FakeResp(FakeChoice(FakeMsg(full_content_rebuilt or full_content, tcs)))

    # Only count as "streamed" if we rendered text AND it was the final response (no tool calls)
    streamed = is_streaming_text and not has_tool_calls
    return response, streamed
def _sanitize_messages(self, messages):
    """Sanitize a message list for strict providers like Gemini.

    Ensures that:
    1. An assistant message with tool_calls is kept only together with the
       tool responses that immediately follow it.
    2. No user/system messages appear between tool_calls and tool responses.
    3. Assistant tool_calls with no following tool response are removed.
    4. Tool responses without a preceding tool_call are removed.
    5. ``cache_control`` metadata is stripped and list-form system content
       is flattened to plain text.
    6. Consecutive user messages are merged, and an assistant message that
       does not follow a user/tool message gets a separator user message
       inserted before it.

    Args:
        messages: List of message dicts, or objects exposing
            ``model_dump(exclude_none=True)``.

    Returns:
        A new list of message dicts. Neither the input list nor the message
        and content-item dicts inside it are modified. A falsy input is
        returned unchanged.
    """
    if not messages:
        return messages

    separator = "[System sequence separator: History Truncated/Merged]"

    # Pre-process messages to pull text from list contents (Anthropic cache
    # format) and remove explicit cache keys.
    pre_sanitized = []
    for msg in messages:
        m = msg.copy() if isinstance(msg, dict) else msg.model_dump(exclude_none=True)

        # Convert content list to plain string for system messages
        content = m.get('content')
        if m.get('role') == 'system' and isinstance(content, list):
            first = content[0] if content else None
            if isinstance(first, dict) and first.get('text'):
                m['content'] = first['text']
            else:
                m['content'] = ""

        # Remove any explicit cache_control key anywhere
        m.pop('cache_control', None)
        if isinstance(m.get('content'), list):
            # m is only a shallow copy: rebuild the items instead of deleting
            # keys in place, otherwise the caller's history loses its cache
            # markers as a side effect.
            m['content'] = [
                {k: v for k, v in item.items() if k != 'cache_control'} if isinstance(item, dict) else item
                for item in m['content']
            ]
        pre_sanitized.append(m)

    sanitized = []
    last_role = None
    i = 0
    total = len(pre_sanitized)
    while i < total:
        msg = pre_sanitized[i]
        role = msg.get('role', '')

        if role == 'system':
            sanitized.append(msg)
            last_role = 'system'
            i += 1
        elif role == 'user':
            if last_role == 'user' and sanitized:
                # Combine consecutive user messages
                previous = sanitized[-1]
                previous['content'] = str(previous.get('content', '') or '') + '\n' + str(msg.get('content', '') or '')
            else:
                sanitized.append(msg)
                last_role = 'user'
            i += 1
        elif role == 'assistant':
            has_tools = bool(msg.get('tool_calls'))

            # Gemini strict sequence: assistant MUST be preceded by user or
            # tool. Insert a separator when it follows system/assistant or
            # is the very first message.
            if last_role not in ('user', 'tool'):
                sanitized.append({"role": "user", "content": separator})
                last_role = 'user'

            if has_tools:
                # Look ahead for the tool responses that directly follow
                j = i + 1
                while j < total and pre_sanitized[j].get('role') == 'tool':
                    j += 1
                tool_responses = pre_sanitized[i + 1:j]

                if tool_responses:
                    sanitized.append(msg)
                    sanitized.extend(tool_responses)
                    last_role = 'tool'
                    i = j
                else:
                    # Orphaned tool_calls with no responses: skip the
                    # assistant message, and drop the separator if one is
                    # the last entry.
                    if sanitized and sanitized[-1].get('content') == separator:
                        sanitized.pop()
                        last_role = sanitized[-1].get('role', '') if sanitized else None
                    i += 1
            else:
                sanitized.append(msg)
                last_role = 'assistant'
                i += 1
        elif role == 'tool':
            # Orphaned tool response (no preceding assistant with tool_calls) - skip
            i += 1
        else:
            sanitized.append(msg)
            last_role = role
            i += 1

    return sanitized
def _truncate(self, text, limit=None):
    """Truncate text to a size limit, keeping the head (60%) and tail (40%).

    Args:
        text: Value to truncate; non-strings are converted with ``str()``
            first and then truncated like any other text.
        limit: Maximum length before truncation kicks in. ``None`` or ``0``
            falls back to ``self.max_truncate``.

    Returns:
        The text unchanged when it fits, otherwise head + a truncation
        marker + tail. The marker is added on top of the kept characters,
        so the result can be slightly longer than ``limit``.
    """
    if not isinstance(text, str):
        text = str(text)
    final_limit = limit or self.max_truncate
    if len(text) <= final_limit:
        return text
    head_limit = int(final_limit * 0.6)
    tail_limit = int(final_limit * 0.4)
    # text[-0:] is the WHOLE string, so a zero-sized tail must be handled
    # explicitly or tiny limits would return more than the original text.
    tail = text[-tail_limit:] if tail_limit > 0 else ""
    return text[:head_limit] + "\n\n[... OUTPUT TRUNCATED ...]\n\n" + tail
def _print_debug_observation(self, fn, obs, status=None):
    """Print a tool decision/observation as a panel while in debug mode.

    Args:
        fn: Text used as the panel title.
        obs: Observation to show. A string is parsed as JSON when possible;
            dicts are shown as key/value pairs, lists one item per line and
            anything else via ``str()``.
        status: Optional spinner-like object; it is stopped before printing
            and started again afterwards (both best effort).
    """
    # Try to parse as JSON if it's a string
    if isinstance(obs, str):
        try:
            obs_data = json.loads(obs)
        except Exception:
            obs_data = obs
    else:
        obs_data = obs

    if isinstance(obs_data, dict):
        elements = []
        for k, v in obs_data.items():
            elements.append(Text(f"{k}:", style="key"))
            # Use Text for values to ensure newlines are rendered
            elements.append(Text(str(v)))
        content = Group(*elements) if elements else Text("Empty data set")
    elif isinstance(obs_data, list):
        content = Text("\n".join(f"{item}" for item in obs_data))
    else:
        content = Text(str(obs_data))

    title = f"[bold]{fn}[/bold]"

    # Stop status before printing panel to avoid ghosting. Only ordinary
    # errors are ignored so Ctrl+C / SystemExit still propagate.
    if status:
        try:
            status.stop()
        except Exception:
            pass

    self.console.print(Panel(content, title=title, border_style="ai_status"))

    # Resume status
    if status:
        try:
            status.start()
        except Exception:
            pass
def manage_memory_tool(self, content, action="append"):
    """Save or update long-term memory. Only use when user explicitly requests it.

    Args:
        content: Text to store; empty or whitespace-only content is rejected.
        action: ``"append"`` adds a timestamped section to the memory file;
            any other value overwrites the file with ``content``.

    Returns:
        A status string: a success message, or a message starting with
        ``"Error"``. I/O failures are reported, not raised.

    Side effects:
        Writes ``self.memory_path`` (creating its parent directory when the
        path has one) and reloads ``self.long_term_memory`` from the file.
    """
    if not content or not content.strip():
        return "Error: Cannot save empty content to memory."

    try:
        parent_dir = os.path.dirname(self.memory_path)
        # A bare filename has no directory part; os.makedirs("") would fail.
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        if action == "append":
            mode = "a"
            timestamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
            payload = f"\n\n## {timestamp}\n{content.strip()}\n"
        else:
            mode = "w"
            payload = content

        with open(self.memory_path, mode) as f:
            f.write(payload)

        # Reload memory after update
        with open(self.memory_path, "r") as f:
            self.long_term_memory = f.read()
        return "Memory updated successfully."
    except PermissionError as e:
        return f"Error: Permission denied writing to memory file: {e}"
    except Exception as e:
        return f"Error updating memory: {str(e)}"
def list_nodes_tool(self, filter_pattern=".*"):
    """List inventory nodes whose name matches ``filter_pattern``.

    Returns:
        - ``"No nodes found."`` when nothing matches;
        - for up to 5 matches, a dict ``{node_name: {"os": <tags.os or
          "unknown">}}``;
        - for more than 5 matches, a dict with the count, the node names
          and a hint to use 'get_node_info';
        - an error string if the lookup raises.
    """
    try:
        names = self.config._getallnodes(filter_pattern)
        if not names:
            return "No nodes found."

        if len(names) > 5:
            return {"count": len(names), "nodes": names, "note": "Use 'get_node_info' for details."}

        summary = {}
        for node_name, node_data in self.config.getitems(names, extract=True).items():
            tags = node_data.get("tags") if isinstance(node_data, dict) else None
            node_os = tags.get("os", "unknown") if isinstance(tags, dict) else "unknown"
            summary[node_name] = {"os": node_os}
        return summary
    except Exception as e:
        return f"Error listing nodes: {str(e)}"
def _is_safe_command(self, cmd):
    """Return True when ``cmd`` matches one of ``self.safe_commands``.

    Every pattern is matched case-insensitively against the start of the
    whitespace-stripped command.
    """
    for pattern in self.safe_commands:
        if re.match(pattern, cmd.strip(), re.IGNORECASE):
            return True
    return False
def run_commands_tool(self, nodes_filter, commands, status=None):
    """Run commands on every node matching ``nodes_filter``.

    Args:
        nodes_filter: Node name or filter handed to the config lookup.
        commands: A list of commands, a JSON-encoded string, or a plain
            newline-separated string. Multi-line entries are split into
            one command per line.
        status: Optional spinner-like object; stopped while the user is
            asked for confirmation.

    Returns:
        The result of running the commands, or a string describing why
        nothing ran (no matching nodes, rejection, user feedback, error).

    Unless the session is trusted, any command not matching the safe
    patterns triggers an interactive confirmation. Ctrl+C at that prompt
    is re-raised as KeyboardInterrupt.
    """
    # Accept a JSON string or a newline-separated string
    if isinstance(commands, str):
        try:
            commands = json.loads(commands)
        except ValueError:
            commands = [line.strip() for line in commands.split('\n') if line.strip()]

    # Expand multi-line commands within a list (in case the AI packs them)
    if isinstance(commands, list):
        commands = [
            piece.strip()
            for entry in commands
            for piece in str(entry).split('\n')
            if piece.strip()
        ]
    else:
        commands = [str(commands)]

    # Check command safety natively
    unsafe_commands = []
    if not self.trusted_session:
        unsafe_commands = [cmd for cmd in commands if not self._is_safe_command(cmd)]

    if unsafe_commands:
        # Stop the spinner so prompt doesn't get messed up
        if status:
            status.stop()

        # Show ALL commands with unsafe ones highlighted
        formatted_cmds = [
            f" • [warning]{cmd}[/warning]" if cmd in unsafe_commands else f"{cmd}"
            for cmd in commands
        ]
        panel_content = f"Target: {nodes_filter}\nCommands:\n" + "\n".join(formatted_cmds)

        # Use print_important if available (for remote bridges) fallback to standard print
        print_fn = getattr(self.console, "print_important", self.console.print)
        print_fn(Panel(panel_content, title="[bold warning]⚠️ UNSAFE COMMANDS DETECTED[/bold warning]", border_style="warning"))

        try:
            user_resp = self.confirm_handler("[bold warning]Execute? (y: yes / n: no / a: allow all this session / <text>: feedback)[/bold warning]", default="n")
        except KeyboardInterrupt:
            if status:
                status.update("[ai_status]Engineer: Resuming...")
            self.console.print("[fail]✗ Aborted by user (Ctrl+C).[/fail]")
            raise

        # Resume the spinner
        if status:
            status.update("[ai_status]Engineer: Processing user response...")

        answer = user_resp.strip().lower()
        if answer in ('n', 'no', ''):
            self.console.print("[fail]✗ Execution rejected by user.[/fail]")
            return "Error: User rejected execution."
        if answer in ('a', 'allow'):
            self.trusted_session = True
            self.console.print("[pass]✓ Trust Mode Enabled. All future commands in this session will execute without confirmation.[/pass]")
        elif answer in ('y', 'yes'):
            self.console.print("[pass]✓ Executing...[/pass]")
        else:
            # Anything else is treated as feedback for the model
            self.console.print(f"[user_prompt]User feedback: [/user_prompt]{user_resp}")
            return f"User requested changes: {user_resp}. Please adjust the commands based on this feedback and try again."

    try:
        matched_names = self.config._getallnodes(nodes_filter)
        if not matched_names:
            return "No nodes found matching filter."
        target_nodes = self.config.getitems(matched_names, extract=True)
        return nodes(target_nodes, config=self.config).run(commands)
    except Exception as e:
        return f"Error executing commands: {str(e)}"
def get_node_info_tool(self, node_name):
    """Get detailed metadata for a specific node. Passwords are masked.

    Args:
        node_name: Name handed to ``self.config.getitem``.

    Returns:
        The node data with a top-level ``password`` entry replaced by
        ``'***'``, or an error string if the lookup or masking fails.
        Only the top-level ``password`` key is masked.
    """
    try:
        info = self.config.getitem(node_name, extract=True)
        if isinstance(info, dict):
            # Mask a copy: the object returned by the config may be shared,
            # and masking it in place would overwrite the real password.
            info = dict(info)
        if 'password' in info:
            info['password'] = '***'
        return info
    except Exception as e:
        return f"Error getting node info: {str(e)}"
def _engineer_loop(self, task, status=None, debug=False, chat_history=None):
    """Internal loop where the Engineer executes a technical task for the Architect.

    Args:
        task: Mission text, sent to the model as ``"MISSION: <task>"``.
        status: Optional spinner/bridge object. If it has an
            ``on_interrupt`` attribute, a callback that sets
            ``self.interrupted`` is installed on it.
        debug: When true, tool decisions, observations and the final report
            are printed.
        chat_history: Optional earlier messages; only the last 5 are used.

    Returns:
        ``(report, usage)`` where ``report`` is the content of the last
        model message (or an ``"Engineer failed: ..."`` string) and
        ``usage`` is a dict with ``input`` / ``output`` / ``total`` tokens.

    Raises:
        KeyboardInterrupt: when ``self.interrupted`` is set, or when a tool
            re-raises a user abort.
    """
    # Prompt caching is only used for direct Anthropic models (Vertex has different rules)
    model_id = self.engineer_model.lower()
    cache_capable = "claude" in model_id and "vertex" not in model_id
    if cache_capable:
        messages = [{"role": "system", "content": [{"type": "text", "text": self.engineer_system_prompt, "cache_control": {"type": "ephemeral"}}]}]
    else:
        messages = [{"role": "system", "content": self.engineer_system_prompt}]

    if chat_history:
        recent = chat_history[-5:]
        # Strip caching metadata from history unless the model supports it
        messages.extend(recent if cache_capable else self._sanitize_messages(recent))

    messages.append({"role": "user", "content": f"MISSION: {task}"})
    tools = self._get_engineer_tools()
    usage = {"input": 0, "output": 0, "total": 0}
    iteration = 0
    soft_limit_warned = False

    try:
        # Set up remote interrupt callback if bridge is provided
        if status and hasattr(status, "on_interrupt"):
            status.on_interrupt = lambda: setattr(self, "interrupted", True)

        while iteration < self.hard_limit_iterations:
            iteration += 1

            # Check for interruption
            if self.interrupted:
                raise KeyboardInterrupt

            # Soft limit warning
            if iteration == self.soft_limit_iterations and not soft_limit_warned:
                self.console.print(f"[warning]⚠ Engineer has performed {iteration} steps. This is taking longer than expected.[/warning]")
                self.console.print("[warning] You can press Ctrl+C to interrupt and get a summary.[/warning]")
                soft_limit_warned = True

            if status and not chat_history:
                status.update(f"[ai_status]Engineer: Analyzing mission... (step {iteration})")

            try:
                safe_messages = self._sanitize_messages(messages)
                response = completion(model=self.engineer_model, messages=safe_messages, tools=tools, api_key=self.engineer_key)
            except Exception as e:
                if status:
                    status.stop()
                raise ValueError(f"Engineer failed to connect: {str(e)}") from e

            if hasattr(response, "usage") and response.usage:
                # `or 0` guards against providers reporting None counts
                usage["input"] += getattr(response.usage, "prompt_tokens", 0) or 0
                usage["output"] += getattr(response.usage, "completion_tokens", 0) or 0
                usage["total"] += getattr(response.usage, "total_tokens", 0) or 0

            resp_msg = response.choices[0].message
            msg_dict = resp_msg.model_dump(exclude_none=True)
            if msg_dict.get("tool_calls") and msg_dict.get("content") == "":
                msg_dict["content"] = None
            messages.append(msg_dict)

            if not resp_msg.tool_calls:
                break

            for tc in resp_msg.tool_calls:
                fn = tc.function.name

                # Tool arguments may be empty or malformed. Report that back
                # to the model as the tool result instead of aborting the
                # whole mission; every tool call still gets a response.
                try:
                    args = json.loads(tc.function.arguments or "{}")
                    if not isinstance(args, dict):
                        raise ValueError("arguments must be a JSON object")
                except ValueError as e:
                    messages.append({"tool_call_id": tc.id, "role": "tool", "name": fn, "content": f"Error: Invalid arguments for tool '{fn}': {e}"})
                    continue

                # Real-time notification of the technical task (only if not in Architect loop)
                if status and not chat_history:
                    if fn == "list_nodes":
                        status.update(f"[ai_status]Engineer: [SEARCH] {args.get('filter_pattern','.*')}")
                    elif fn == "run_commands":
                        cmds = args.get('commands', [])
                        if isinstance(cmds, str):
                            cmd_str = cmds
                        else:
                            cmd_str = cmds[0] if cmds else ""
                        status.update(f"[ai_status]Engineer: [CMD] {cmd_str}")
                    elif fn == "get_node_info":
                        status.update(f"[ai_status]Engineer: [INSPECT] {args.get('node_name','')}")
                    elif fn in self.tool_status_formatters:
                        status.update(self.tool_status_formatters[fn](args))

                if debug:
                    self._print_debug_observation(f"Decision: {fn}", args, status=status)

                if fn == "list_nodes":
                    obs = self.list_nodes_tool(**args)
                elif fn == "run_commands":
                    obs = self.run_commands_tool(**args, status=status)
                elif fn == "get_node_info":
                    obs = self.get_node_info_tool(**args)
                elif fn in self.external_tool_handlers:
                    obs = self.external_tool_handlers[fn](self, **args)
                else:
                    obs = f"Error: Unknown tool '{fn}'."

                if debug:
                    self._print_debug_observation(f"Observation: {fn}", obs, status=status)

                # Ensure observation is a string and truncated for the LLM
                obs_str = obs if isinstance(obs, str) else json.dumps(obs)
                messages.append({"tool_call_id": tc.id, "role": "tool", "name": fn, "content": self._truncate(obs_str)})
        else:
            # Runs only when the loop used up its budget without a final
            # answer; a normal `break` on the last allowed step skips it.
            self.console.print(f"[error]⛔ Engineer reached hard limit ({self.hard_limit_iterations} steps). Forcing stop.[/error]")

        if debug and resp_msg.content:
            self.console.print(Panel(Text(resp_msg.content), title="[bold engineer]Engineer Final Report to Architect[/bold engineer]", border_style="engineer"))

        return resp_msg.content, usage
    except Exception as e:
        return f"Engineer failed: {str(e)}", usage
def _get_engineer_tools(self):
    """Build the function-calling tool schemas offered to the Engineer.

    The list always contains ``list_nodes``, ``run_commands`` and
    ``get_node_info``. The two Architect hand-off tools are added only when
    ``self.architect_key`` is truthy. Entries from
    ``self.external_engineer_tools`` are appended last; when several tools
    share a name, only the first one is kept.

    Returns:
        list[dict]: Tool definitions with unique function names.
    """
    def make_tool(name, description, properties, required=None):
        # Assemble one tool schema; "required" is emitted only when provided.
        parameters = {"type": "object", "properties": properties}
        if required is not None:
            parameters["required"] = required
        return {"type": "function", "function": {"name": name, "description": description, "parameters": parameters}}

    catalogue = [
        make_tool(
            "list_nodes",
            "Lists available nodes in the inventory.",
            {"filter_pattern": {"type": "string", "description": "Regex to filter nodes (e.g. '.*', 'border.*')."}},
        ),
        make_tool(
            "run_commands",
            "Runs one or more commands on matched nodes. MANDATORY: You MUST call 'list_nodes' first to verify the target list.",
            {
                "nodes_filter": {"type": "string", "description": "Exact node name or verified filter pattern."},
                "commands": {"type": "array", "items": {"type": "string"}, "description": "List of commands (e.g. ['show ip route', 'show int desc'])."},
            },
            ["nodes_filter", "commands"],
        ),
        make_tool(
            "get_node_info",
            "Gets full metadata for a specific node.",
            {"node_name": {"type": "string"}},
            ["node_name"],
        ),
    ]

    if self.architect_key:
        catalogue.append(make_tool(
            "consult_architect",
            "Ask the Strategic Reasoning Engine for advice on complex design, architecture, or troubleshooting decisions. You remain in control and will present the response to the user. Use this for: configuration planning, design validation, complex troubleshooting.",
            {
                "question": {"type": "string", "description": "Strategic question or decision needed."},
                "technical_summary": {"type": "string", "description": "Technical findings and context gathered so far."},
            },
            ["question", "technical_summary"],
        ))
        catalogue.append(make_tool(
            "escalate_to_architect",
            "Transfer full control to the Strategic Reasoning Engine. Use ONLY when the user explicitly requests the Architect or when the problem requires strategic oversight beyond consultation. After escalation, the Architect takes over the conversation.",
            {
                "reason": {"type": "string", "description": "Why you're escalating (e.g. 'User requested Architect', 'Complex multi-site design needed')."},
                "context": {"type": "string", "description": "Full context and findings to hand over."},
            },
            ["reason", "context"],
        ))

    # Duplicate tool names trigger a Gemini BadRequestError, so the first
    # definition of each name wins (dicts keep insertion order).
    first_by_name = {}
    for tool in catalogue + self.external_engineer_tools:
        first_by_name.setdefault(tool["function"]["name"], tool)
    return list(first_by_name.values())
def _get_architect_tools(self):
    """Build the function-calling tool schemas offered to the Strategic Reasoning Engine.

    Contains ``delegate_to_engineer``, ``return_to_engineer`` and
    ``manage_memory_tool``, followed by ``self.external_architect_tools``.
    When several tools share a name, only the first one is kept.

    Returns:
        list[dict]: Tool definitions with unique function names.
    """
    def make_tool(name, description, properties, required):
        # Assemble one tool schema in the key order the rest of the file uses.
        parameters = {"type": "object", "properties": properties, "required": required}
        return {"type": "function", "function": {"name": name, "description": description, "parameters": parameters}}

    catalogue = [
        make_tool(
            "delegate_to_engineer",
            "Delegates a technical mission to the Engineer.",
            {"task": {"type": "string", "description": "Detailed technical mission or goal."}},
            ["task"],
        ),
        make_tool(
            "return_to_engineer",
            "Return control to the Engineer. Use this when your strategic analysis is complete and the Engineer should handle the rest of the conversation.",
            {"summary": {"type": "string", "description": "Brief summary of your analysis to hand over to the Engineer."}},
            ["summary"],
        ),
        make_tool(
            "manage_memory_tool",
            "Saves information to long-term memory. MANDATORY: Only use this if the user explicitly asks to remember or save something.",
            {"content": {"type": "string"}, "action": {"type": "string", "enum": ["append", "replace"]}},
            ["content"],
        ),
    ]

    # First definition of each name wins (dicts keep insertion order).
    first_by_name = {}
    for tool in catalogue + self.external_architect_tools:
        first_by_name.setdefault(tool["function"]["name"], tool)
    return list(first_by_name.values())
def _get_sessions(self):
    """Return metadata for every readable saved session, newest first.

    Scans ``self.sessions_dir`` for ``*.json`` files. Files that cannot be
    read or parsed are skipped silently so one bad file never hides the rest.

    Returns:
        list[dict]: One dict per session with the keys ``id`` (file name
        without ``.json``), ``title``, ``created_at``, ``model`` and ``path``,
        sorted by ``created_at`` in descending order. Empty when the sessions
        directory does not exist.
    """
    if not os.path.exists(self.sessions_dir):
        return []

    sessions = []
    for f in os.listdir(self.sessions_dir):
        if not f.endswith(".json"):
            continue
        path = os.path.join(self.sessions_dir, f)
        try:
            with open(path, "r", encoding="utf-8") as fs:
                data = json.load(fs)
            sessions.append({
                "id": f[:-5],
                "title": data.get("title", "Untitled Session"),
                "created_at": data.get("created_at", "Unknown"),
                "model": data.get("model", "Unknown"),
                "path": path
            })
        except Exception:
            # Best-effort listing: unreadable or malformed files are skipped.
            continue

    # A stored created_at may be null or a number (e.g. a hand-edited file).
    # Comparing raw values would raise TypeError and break the whole listing,
    # so order by the text form; the returned values themselves are untouched.
    return sorted(sessions, key=lambda x: str(x["created_at"]), reverse=True)
def list_sessions(self):
    """Print the saved AI sessions as a table, or an info notice when there are none."""
    found = self._get_sessions()
    if found:
        header = ["ID", "Title", "Created At", "Model"]
        fields = ("id", "title", "created_at", "model")
        body = [[entry[field] for field in fields] for entry in found]
        printer.table("AI Persisted Sessions", header, body)
        return
    printer.info("No saved AI sessions found.")
def load_session_data(self, session_id):
    """Read the stored JSON for ``session_id`` and make it the active session.

    On success ``self.session_id`` and ``self.session_path`` are updated and
    the parsed data is returned. Returns None when the file does not exist or
    cannot be read; a read failure is also reported through ``printer.error``.
    """
    session_file = os.path.join(self.sessions_dir, f"{session_id}.json")
    if not os.path.exists(session_file):
        return None
    try:
        with open(session_file, "r") as handle:
            payload = json.load(handle)
        self.session_id = session_id
        self.session_path = session_file
    except Exception as e:
        printer.error(f"Failed to load session {session_id}: {e}")
        return None
    return payload
def delete_session(self, session_id):
    """Remove the stored file for ``session_id`` and report the outcome via ``printer``."""
    target = os.path.join(self.sessions_dir, f"{session_id}.json")
    if not os.path.exists(target):
        printer.error(f"Session {session_id} not found.")
        return
    os.remove(target)
    printer.success(f"Session {session_id} deleted.")
def get_last_session_id(self):
    """Return the ID of the first session reported by ``_get_sessions()``, or None if there are none."""
    ordered = self._get_sessions()
    if not ordered:
        return None
    return ordered[0]["id"]
def _generate_session_id(self, query):
    """Return a session ID built from the current local time as ``YYYYMMDD-HHMMSS``.

    ``query`` is accepted but not used when building the ID.
    """
    now = datetime.datetime.now()
    return now.strftime("%Y%m%d-%H%M%S")
def save_session(self, history, title=None, model=None):
    """Write the conversation history to the active session file.

    When no session is active, a new ID is generated and ``self.session_id``
    and ``self.session_path`` are set. For a file that does not exist yet and
    no explicit ``title``, the title is taken from the first user message,
    whitespace-collapsed and cut to 40 characters. Metadata already stored in
    the file (``title``, ``created_at``, ``model``) is kept unless overridden.

    Args:
        history: List of message dicts to store under the ``history`` key.
        title: Optional title; falls back to the stored title or "New Session".
        model: Optional model name; falls back to the stored model or
            ``self.engineer_model``.

    Read/write failures are reported through ``printer.error`` and not raised.
    """
    if not self.session_id:
        # Generate ID from first user query if available
        first_user_msg = next((m["content"] for m in history if m["role"] == "user"), "new-session")
        self.session_id = self._generate_session_id(first_user_msg)
        self.session_path = os.path.join(self.sessions_dir, f"{self.session_id}.json")

    # If it's a new file, derive a readable title from the first user message
    if not os.path.exists(self.session_path) and not title:
        raw_title = next((m["content"] for m in history if m["role"] == "user"), "New Session")
        # Clean title: remove newlines, multiple spaces
        clean_title = " ".join(raw_title.split())
        if len(clean_title) > 40:
            title = clean_title[:37].strip() + "..."
        else:
            title = clean_title

    try:
        # Read existing metadata if it exists
        metadata = {}
        if os.path.exists(self.session_path):
            with open(self.session_path, "r") as f:
                metadata = json.load(f)
        metadata.update({
            "id": self.session_id,
            "title": title or metadata.get("title", "New Session"),
            "created_at": metadata.get("created_at", datetime.datetime.now().isoformat()),
            "updated_at": datetime.datetime.now().isoformat(),
            "model": model or metadata.get("model", self.engineer_model),
            "history": history
        })
        with open(self.session_path, "w") as f:
            json.dump(metadata, f, indent=4)
    except Exception as e:
        # Saving is best-effort: a failed write must not abort the conversation.
        printer.error(f"Failed to save session: {e}")
@MethodHook
def ask(self, user_input, dryrun=False, chat_history=None, status=None, debug=False, stream=True, session_id=None, chunk_callback=None):
    """Run one user turn through the Engineer/Architect agent loop.

    The responder ("brain") is chosen from an explicit prefix in
    ``user_input`` (``architect:``, ``@engineer`` ...) or, failing that, from
    recent tool calls in the history. The model is then called repeatedly,
    executing the tools it requests, until it answers without tool calls, the
    hard iteration limit is reached, or the run is interrupted. The
    conversation is saved with ``save_session`` on the way out.

    Args:
        user_input: Raw user text, optionally prefixed with a role selector.
        dryrun: Accepted for interface compatibility; not read in this method.
        chat_history: Previous messages (dicts or message objects).
        status: Optional status/progress object; ``update``, ``stop``,
            ``start``, ``on_interrupt`` and ``is_web`` are used when present.
        debug: Print decisions, observations and intermediate reasoning.
        stream: Use the streaming completion path.
        session_id: Session to load when ``chat_history`` is empty.
        chunk_callback: Optional callable that receives streamed text.

    Returns:
        dict: ``response``, ``chat_history``, ``app_related``, ``usage``,
        ``responder`` and ``streamed``. When the Engineer call itself fails,
        a shorter dict with ``response``, ``chat_history`` and ``usage``.

    Raises:
        ValueError: If no Engineer API key is configured.
    """
    if not self.engineer_key:
        raise ValueError("Engineer API key not configured. Use 'connpy config --engineer-api-key <key>' to set it.")
    if chat_history is None:
        chat_history = []

    # Load session if provided and history is empty
    if session_id and not chat_history:
        session_data = self.load_session_data(session_id)
        if session_data:
            chat_history = session_data.get("history", [])
            # If we loaded history, the caller might need it back
            # But typically ask() is called in a loop with an external history object

    usage = {"input": 0, "output": 0, "total": 0}

    # 1. Initial role selection (Sticky Brain)
    explicit_architect = re.match(r'^(architect|arquitecto|@architect)[:\s]', user_input, re.I)
    explicit_engineer = re.match(r'^(engineer|ingeniero|@engineer)[:\s]', user_input, re.I)
    if explicit_architect:
        current_brain = "architect"
    elif explicit_engineer:
        current_brain = "engineer"
    else:
        # Sticky Brain: detect whether the Architect was in charge in the recent history
        is_architect_active = False
        for msg in reversed(chat_history[-5:]):
            tcs = msg.get('tool_calls') if isinstance(msg, dict) else getattr(msg, 'tool_calls', None)
            if tcs:
                for tc in tcs:
                    fn = tc.get('function', {}).get('name') if isinstance(tc, dict) else getattr(getattr(tc, 'function', None), 'name', '')
                    # Architect stays in control if delegating tasks or if Engineer escalated to them
                    # consult_architect is just Engineer asking for advice - Engineer keeps control
                    # NOTE(review): a later return_to_engineer call is not taken into account here - confirm intent
                    if fn in ['delegate_to_engineer', 'escalate_to_architect']:
                        is_architect_active = True
                        break
            if is_architect_active:
                break
        current_brain = "architect" if is_architect_active else "engineer"

    # 2. Message preparation and cleanup
    clean_input = re.sub(r'^(architect|arquitecto|engineer|ingeniero|@architect|@engineer)[:\s]+', '', user_input, flags=re.IGNORECASE).strip()
    system_prompt = self.architect_system_prompt if current_brain == "architect" else self.engineer_system_prompt
    tools = self._get_architect_tools() if current_brain == "architect" else self._get_engineer_tools()
    model = self.architect_model if current_brain == "architect" else self.engineer_model
    key = self.architect_key if current_brain == "architect" else self.engineer_key

    # Structure optimized for Prompt Caching (direct Anthropic only; Vertex has different rules)
    if "claude" in model.lower() and "vertex" not in model.lower():
        messages = [{"role": "system", "content": [{"type": "text", "text": system_prompt, "cache_control": {"type": "ephemeral"}}]}]
    else:
        messages = [{"role": "system", "content": system_prompt}]

    # History interleaving: consecutive user messages are merged into one
    last_role = "system"
    # Sanitize history if the current target model is not compatible with cache_control
    history_to_process = chat_history[-self.max_history:]
    if "claude" not in model.lower() or "vertex" in model.lower():
        history_to_process = self._sanitize_messages(history_to_process)
    for msg in history_to_process:
        m = msg if isinstance(msg, dict) else msg.model_dump(exclude_none=True)
        role = m.get('role')
        if role == last_role and role == 'user':
            messages[-1]['content'] += "\n" + (m.get('content') or "")
            continue
        if role == 'assistant' and m.get('tool_calls') and m.get('content') == "":
            m['content'] = None
        messages.append(m)
        last_role = role
    if last_role == 'user':
        messages[-1]['content'] += "\n" + clean_input
    else:
        messages.append({"role": "user", "content": clean_input})

    # 3. Execution loop
    iteration = 0
    # Both flags are read outside the place they are normally assigned (the
    # soft-limit check and the final return), so they need a value up front.
    soft_limit_warned = False
    streamed_response = False
    try:
        # Set up remote interrupt callback if bridge is provided
        if status and hasattr(status, "on_interrupt"):
            status.on_interrupt = lambda: setattr(self, "interrupted", True)

        while iteration < self.hard_limit_iterations:
            iteration += 1

            # Check for interruption
            if self.interrupted:
                raise KeyboardInterrupt

            # Soft limit warning
            if iteration == self.soft_limit_iterations and not soft_limit_warned:
                self.console.print(f"[warning]⚠ Agent has performed {iteration} steps. This is taking longer than expected.[/warning]")
                self.console.print(f"[warning] You can press Ctrl+C to interrupt and get a summary of progress.[/warning]")
                soft_limit_warned = True

            label = "[architect][bold]Architect[/bold][/architect]" if current_brain == "architect" else "[engineer][bold]Engineer[/bold][/engineer]"
            if status:
                # Notify responder identity ONLY for web/remote clients (StatusBridge has is_web)
                if getattr(status, "is_web", False):
                    status.update(f"__RESPONDER__:{current_brain}")
                status.update(f"{label} is thinking... (step {iteration})")

            streamed_response = False
            try:
                safe_messages = self._sanitize_messages(messages)
                if stream:
                    response, streamed_response = self._stream_completion(
                        model=model, messages=safe_messages, tools=tools, api_key=key,
                        status=status, label=label, debug=debug, num_retries=3,
                        chunk_callback=chunk_callback
                    )
                else:
                    response = completion(model=model, messages=safe_messages, tools=tools, api_key=key, num_retries=3)
            except Exception as e:
                if current_brain == "architect":
                    if status:
                        status.update("[unavailable]Architect unavailable! Falling back to Engineer...")
                    # Preserve context when falling back - use clean_input directly
                    current_brain = "engineer"
                    model = self.engineer_model
                    tools = self._get_engineer_tools()
                    key = self.engineer_key
                    # Rebuild messages with Engineer system prompt and original user request
                    messages = [{"role": "system", "content": self.engineer_system_prompt}]
                    # Add chat history if exists (excluding system prompt)
                    if chat_history:
                        for msg in chat_history[-self.max_history:]:
                            # History may hold message objects; normalize like the interleaving step above
                            m = msg if isinstance(msg, dict) else msg.model_dump(exclude_none=True)
                            if m.get('role') != 'system':
                                messages.append(m)
                    # Add current user request
                    messages.append({"role": "user", "content": clean_input})
                    continue
                else:
                    return {"response": f"Error: Both engines failed. {str(e)}", "chat_history": messages[1:], "usage": usage}

            if hasattr(response, "usage") and response.usage:
                usage["input"] += getattr(response.usage, "prompt_tokens", 0)
                usage["output"] += getattr(response.usage, "completion_tokens", 0)
                usage["total"] += getattr(response.usage, "total_tokens", 0)

            resp_msg = response.choices[0].message
            msg_dict = resp_msg.model_dump(exclude_none=True)
            if msg_dict.get("tool_calls") and msg_dict.get("content") == "":
                msg_dict["content"] = None
            messages.append(msg_dict)

            if debug and resp_msg.content and not streamed_response:
                # In CLI debug mode, only print intermediate reasoning if there are tool calls AND it wasn't already streamed.
                # If there are no tool calls, this content is the final answer and will be printed by the caller.
                if resp_msg.tool_calls:
                    if status:
                        try:
                            status.stop()
                        except Exception:
                            pass
                    self.console.print(Panel(Markdown(resp_msg.content), title=f"[{current_brain}][bold]{label} Reasoning[/bold][/{current_brain}]", border_style="architect" if current_brain == "architect" else "engineer"))
                    if status:
                        try:
                            status.start()
                        except Exception:
                            pass

            if not resp_msg.tool_calls:
                break

            # Track if we need to inject a user message after all tool responses
            pending_user_message = None
            for tc in resp_msg.tool_calls:
                fn, args = tc.function.name, json.loads(tc.function.arguments)

                # Validate tool access based on current brain
                if fn in ['delegate_to_engineer'] and current_brain != "architect":
                    obs = f"Error: Tool '{fn}' is only available to the Architect (Architect). You are the Engineer (Engineer). Use 'run_commands' directly to execute configuration."
                    messages.append({"tool_call_id": tc.id, "role": "tool", "name": fn, "content": obs})
                    continue

                if status:
                    if fn == "delegate_to_engineer":
                        status.update(f"[architect]Architect: [DELEGATING MISSION] {args.get('task','')[:40]}...")
                    elif fn == "manage_memory_tool":
                        status.update(f"[architect]Architect: [UPDATING MEMORY]")
                if debug:
                    self._print_debug_observation(f"Decision: {fn}", args, status=status)

                if fn == "delegate_to_engineer":
                    obs, eng_usage = self._engineer_loop(args["task"], status=status, debug=debug, chat_history=messages[:-1])
                    usage["input"] += eng_usage["input"]
                    usage["output"] += eng_usage["output"]
                    usage["total"] += eng_usage["total"]
                elif fn == "consult_architect":
                    if status:
                        status.update("[architect]Engineer consulting Architect...")
                    try:
                        # Consultation only - Engineer stays in control
                        claude_resp = completion(
                            model=self.architect_model,
                            messages=[
                                {"role": "system", "content": self.architect_system_prompt},
                                {"role": "user", "content": f"The Engineer needs your strategic advice.\n\nTECHNICAL SUMMARY: {args['technical_summary']}\n\nQUESTION: {args['question']}\n\nProvide strategic guidance. The Engineer will continue handling the user."}
                            ],
                            api_key=self.architect_key,
                            num_retries=3
                        )
                        obs = claude_resp.choices[0].message.content
                        if debug:
                            if status:
                                try:
                                    status.stop()
                                except Exception:
                                    pass
                            self.console.print(Panel(Markdown(obs), title="[architect]Architect Consultation[/architect]", border_style="architect"))
                            if status:
                                try:
                                    status.start()
                                except Exception:
                                    pass
                    except Exception as e:
                        if status:
                            status.update("[unavailable]Architect unavailable! Engineer continuing alone...")
                        obs = f"Architect unavailable ({str(e)}). Proceeding with your best technical judgment."
                elif fn == "escalate_to_architect":
                    if status:
                        status.update("[architect]Transferring control to Architect...")
                    # Full escalation - Architect takes over
                    current_brain = "architect"
                    model = self.architect_model
                    tools = self._get_architect_tools()
                    key = self.architect_key
                    messages[0] = {"role": "system", "content": self.architect_system_prompt}
                    # Prepare handover context to inject AFTER all tool responses
                    handover_msg = f"HANDOVER FROM EXECUTION ENGINE\n\nReason: {args['reason']}\n\nContext: {args['context']}\n\nYou are now in control of this conversation."
                    pending_user_message = handover_msg
                    obs = "Control transferred to Architect. Handover context will be provided."
                    if debug:
                        if status:
                            try:
                                status.stop()
                            except Exception:
                                pass
                        self.console.print(Panel(Text(handover_msg), title="[architect]Escalation to Architect[/architect]", border_style="architect"))
                        if status:
                            try:
                                status.start()
                            except Exception:
                                pass
                elif fn == "return_to_engineer":
                    if status:
                        status.update("[engineer]Transferring control back to Engineer...")
                    # Architect returns control to Engineer
                    current_brain = "engineer"
                    model = self.engineer_model
                    tools = self._get_engineer_tools()
                    key = self.engineer_key
                    messages[0] = {"role": "system", "content": self.engineer_system_prompt}
                    # Prepare handover context to inject AFTER all tool responses
                    handover_msg = f"HANDOVER FROM ARCHITECT\n\nSummary: {args['summary']}\n\nYou are now back in control. Continue handling the user's requests."
                    pending_user_message = handover_msg
                    obs = "Control returned to Engineer. Handover summary will be provided."
                    if debug:
                        if status:
                            try:
                                status.stop()
                            except Exception:
                                pass
                        self.console.print(Panel(Text(handover_msg), title="[engineer]Return to Engineer[/engineer]", border_style="engineer"))
                        if status:
                            try:
                                status.start()
                            except Exception:
                                pass
                elif fn == "list_nodes":
                    obs = self.list_nodes_tool(**args)
                elif fn == "run_commands":
                    obs = self.run_commands_tool(**args, status=status)
                elif fn == "get_node_info":
                    obs = self.get_node_info_tool(**args)
                elif fn == "manage_memory_tool":
                    obs = self.manage_memory_tool(**args)
                elif fn in self.external_tool_handlers:
                    obs = self.external_tool_handlers[fn](self, **args)
                else:
                    obs = f"Error: {fn} unknown."

                if debug and fn not in ["delegate_to_engineer", "consult_architect", "escalate_to_architect", "return_to_engineer"]:
                    self._print_debug_observation(f"Observation: {fn}", obs, status=status)

                # Ensure observation is a string and truncated for the LLM
                obs_str = obs if isinstance(obs, str) else json.dumps(obs)
                messages.append({"tool_call_id": tc.id, "role": "tool", "name": fn, "content": self._truncate(obs_str)})

            # Inject pending user message AFTER all tool responses are added
            if pending_user_message:
                messages.append({"role": "user", "content": pending_user_message})
        else:
            # Reached only when the loop ran out of iterations; a final answer
            # given on the very last step leaves through ``break`` and skips this.
            self.console.print(f"[error]⛔ Agent reached hard limit ({self.hard_limit_iterations} steps). Forcing stop to prevent infinite loop.[/error]")
            # Only inject user message if we're not in the middle of tool calls
            last_msg = messages[-1] if messages else {}
            if last_msg.get("role") != "assistant" or not last_msg.get("tool_calls"):
                messages.append({"role": "user", "content": "Hard iteration limit reached. Please provide a summary of your findings so far."})
                try:
                    safe_messages = self._sanitize_messages(messages)
                    response = completion(model=model, messages=safe_messages, tools=[], api_key=key)
                    resp_msg = response.choices[0].message
                    messages.append(resp_msg.model_dump(exclude_none=True))
                except Exception as e:
                    if status:
                        status.update(f"[error]Error fetching summary: {e}[/error]")
                    printer.warning(f"Failed to fetch final summary from LLM: {e}")
    except KeyboardInterrupt:
        if status:
            status.update("[error]Interrupted! Closing pending tasks...")
        # The interrupt may land in the middle of a multi-tool batch. Walk back
        # over the tool responses already recorded to the assistant turn that
        # requested them, then answer only the calls that are still open.
        idx = len(messages) - 1
        while idx > 0 and messages[idx].get("role") == "tool":
            idx -= 1
        open_calls = messages[idx].get("tool_calls") or []
        answered = {m.get("tool_call_id") for m in messages[idx + 1:]}
        for tc in open_calls:
            if tc.get("id") not in answered:
                messages.append({"tool_call_id": tc.get("id"), "role": "tool", "name": tc.get("function", {}).get("name"), "content": "Operation cancelled by user."})

        # Use a fresh list for the summary call to avoid history corruption
        summary_messages = list(messages)
        summary_messages.append({"role": "user", "content": "USER INTERRUPTED. Briefly summarize what you were doing and stop."})
        try:
            safe_messages = self._sanitize_messages(summary_messages)
            # Use tools=None to force a text summary during interruption
            response = completion(model=model, messages=safe_messages, tools=None, api_key=key)
            resp_msg = response.choices[0].message
            messages.append(resp_msg.model_dump(exclude_none=True))
            # IMPORTANT: Manually trigger callback for the summary so Web UI sees it
            if chunk_callback and resp_msg.content:
                chunk_callback(resp_msg.content)
        except Exception:
            error_msg = "Operation interrupted by user. Summary unavailable."
            messages.append({"role": "assistant", "content": error_msg})
            if chunk_callback:
                chunk_callback(error_msg)
    finally:
        # Auto-save session
        self.save_session(messages, model=model)

    return {
        "response": messages[-1].get("content"),
        "chat_history": messages[1:],
        "app_related": True,
        "usage": usage,
        "responder": current_brain,  # "architect" or "engineer"
        "streamed": streamed_response
    }
@MethodHook
def ask_copilot(self, terminal_buffer, user_question, node_info=None, chunk_callback=None):
    """Single-shot copilot for augmented terminal sessions.

    Sends the terminal context and the question to the Engineer model with
    streaming enabled, forwards the ``<guide>`` text to ``chunk_callback`` as
    it arrives, and parses the tagged reply once the stream ends.

    Args:
        terminal_buffer: Sanitized terminal screen content (last N lines).
        user_question: The user's question about the active session.
        node_info: Optional dict with node metadata (os, name, etc.).
        chunk_callback: Optional callable for streaming the guide.

    Returns:
        dict: {commands: list[str], guide: str, risk_level: str, error: str|None}
    """
    node_info = node_info or {}
    os_info = node_info.get("os", "unknown")
    node_name = node_info.get("name", "unknown")

    # Load vendor-specific command reference if available
    vendor_reference = ""
    if os_info and os_info != "unknown":
        try:
            os_filename = os_info.lower().replace(" ", "_")
            ref_path = os.path.join(self.config.defaultdir, "ai_references", f"{os_filename}.md")
            if os.path.exists(ref_path):
                with open(ref_path, "r") as f:
                    vendor_reference = f.read().strip()
        except Exception:
            # The reference is optional; carry on without it if it cannot be read.
            pass

    system_prompt = f"""Role: TERMINAL COPILOT. You assist a network engineer during a live SSH session.
Rules:
1. Answer the user's question directly based on the Terminal Context.
2. If the user asks you to analyze, parse, or extract data from the Terminal Context, DO IT directly in the <guide> section (you can use markdown tables or lists). Do NOT just give them a command to do it themselves.
3. If the user wants to execute an action, provide the required CLI commands inside a <commands> block, one command per line. If no commands are needed, leave it empty or omit the block.
4. ULTRA-CONCISE. Keep your guide to the point.
5. You MUST output your response in the following strict format:
<guide>
Your brief tactical guide in markdown. 3-4 sentences max.
</guide>
<commands>
command 1
command 2
</commands>
<risk>
low, high, or destructive
</risk>
6. Risk level: "low" for read-only/no commands, "high" for config changes, "destructive" for potentially dangerous ops.
Terminal Context:
{terminal_buffer}
Device OS: {os_info}
Node: {node_name}"""
    if vendor_reference:
        system_prompt += f"\n\nVendor Command Reference:\n{vendor_reference}"

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_question}
    ]

    closing_tag = "</guide>"
    try:
        response = completion(
            model=self.engineer_model,
            messages=messages,
            api_key=self.engineer_key,
            stream=True
        )
        full_content = ""
        streamed_guide = ""
        for chunk in response:
            # Some chunks carry no choices (e.g. metadata-only chunks);
            # indexing them would raise and discard the whole reply.
            if not chunk.choices:
                continue
            delta = chunk.choices[0].delta
            if not (hasattr(delta, 'content') and delta.content):
                continue
            full_content += delta.content
            if not chunk_callback:
                continue
            start_idx = full_content.find("<guide>")
            if start_idx == -1:
                continue
            after_start = full_content[start_idx + 7:]
            end_idx = after_start.find(closing_tag)
            if end_idx != -1:
                current_guide = after_start[:end_idx]
            else:
                current_guide = after_start
                # Hold back a trailing fragment of "</guide>" ("<", "</", ...
                # "</guide") so pieces of the tag are never streamed out.
                for size in range(len(closing_tag) - 1, 0, -1):
                    if current_guide.endswith(closing_tag[:size]):
                        current_guide = current_guide[:-size]
                        break
            new_text = current_guide[len(streamed_guide):]
            if new_text:
                chunk_callback(new_text)
                streamed_guide += new_text

        guide = ""
        commands = []
        risk_level = "low"
        guide_match = re.search(r"<guide>(.*?)</guide>", full_content, re.DOTALL)
        if guide_match:
            guide = guide_match.group(1).strip()
        cmd_match = re.search(r"<commands>(.*?)</commands>", full_content, re.DOTALL)
        if cmd_match:
            cmds_raw = cmd_match.group(1).strip()
            if cmds_raw:
                commands = [c.strip() for c in cmds_raw.split('\n') if c.strip()]
        risk_match = re.search(r"<risk>(.*?)</risk>", full_content, re.DOTALL)
        if risk_match:
            risk_level = risk_match.group(1).strip().lower()
        # The model ignored the format entirely: treat the whole reply as the guide.
        if not guide and full_content and "<guide>" not in full_content:
            guide = full_content.strip()
        return {
            "commands": commands,
            "guide": guide,
            "risk_level": risk_level,
            "error": None
        }
    except Exception as e:
        return {
            "commands": [],
            "guide": "",
            "risk_level": "low",
            "error": str(e)
        }
@MethodHook
async def aask_copilot(self, terminal_buffer, user_question, node_info=None, chunk_callback=None):
    """Async single-shot copilot for augmented terminal sessions.

    Sends the terminal context and the question to the Engineer model through
    LiteLLM's ``acompletion`` with streaming enabled, forwards the ``<guide>``
    text to ``chunk_callback`` as it arrives, and parses the tagged reply once
    the stream ends.

    Args:
        terminal_buffer: Terminal screen content given to the model as context.
        user_question: The user's question about the active session.
        node_info: Optional dict with node metadata (``os`` and ``name`` are read).
        chunk_callback: Optional callable that receives new guide text.

    Returns:
        dict: {commands: list[str], guide: str, risk_level: str, error: str|None},
        or None when the task is cancelled while the request is in flight.
    """
    from litellm import acompletion
    import warnings

    # Suppress unawaited coroutine warnings from LiteLLM's internal streaming logic during sudden cancellation
    warnings.filterwarnings("ignore", message="coroutine '.*async_streaming.*' was never awaited", category=RuntimeWarning)

    node_info = node_info or {}
    os_info = node_info.get("os", "unknown")
    node_name = node_info.get("name", "unknown")

    # Load vendor-specific command reference if available
    vendor_reference = ""
    if os_info and os_info != "unknown":
        try:
            os_filename = os_info.lower().replace(" ", "_")
            ref_path = os.path.join(self.config.defaultdir, "ai_references", f"{os_filename}.md")
            if os.path.exists(ref_path):
                with open(ref_path, "r") as f:
                    vendor_reference = f.read().strip()
        except Exception:
            # The reference is optional; carry on without it if it cannot be read.
            pass

    system_prompt = f"""Role: TERMINAL COPILOT. You assist a network engineer during a live SSH session.
Rules:
1. Answer the user's question directly based on the Terminal Context.
2. If the user asks you to analyze, parse, or extract data from the Terminal Context, DO IT directly in the <guide> section (you can use markdown tables or lists). Do NOT just give them a command to do it themselves.
3. If the user wants to execute an action, provide the required CLI commands inside a <commands> block, one command per line. If no commands are needed, leave it empty or omit the block.
4. ULTRA-CONCISE. Keep your guide to the point.
5. You MUST output your response in the following strict format:
<guide>
Your brief tactical guide in markdown. 3-4 sentences max.
</guide>
<commands>
command 1
command 2
</commands>
<risk>
low, high, or destructive
</risk>
6. Risk level: "low" for read-only/no commands, "high" for config changes, "destructive" for potentially dangerous ops.
Terminal Context:
{terminal_buffer}
Device OS: {os_info}
Node: {node_name}"""
    if vendor_reference:
        system_prompt += f"\n\nVendor Command Reference:\n{vendor_reference}"

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_question}
    ]

    closing_tag = "</guide>"
    # Bound before the try so the cancellation handler can tell whether a stream was opened.
    response = None
    try:
        response = await acompletion(
            model=self.engineer_model,
            messages=messages,
            api_key=self.engineer_key,
            stream=True
        )
        full_content = ""
        streamed_guide = ""
        async for chunk in response:
            # Some chunks carry no choices (e.g. metadata-only chunks);
            # indexing them would raise and discard the whole reply.
            if not chunk.choices:
                continue
            delta = chunk.choices[0].delta
            if not (hasattr(delta, 'content') and delta.content):
                continue
            full_content += delta.content
            if not chunk_callback:
                continue
            start_idx = full_content.find("<guide>")
            if start_idx == -1:
                continue
            after_start = full_content[start_idx + 7:]
            end_idx = after_start.find(closing_tag)
            if end_idx != -1:
                current_guide = after_start[:end_idx]
            else:
                current_guide = after_start
                # Hold back a trailing fragment of "</guide>" ("<", "</", ...
                # "</guide") so pieces of the tag are never streamed out.
                for size in range(len(closing_tag) - 1, 0, -1):
                    if current_guide.endswith(closing_tag[:size]):
                        current_guide = current_guide[:-size]
                        break
            new_text = current_guide[len(streamed_guide):]
            if new_text:
                chunk_callback(new_text)
                streamed_guide += new_text

        guide = ""
        commands = []
        risk_level = "low"
        guide_match = re.search(r"<guide>(.*?)</guide>", full_content, re.DOTALL)
        if guide_match:
            guide = guide_match.group(1).strip()
        cmd_match = re.search(r"<commands>(.*?)</commands>", full_content, re.DOTALL)
        if cmd_match:
            cmds_raw = cmd_match.group(1).strip()
            if cmds_raw:
                commands = [c.strip() for c in cmds_raw.split('\n') if c.strip()]
        risk_match = re.search(r"<risk>(.*?)</risk>", full_content, re.DOTALL)
        if risk_match:
            risk_level = risk_match.group(1).strip().lower()
        # The model ignored the format entirely: treat the whole reply as the guide.
        if not guide and full_content and "<guide>" not in full_content:
            guide = full_content.strip()
        return {
            "commands": commands,
            "guide": guide,
            "risk_level": risk_level,
            "error": None
        }
    except asyncio.CancelledError:
        # Client cancelled the request via gRPC or local interrupt
        if response is not None:
            try:
                if hasattr(response, 'aclose'):
                    # Fire and forget the close to avoid blocking the cancel
                    asyncio.create_task(response.aclose())
                elif hasattr(response, 'close'):
                    response.close()
            except Exception:
                pass
        return None
    except Exception as e:
        return {
            "commands": [],
            "guide": "",
            "risk_level": "low",
            "error": str(e)
        }
@MethodHook
def confirm(self, user_input):
    """Approve ``user_input`` unconditionally; this implementation always returns True."""
    return True