JaatoSession

Per-agent conversation state in the jaato framework. JaatoSession manages individual conversation history, tool execution, and model interaction for a single agent (main or subagent).

from shared import JaatoSession
Module: shared.jaato_session
Parent: JaatoRuntime
Wrapped by: JaatoClient (facade)

When to Use JaatoSession

Sessions are created via JaatoRuntime.create_session(). Use sessions directly when you need to:

  • Manage multiple independent conversations (subagents)
  • Access session-specific state
  • Set per-session retry callbacks or streaming settings
  • Work with subagent plugin implementations
Creating sessions
# Via JaatoClient (most common)
client = JaatoClient()
client.connect()  # Reads JAATO_PROVIDER and MODEL_NAME from env
client.configure_tools(registry)
session = client.get_session()

# Via JaatoRuntime (for subagents)
runtime = client.get_runtime()
sub_session = runtime.create_session(
    model="claude-sonnet-4-20250514",
    tools=["cli", "web_search"],
    system_instructions="You are a researcher."
)

# Each session has independent history
main_response = session.send_message("Hello", on_output)
sub_response = sub_session.send_message("Search...", on_output)

Messaging

send_message

send_message(
  message: str,
  on_output: Optional[OutputCallback] = None,
  on_usage_update: Optional[UsageUpdateCallback] = None,
  on_gc_threshold: Optional[GCThresholdCallback] = None
) -> str

Send a message to the model and return the final response. Handles the full tool execution loop internally.

  • message str required
    The user's message text
  • on_output OutputCallback optional
    Callback for streaming output: (source, text, mode) -> None
  • on_usage_update UsageUpdateCallback optional
    Callback for token usage: (usage: TokenUsage) -> None
  • on_gc_threshold GCThresholdCallback optional
    Callback invoked when the GC threshold is crossed during streaming

send_message_with_parts

send_message_with_parts(
  parts: List[Part],
  on_output: OutputCallback
) -> str

Send a multimodal message with multiple parts (text, images, etc.).

generate

generate(prompt: str) -> str

Simple one-shot generation without tools or conversation history.

Send messages
def on_output(source, text, mode):
    """Print streamed output, labelling each new block with its source.

    source: "model", plugin name, or "system"
    text: output text
    mode: "write" (starts a new block) or "append" (continues the
        current block)
    """
    # Continuation chunks are emitted as-is, with no label and no newline.
    if mode != "write":
        print(text, end="")
        return
    # A new block starts on its own line with a "[source]" label; print's
    # default single-space separator sits between the label and the text.
    print(f"\n[{source}]", text, end="")

# Send message
response = session.send_message(
    "List files in current directory",
    on_output=on_output
)

# With usage tracking
def on_usage(usage):
    """Print the running token total reported by a usage update.

    usage: the usage object passed to the on_usage_update callback; only
        its ``total`` attribute is read here.
    """
    summary = f"Tokens: {usage.total}"
    print(summary)

response = session.send_message(
    "Explain this code",
    on_output=on_output,
    on_usage_update=on_usage
)
Multimodal message
from jaato import Part, Attachment

# Read the image bytes inside a context manager so the file handle is
# closed as soon as the data is loaded, rather than whenever the
# interpreter happens to collect the unreferenced file object.
with open("image.png", "rb") as image_file:
    image_bytes = image_file.read()

# Create parts with image
parts = [
    Part.from_text("What's in this image?"),
    Part(inline_data=Attachment(
        mime_type="image/png",
        data=image_bytes
    ))
]

# Send the text and image parts together as one multimodal message
response = session.send_message_with_parts(
    parts,
    on_output=on_output
)

History Management

get_history

get_history() -> List[Message]

Returns the full conversation history as a list of Message objects.

reset_session

reset_session(history: Optional[List[Message]] = None) -> None

Clears the session's conversation history. If history is provided, the session is re-initialized with those messages instead of starting empty.

get_turn_boundaries

get_turn_boundaries() -> List[int]

Returns indices marking the start of each turn in the history.

revert_to_turn

revert_to_turn(turn_id: int) -> Dict[str, Any]

Reverts the conversation to a specific turn, removing subsequent messages.

get_turn_accounting

get_turn_accounting() -> List[Dict[str, Any]]

Returns per-turn statistics including token counts and timing.

History operations
# Get conversation history
history = session.get_history()
for msg in history:
    print(f"{msg.role}: {msg.text[:50]}...")

# Check turn info
turns = session.get_turn_boundaries()
print(f"Conversation has {len(turns)} turns")

# Get per-turn stats
accounting = session.get_turn_accounting()
for turn in accounting:
    print(f"Turn {turn['turn_id']}: "
          f"{turn['tokens']} tokens")

# Revert to earlier turn
session.revert_to_turn(2)

# Clear and start fresh
session.reset_session()

# Reset with custom history
session.reset_session(history=custom_history)

Context Management

get_context_limit

get_context_limit() -> int

Returns the model's context window size in tokens.

get_context_usage

get_context_usage() -> Dict[str, Any]

Returns current context usage statistics as a dictionary, including the model name, total tokens, prompt tokens, output tokens, tokens remaining, turns, and percent used.

Context management
# Check context limits
limit = session.get_context_limit()
usage = session.get_context_usage()

print(f"Model: {usage['model']}")
print(f"Limit: {limit:,} tokens")
print(f"Used: {usage['total_tokens']:,} tokens")
print(f"Percent: {usage['percent_used']:.1f}%")
print(f"Remaining: {usage['tokens_remaining']:,}")
print(f"Turns: {usage['turns']}")

Cancellation & Streaming

request_stop

request_stop() -> bool

Request cancellation of the current operation. Returns True if a cancellation was requested.

is_running property

Returns True if send_message() is in progress.

supports_stop property

Returns True if the provider supports mid-turn cancellation.

set_streaming_enabled

set_streaming_enabled(enabled: bool) -> None

Enable or disable streaming mode for real-time output.

set_retry_callback

set_retry_callback(callback: Optional[RetryCallback]) -> None

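Set a callback that receives retry notifications instead of having them printed to the console. The callback is called with (message, attempt, max_attempts, delay). Pass None to revert to console output.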

Cancellation
import threading
import time

# Enable streaming for cancellation support
session.set_streaming_enabled(True)

# Start message in background
def run():
    """Send a long-running prompt and report if the turn is cancelled.

    Used as a thread target, so the reply text is not captured here;
    output reaches the caller through the on_output callback.
    """
    try:
        session.send_message(
            "Write a very long essay...",
            on_output=on_output
        )
    # NOTE(review): CancelledException is not imported in this snippet —
    # confirm its import path before running.
    except CancelledException:
        print("Cancelled!")

thread = threading.Thread(target=run)
thread.start()

# Cancel after 5 seconds
time.sleep(5)
if session.is_running:
    session.request_stop()

thread.join()
Custom retry callback
# Route retry messages to status bar
session.set_retry_callback(
    lambda msg, attempt, max_att, delay:
        status_bar.update(f"Retry {attempt}/{max_att}")
)

# Revert to console output
session.set_retry_callback(None)

User Commands

get_user_commands

get_user_commands() -> Dict[str, UserCommand]

Returns all available user commands from exposed plugins.

execute_user_command

execute_user_command(
  command_name: str,
  args: Optional[Dict] = None
) -> tuple[Any, bool]

Executes a user command and returns (result, share_with_model). share_with_model indicates whether the result should be added to the conversation.

get_model_completions

get_model_completions(args: List[str]) -> List[CommandCompletion]

Get completions for the model command (shell autocomplete).

User commands
# List available commands
commands = session.get_user_commands()
for name, cmd in commands.items():
    print(f"/{name}: {cmd.description}")

# Execute a command
result, share = session.execute_user_command(
    "plan",
    args=None
)

print(result)
# share indicates if result should be added to conversation

Garbage Collection

set_gc_plugin

set_gc_plugin(
  plugin: GCPlugin,
  config: Optional[GCConfig] = None
) -> None

Set the GC plugin for context management.

remove_gc_plugin

remove_gc_plugin() -> None

Remove the GC plugin.

manual_gc

manual_gc() -> GCResult

Manually trigger garbage collection.

get_gc_history

get_gc_history() -> List[GCResult]

Get history of GC operations.

GC setup
from shared.plugins.gc_truncate import create_plugin
from shared.plugins.gc import GCConfig

# Create and configure GC plugin
gc_plugin = create_plugin()
gc_plugin.initialize({"preserve_recent_turns": 10})

# Set with threshold configuration
session.set_gc_plugin(gc_plugin, GCConfig(
    threshold_percent=75.0,
    auto_trigger=True
))

# Manual GC
result = session.manual_gc()
print(f"Freed {result.tokens_freed} tokens")

# Check history
for gc in session.get_gc_history():
    print(f"GC at turn {gc.turn}")

Thinking Mode

set_thinking_plugin

set_thinking_plugin(plugin: ThinkingPlugin) -> None

Set the thinking plugin for extended reasoning.

set_thinking_config

set_thinking_config(config: ThinkingConfig) -> None

Set thinking configuration directly.

get_thinking_config

get_thinking_config() -> Optional[ThinkingConfig]

Get current thinking configuration.

supports_thinking

supports_thinking() -> bool

Check if provider supports thinking mode.

Thinking mode
from jaato_sdk.plugins.model_provider.types import ThinkingConfig

# Check support
if session.supports_thinking():
    # Enable thinking
    session.set_thinking_config(ThinkingConfig(
        enabled=True,
        budget=10000
    ))

    # Complex reasoning task
    response = session.send_message(
        "Analyze this complex problem...",
        on_output=on_output
    )

    # Disable thinking
    session.set_thinking_config(ThinkingConfig(enabled=False))

UI Hooks

set_ui_hooks

set_ui_hooks(
  hooks: AgentUIHooks,
  agent_id: str
) -> None

Set UI hooks for agent lifecycle events. The agent_id is used to identify this session in hook callbacks.

set_terminal_width

set_terminal_width(width: int) -> None

Set terminal width for formatting.

set_presentation_context

set_presentation_context(ctx: PresentationContext) -> None

Set display capabilities for this session. Injected into the model's system instructions. Also updates terminal_width for backwards compatibility. See PresentationContext.

UI hooks
from shared.plugins.subagent.ui_hooks import AgentUIHooks

class MyHooks(AgentUIHooks):
    """Example hooks that echo agent output and turn completion to stdout."""

    def on_agent_output(self, agent_id, source, text, mode):
        """Print each output chunk prefixed with the emitting agent's id."""
        print(f"[{agent_id}] {text}", end="")

    def on_agent_turn_completed(self, agent_id, turn_number,
                                prompt_tokens, output_tokens,
                                *args, **kwargs):
        """Report that the given agent finished a turn.

        *args/**kwargs absorb the remaining hook arguments, which this
        example does not use; a literal ``...`` in a parameter list is
        not valid Python.
        """
        print(f"[{agent_id}] Turn {turn_number} complete")

# Set hooks on session
session.set_ui_hooks(MyHooks(), agent_id="sub_1")
session.set_terminal_width(120)

# Or set full presentation context
from jaato import PresentationContext, ClientType
session.set_presentation_context(PresentationContext(
    content_width=120,
    client_type=ClientType.TERMINAL,
))