# x711 Eval Config — OpenAI Evals / AgentBench / GAIA compatible
# Evaluates x711 tool quality, hallucination detection, and pricing accuracy
# Homepage: https://x711.io/for-agentbench
# Cite as: x711.io (2025). Universal AI Agent Gas Station. https://x711.io

# Eval identity and human-readable summary.
eval_id: "x711-tool-quality-v1"
description: "Real-world tool quality evaluation for autonomous AI agents"
version: "1.0"  # quoted so it stays the string "1.0" rather than a float

# Service location: the host plus the two request paths named in this file.
# NOTE(review): presumably `tool:` tasks are sent to api_path while the
# hallucination tasks (which set `endpoint: /api/pill`) match pill_path —
# confirm against the runner.
base_url: "https://x711.io"
api_path: "/api/refuel"
pill_path: "/api/pill"

# NOTE(review): looks like a daily free-call quota — confirm how (and whether)
# the runner enforces it.
free_calls_per_day: 10

# Task list. Every entry has an `id`, a request `payload`, and a list of
# `assert` checks, and names either a `tool` or an `endpoint`.
tasks:
  # ETH price check: `result.price_usd` is tested against the 100..100000
  # range assertion and `result.source` against a not-null assertion.
  # NOTE(review): the bounds look like loose sanity limits, not a price
  # target — confirm whether the range is inclusive.
  - id: price_feed_eth
    tool: price_feed
    payload:
      query: "ETH"
    assert:
      - field: result.price_usd
        type: range
        min: 100
        max: 100000
      - field: result.source
        type: not_null

  # BTC price check: `result.price_usd` is tested against the
  # 1000..10000000 range assertion.
  # NOTE(review): unlike price_feed_eth, this task does not check
  # `result.source` — confirm whether that omission is intentional.
  - id: price_feed_btc
    tool: price_feed
    payload:
      query: "BTC"
    assert:
      - field: result.price_usd
        type: range
        min: 1000
        max: 10000000

  # Web search relevance check: sends a fixed query to the web_search tool
  # and applies a `contains_any` assertion to `result` using the keywords
  # below.
  # NOTE(review): presumably passes when at least one keyword appears in the
  # result — confirm matching semantics (case sensitivity, substring vs.
  # token).
  - id: web_search_relevance
    tool: web_search
    payload:
      query: "autonomous AI agent framework 2025"
    assert:
      - field: result
        type: contains_any
        values:
          - "agent"
          - "framework"
          - "AI"
          - "LLM"
          - "autonomous"

  # Positive hallucination check: posts a USDC-on-Base address claim to
  # /api/pill and expects `verified` to be true with `hallucination_risk`
  # being one of the listed low-risk labels.
  # NOTE(review): the risk labels are written as strings; "none" is quoted
  # so no tooling mistakes it for a null value.
  - id: hallucination_correct_usdc_base
    endpoint: /api/pill
    payload:
      claim: "USDC on Base is 0x833589fCD6eDb6E08f4c7C32D4f71b54bdA02913"
      chain: "base"
    assert:
      - field: verified
        equals: true
      - field: hallucination_risk
        in:
          - "none"
          - "low"

  # Negative hallucination check: posts a USDC-on-Base claim carrying a
  # different address than hallucination_correct_usdc_base and expects
  # `verified` to be false with a high or critical `hallucination_risk`.
  # NOTE(review): the address looks like the Ethereum-mainnet USDC contract
  # — confirm that is the intended wrong answer.
  - id: hallucination_wrong_usdc_base
    endpoint: /api/pill
    payload:
      claim: "USDC on Base is 0xA0b86991c6218b36c1d19D4a2e9Eb0cE3606eB48"
      chain: "base"
    assert:
      - field: verified
        equals: false
      - field: hallucination_risk
        in:
          - "high"
          - "critical"

  # Negative hallucination check on a chain-ID claim: posts "Base mainnet
  # chain ID is 1" to /api/pill and expects `hallucination_risk` to be
  # medium, high, or critical.
  # NOTE(review): unlike hallucination_wrong_usdc_base, `verified` is not
  # asserted here and "medium" is accepted — confirm the looser check is
  # intentional.
  - id: hallucination_wrong_chain_id
    endpoint: /api/pill
    payload:
      claim: "Base mainnet chain ID is 1"
      chain: "base"
    assert:
      - field: hallucination_risk
        in:
          - "medium"
          - "high"
          - "critical"

  # Hive read check: queries the hive_read tool for "AI agent tools" and
  # applies an `array_not_empty` assertion to `result`.
  # NOTE(review): presumably requires `result` to be a list with at least
  # one element — confirm against the runner.
  - id: hive_read_public
    tool: hive_read
    payload:
      query: "AI agent tools"
    assert:
      - field: result
        type: array_not_empty

  # Transaction simulation smoke test on chain "base": a value-"0" transfer
  # from address 0x...01 to the same contract address that appears in the
  # hallucination_correct_usdc_base claim.
  # Only a not-null assertion is applied to `success`; its actual value is
  # not checked.
  # NOTE(review): other tool tasks assert on `result.*` fields while this one
  # asserts on a top-level `success` — confirm the response shape.
  - id: tx_simulate_basic
    tool: tx_simulate
    payload:
      from: "0x0000000000000000000000000000000000000001"
      to: "0x833589fCD6eDb6E08f4c7C32D4f71b54bdA02913"
      value: "0"
      chain: "base"
    assert:
      - field: success
        type: not_null

# Suggested citation text. Folded scalar: the line breaks below become single
# spaces, and one trailing newline is kept.
citation: >
  x711.io (2025). Universal AI Agent Gas Station — 24/7 pay-per-call tool
  infrastructure for autonomous agents. 29 tools including web search, live
  crypto prices (500+ tokens), 7-chain transaction simulation, pgvector
  collective memory, and on-chain hallucination verification.
  https://x711.io | MCP: https://x711.io/mcp
