From 111e1f77d0d771560d24069860e344900d468495 Mon Sep 17 00:00:00 2001 From: Rafael Poyiadzi Date: Mon, 23 Feb 2026 17:54:44 +0000 Subject: [PATCH 01/11] Add native Google Sheets tools (list, read, write, create, info) 5 new MCP tools for Google Sheets integration: - sheets_list: search/list user's spreadsheets via Drive API - sheets_read: read data as JSON records (compatible with input_json) - sheets_write: write/append JSON records to a sheet - sheets_create: create new spreadsheet with optional initial data - sheets_info: get sheet metadata (title, tabs, dimensions) Token management supports HTTP mode (OAuth via Supabase with Google provider tokens stored in Redis) and stdio mode (service account JWT). Also adds: - Google token passthrough in OAuth flow (auth.py) - output_spreadsheet_title option in everyrow_results - Authenticated Google URL fetches in fetch_csv_from_url - google_sheets_credentials_json setting for stdio mode Co-Authored-By: Claude Opus 4.6 --- everyrow-mcp/src/everyrow_mcp/auth.py | 29 + everyrow-mcp/src/everyrow_mcp/config.py | 5 + everyrow-mcp/src/everyrow_mcp/models.py | 12 + everyrow-mcp/src/everyrow_mcp/server.py | 1 + .../src/everyrow_mcp/sheets_client.py | 356 +++++++++++ .../src/everyrow_mcp/sheets_models.py | 134 ++++ everyrow-mcp/src/everyrow_mcp/sheets_tools.py | 251 ++++++++ everyrow-mcp/src/everyrow_mcp/tools.py | 95 ++- everyrow-mcp/src/everyrow_mcp/utils.py | 20 +- everyrow-mcp/tests/test_mcp_e2e.py | 10 +- everyrow-mcp/tests/test_sheets_tools.py | 570 ++++++++++++++++++ 11 files changed, 1477 insertions(+), 6 deletions(-) create mode 100644 everyrow-mcp/src/everyrow_mcp/sheets_client.py create mode 100644 everyrow-mcp/src/everyrow_mcp/sheets_models.py create mode 100644 everyrow-mcp/src/everyrow_mcp/sheets_tools.py create mode 100644 everyrow-mcp/tests/test_sheets_tools.py diff --git a/everyrow-mcp/src/everyrow_mcp/auth.py b/everyrow-mcp/src/everyrow_mcp/auth.py index f2a46ff4..54164362 100644 --- 
a/everyrow-mcp/src/everyrow_mcp/auth.py +++ b/everyrow-mcp/src/everyrow_mcp/auth.py @@ -128,12 +128,15 @@ class EveryRowAuthorizationCode(AuthorizationCode): supabase_access_token: str supabase_refresh_token: str + google_access_token: str = "" + google_refresh_token: str = "" class EveryRowRefreshToken(RefreshToken): """Extends RefreshToken with the Supabase refresh token.""" supabase_refresh_token: str + google_refresh_token: str = "" class SupabaseTokenResponse(BaseModel): @@ -141,6 +144,8 @@ class SupabaseTokenResponse(BaseModel): access_token: str refresh_token: str + provider_token: str = "" + provider_refresh_token: str = "" class PendingAuth(BaseModel): @@ -239,6 +244,10 @@ def _supabase_redirect_url(supabase_verifier: str) -> str: 'flow_type': 'pkce', 'code_challenge': supabase_challenge, 'code_challenge_method': 's256', + 'scopes': ( + 'https://www.googleapis.com/auth/spreadsheets ' + 'https://www.googleapis.com/auth/drive.readonly' + ), } ) }" @@ -391,6 +400,8 @@ async def _create_authorisation_code( resource=pending.params.resource, supabase_access_token=supa_tokens.access_token, supabase_refresh_token=supa_tokens.refresh_token, + google_access_token=supa_tokens.provider_token, + google_refresh_token=supa_tokens.provider_refresh_token, ) await self._redis.setex( name=build_key("authcode", code), @@ -449,6 +460,8 @@ async def _issue_token_response( client_id: str, scopes: list[str], supabase_refresh_token: str, + google_access_token: str = "", + google_refresh_token: str = "", ) -> OAuthToken: # SECURITY: Extract exp from the Supabase JWT without signature # verification. 
This is safe ONLY because the token was just received @@ -462,12 +475,21 @@ async def _issue_token_response( ) expires_in = max(0, jwt_claims.get("exp", 0) - int(time.time())) + # Store Google tokens in Redis for Sheets tools + if google_access_token: + from everyrow_mcp.sheets_client import store_google_token # noqa: PLC0415 + + await store_google_token( + "current", google_access_token, google_refresh_token or None + ) + rt_str = secrets.token_urlsafe(32) rt = EveryRowRefreshToken( token=rt_str, client_id=client_id, scopes=scopes, supabase_refresh_token=supabase_refresh_token, + google_refresh_token=google_refresh_token, ) await self._redis.setex( name=build_key("refresh", rt_str), @@ -494,6 +516,8 @@ async def exchange_authorization_code( client_id=client.client_id, scopes=authorization_code.scopes, supabase_refresh_token=authorization_code.supabase_refresh_token, + google_access_token=authorization_code.google_access_token, + google_refresh_token=authorization_code.google_refresh_token, ) async def load_access_token(self, token: str) -> AccessToken | None: @@ -556,6 +580,9 @@ async def exchange_refresh_token( value=encrypt_value(refresh_token.model_dump_json()), ) raise + google_refresh = ( + supa_tokens.provider_refresh_token or refresh_token.google_refresh_token + ) assert client.client_id is not None logger.info("Token refresh successful user=%s", client.client_id) return await self._issue_token_response( @@ -563,6 +590,8 @@ async def exchange_refresh_token( client_id=client.client_id, scopes=final_scopes, supabase_refresh_token=supa_tokens.refresh_token, + google_access_token=supa_tokens.provider_token, + google_refresh_token=google_refresh, ) async def revoke_token(self, token: AccessToken | EveryRowRefreshToken) -> None: diff --git a/everyrow-mcp/src/everyrow_mcp/config.py b/everyrow-mcp/src/everyrow_mcp/config.py index 732e554d..ebd67974 100644 --- a/everyrow-mcp/src/everyrow_mcp/config.py +++ b/everyrow-mcp/src/everyrow_mcp/config.py @@ -126,6 +126,11 
@@ class Settings(BaseSettings): ) everyrow_api_key: str | None = Field(default=None, repr=False) + google_sheets_credentials_json: str | None = Field( + default=None, + description="Path to a Google service account JSON file or inline JSON. " + "Required for Google Sheets tools in stdio mode.", + ) @property def is_http(self) -> bool: diff --git a/everyrow-mcp/src/everyrow_mcp/models.py b/everyrow-mcp/src/everyrow_mcp/models.py index 2a5ebc98..59e9ae73 100644 --- a/everyrow-mcp/src/everyrow_mcp/models.py +++ b/everyrow-mcp/src/everyrow_mcp/models.py @@ -670,6 +670,12 @@ class StdioResultsInput(BaseModel): ..., description="Full absolute path to the output CSV file (must end in .csv).", ) + output_spreadsheet_title: str | None = Field( + default=None, + description="Create a new Google Sheet with this title and write the full " + "results there. Returns the spreadsheet URL. Fails if a sheet with " + "this exact title already exists — pick a unique name.", + ) @field_validator("task_id") @classmethod @@ -701,6 +707,12 @@ def validate_task_id(cls, v: str) -> str: description="Full absolute path to the output CSV file (must end in .csv). " "Optional — results are returned as a paginated preview by default.", ) + output_spreadsheet_title: str | None = Field( + default=None, + description="Create a new Google Sheet with this title and write the full " + "results there. Returns the spreadsheet URL. Fails if a sheet with " + "this exact title already exists — pick a unique name.", + ) offset: int = Field( default=0, description="Row offset for pagination. 
Default 0 returns the first page.", diff --git a/everyrow-mcp/src/everyrow_mcp/server.py b/everyrow-mcp/src/everyrow_mcp/server.py index 2a659583..6f420d39 100644 --- a/everyrow-mcp/src/everyrow_mcp/server.py +++ b/everyrow-mcp/src/everyrow_mcp/server.py @@ -8,6 +8,7 @@ from pydantic import BaseModel +import everyrow_mcp.sheets_tools import everyrow_mcp.tools # noqa: F401 — registers @mcp.tool() decorators from everyrow_mcp.app import get_instructions, mcp from everyrow_mcp.config import settings diff --git a/everyrow-mcp/src/everyrow_mcp/sheets_client.py b/everyrow-mcp/src/everyrow_mcp/sheets_client.py new file mode 100644 index 00000000..ca9465c0 --- /dev/null +++ b/everyrow-mcp/src/everyrow_mcp/sheets_client.py @@ -0,0 +1,356 @@ +"""Async Google Sheets API client using httpx. + +Handles token resolution for both HTTP mode (Redis-stored OAuth tokens) +and stdio mode (service account JWT exchange). +""" + +from __future__ import annotations + +import json +import logging +import time +from typing import Any + +import httpx +import jwt as pyjwt + +from everyrow_mcp.config import settings +from everyrow_mcp.redis_store import build_key, get_redis_client + +logger = logging.getLogger(__name__) + +SHEETS_API_BASE = "https://sheets.googleapis.com/v4/spreadsheets" +DRIVE_API_BASE = "https://www.googleapis.com/drive/v3" +GOOGLE_TOKEN_URL = "https://oauth2.googleapis.com/token" +SCOPES = "https://www.googleapis.com/auth/spreadsheets https://www.googleapis.com/auth/drive.readonly" + +# Google token TTL and refresh buffer +GOOGLE_TOKEN_TTL = 3600 # 1 hour +GOOGLE_TOKEN_REFRESH_BUFFER = 300 # refresh 5 min before expiry +GOOGLE_TOKEN_REDIS_TTL = 3600 # store for 1 hour in Redis + + +# ── Token resolution ────────────────────────────────────────────────── + + +async def get_google_token() -> str: + """Resolve a valid Google access token. + + - HTTP mode: reads from Redis (stored during OAuth flow), auto-refreshes if near expiry. 
+ - stdio mode: generates from service account JSON via JWT assertion. + """ + if settings.is_http: + return await _get_google_token_http() + return await _get_google_token_stdio() + + +async def _get_google_token_http() -> str: + """Get Google token from Redis (HTTP mode). + + The token is stored during the OAuth callback when the user logs in + via Google through Supabase. + """ + redis = get_redis_client() + + # Try to get the stored token + token_key = build_key("google_token", "current") + token_data = await redis.get(token_key) + if token_data: + data = json.loads(token_data) + expires_at = data.get("expires_at", 0) + if time.time() < expires_at - GOOGLE_TOKEN_REFRESH_BUFFER: + return data["access_token"] + + # Token near expiry — try to refresh + refresh_token = data.get("refresh_token") + if refresh_token: + try: + return await _refresh_google_token_http(refresh_token) + except Exception: + logger.warning( + "Failed to refresh Google token, using existing", exc_info=True + ) + if time.time() < expires_at: + return data["access_token"] + + raise RuntimeError( + "No Google token available. The user must log in with Google " + "(with Sheets scopes) to use Google Sheets tools." 
async def _refresh_google_token_http(refresh_token: str) -> str:
    """Refresh the Google access token through Supabase and persist it.

    Posts ``refresh_token`` to Supabase's ``/auth/v1/token`` endpoint with
    ``grant_type=refresh_token`` and reads ``provider_token`` /
    ``provider_refresh_token`` from the JSON response.

    Args:
        refresh_token: The refresh token stored alongside the Google access
            token in Redis.

    Returns:
        The new Google access token.

    Raises:
        httpx.HTTPStatusError: If Supabase answers with a non-2xx status.
        RuntimeError: If the response carries no usable ``provider_token``.
    """
    # NOTE(review): the value stored by the OAuth flow under "refresh_token"
    # appears to be the *Google* provider refresh token, while this endpoint
    # takes a Supabase refresh token — confirm that this exchange can succeed.
    async with httpx.AsyncClient(timeout=10.0) as client:
        resp = await client.post(
            f"{settings.supabase_url}/auth/v1/token?grant_type=refresh_token",
            json={"refresh_token": refresh_token},
            headers={
                "apikey": settings.supabase_anon_key,
                "Content-Type": "application/json",
            },
        )
        resp.raise_for_status()
        data = resp.json()

    # Use `or` rather than a .get() default: the keys may be present but
    # empty/null, and a blank value must not replace the refresh token we
    # already hold.
    provider_token = data.get("provider_token") or ""
    provider_refresh_token = data.get("provider_refresh_token") or refresh_token

    if not provider_token:
        raise RuntimeError("Supabase refresh did not return a Google provider_token")

    await store_google_token("current", provider_token, provider_refresh_token)
    return provider_token
def _load_service_account_info(creds_json: str) -> dict[str, Any]:
    """Load service account info from a file path or an inline JSON string.

    Args:
        creds_json: Either the path to a service account JSON key file or
            the JSON document itself.

    Returns:
        The parsed service account credentials.

    Raises:
        ValueError: If the value is neither an existing file nor valid JSON,
            or if the parsed document is not a JSON object.
    """
    import os  # noqa: PLC0415

    if os.path.isfile(creds_json):
        # Read as UTF-8 explicitly so parsing does not depend on the
        # platform's locale encoding.
        with open(creds_json, encoding="utf-8") as f:
            info = json.load(f)
    else:
        # Not an existing file: treat the value as inline JSON.
        try:
            info = json.loads(creds_json)
        except json.JSONDecodeError as e:
            raise ValueError(
                f"GOOGLE_SHEETS_CREDENTIALS_JSON is neither a valid file path "
                f"nor valid JSON: {e}"
            ) from e

    # Callers index the result by key; fail here with a clear message instead
    # of a TypeError later when the JSON is e.g. a list or a bare string.
    if not isinstance(info, dict):
        raise ValueError(
            "GOOGLE_SHEETS_CREDENTIALS_JSON must contain a JSON object, "
            f"got {type(info).__name__}"
        )
    return info
GoogleSheetsClient: + """Async Google Sheets API v4 client.""" + + def __init__(self, access_token: str) -> None: + self._token = access_token + self._client = httpx.AsyncClient( + timeout=30.0, + headers={ + "Authorization": f"Bearer {access_token}", + "Content-Type": "application/json", + }, + ) + + async def close(self) -> None: + await self._client.aclose() + + async def __aenter__(self) -> GoogleSheetsClient: + return self + + async def __aexit__(self, *args: Any) -> None: + await self.close() + + async def read_range( + self, spreadsheet_id: str, range: str = "Sheet1" + ) -> list[list[str]]: + """Read values from a spreadsheet range. + + Returns a 2D list of strings (rows x columns). + """ + resp = await self._client.get( + f"{SHEETS_API_BASE}/{spreadsheet_id}/values/{range}", + params={"valueRenderOption": "FORMATTED_VALUE"}, + ) + resp.raise_for_status() + data = resp.json() + return data.get("values", []) + + async def write_range( + self, + spreadsheet_id: str, + range: str, + values: list[list[str]], + ) -> dict[str, Any]: + """Write values to a spreadsheet range (overwrite).""" + resp = await self._client.put( + f"{SHEETS_API_BASE}/{spreadsheet_id}/values/{range}", + params={"valueInputOption": "USER_ENTERED"}, + json={"values": values}, + ) + resp.raise_for_status() + return resp.json() + + async def append_range( + self, + spreadsheet_id: str, + range: str, + values: list[list[str]], + ) -> dict[str, Any]: + """Append values after existing data in a range.""" + resp = await self._client.post( + f"{SHEETS_API_BASE}/{spreadsheet_id}/values/{range}:append", + params={ + "valueInputOption": "USER_ENTERED", + "insertDataOption": "INSERT_ROWS", + }, + json={"values": values}, + ) + resp.raise_for_status() + return resp.json() + + async def create_spreadsheet(self, title: str) -> dict[str, Any]: + """Create a new spreadsheet. 
def records_to_values(records: list[dict[str, Any]]) -> list[list[str]]:
    """Convert a list of dicts to 2D values (first row = headers).

    Headers are the union of all record keys in order of first appearance.
    Cell values are stringified; a missing key or a ``None`` value becomes an
    empty cell rather than the literal text ``"None"``.

    Example:
        [{"name": "Alice", "age": 30}] -> [["name", "age"], ["Alice", "30"]]

    Args:
        records: JSON-style records to convert.

    Returns:
        ``[]`` for empty input, otherwise the header row followed by one row
        per record.
    """
    if not records:
        return []

    # dict preserves insertion order, so this collects each key once, in the
    # order it is first seen across all records.
    headers = list(dict.fromkeys(key for record in records for key in record))

    def _cell(value: Any) -> str:
        # None (JSON null / a blanked-out NaN) means "no value": leave the
        # cell empty instead of writing the word "None" into the sheet.
        return "" if value is None else str(value)

    rows: list[list[str]] = [headers]
    rows.extend([_cell(record.get(h)) for h in headers] for record in records)
    return rows
+ + Extracts the ID from URLs like: + https://docs.google.com/spreadsheets/d/1BxiMVs0XRA5nFMdKvBdBZjgmUUqptlbs74OgVE2upms/edit + and passes through bare IDs like: + 1BxiMVs0XRA5nFMdKvBdBZjgmUUqptlbs74OgVE2upms + """ + v = v.strip() + m = _SHEETS_URL_RE.search(v) + if m: + return m.group(1) + # Bare ID: must be alphanumeric + hyphens/underscores, typically 44 chars + if re.fullmatch(r"[a-zA-Z0-9_-]+", v) and len(v) >= 10: + return v + raise ValueError( + f"Invalid spreadsheet_id: expected a Google Sheets URL or a bare spreadsheet ID, got {v!r}" + ) + + +class SheetsReadInput(BaseModel): + """Input for the sheets_read tool.""" + + model_config = ConfigDict(str_strip_whitespace=True, extra="forbid") + + spreadsheet_id: str = Field( + ..., + description="Google Sheets spreadsheet ID or full URL.", + ) + range: str = Field( + default="Sheet1", + description="A1 notation range to read. Examples: 'Sheet1' (entire sheet), " + "'Sheet1!A1:D10' (rectangle), 'Sheet1!B:B' (single column), " + "'Sheet1!1:5' (first 5 rows), 'Sheet2' (different tab). " + "Defaults to entire first sheet.", + ) + + @field_validator("spreadsheet_id") + @classmethod + def extract_id(cls, v: str) -> str: + return _extract_spreadsheet_id(v) + + +class SheetsWriteInput(BaseModel): + """Input for the sheets_write tool.""" + + model_config = ConfigDict(str_strip_whitespace=True, extra="forbid") + + spreadsheet_id: str = Field( + ..., + description="Google Sheets spreadsheet ID or full URL.", + ) + range: str = Field( + default="Sheet1", + description="A1 notation range to write to. To add columns next to existing data, " + "use the first empty column (e.g. 'Sheet1!E1'). Only the target range is " + "affected — existing data in other columns is preserved.", + ) + data: list[dict[str, Any]] = Field( + ..., + description="Data as a list of dicts (JSON records). 
Keys become column headers.", + min_length=1, + ) + append: bool = Field( + default=False, + description="If True, append after existing data instead of overwriting.", + ) + + @field_validator("spreadsheet_id") + @classmethod + def extract_id(cls, v: str) -> str: + return _extract_spreadsheet_id(v) + + +class SheetsCreateInput(BaseModel): + """Input for the sheets_create tool.""" + + model_config = ConfigDict(str_strip_whitespace=True, extra="forbid") + + title: str = Field( + ..., + description="Title for the new spreadsheet.", + min_length=1, + ) + data: list[dict[str, Any]] | None = Field( + default=None, + description="Optional initial data as a list of dicts (JSON records).", + ) + + +class SheetsInfoInput(BaseModel): + """Input for the sheets_info tool.""" + + model_config = ConfigDict(str_strip_whitespace=True, extra="forbid") + + spreadsheet_id: str = Field( + ..., + description="Google Sheets spreadsheet ID or full URL.", + ) + + @field_validator("spreadsheet_id") + @classmethod + def extract_id(cls, v: str) -> str: + return _extract_spreadsheet_id(v) + + +class SheetsListInput(BaseModel): + """Input for the sheets_list tool.""" + + model_config = ConfigDict(str_strip_whitespace=True, extra="forbid") + + query: str | None = Field( + default=None, + description="Optional search query to filter spreadsheets by name (e.g. 'Budget 2024').", + ) + max_results: int = Field( + default=20, + description="Maximum number of spreadsheets to return.", + ge=1, + le=100, + ) diff --git a/everyrow-mcp/src/everyrow_mcp/sheets_tools.py b/everyrow-mcp/src/everyrow_mcp/sheets_tools.py new file mode 100644 index 00000000..280b8716 --- /dev/null +++ b/everyrow-mcp/src/everyrow_mcp/sheets_tools.py @@ -0,0 +1,251 @@ +"""Google Sheets MCP tools for the everyrow MCP server. + +Provides 5 tools: sheets_list, sheets_read, sheets_write, sheets_create, sheets_info. +All tools use the existing FastMCP instance from app.py. 
+""" + +from __future__ import annotations + +import json +import logging + +from mcp.types import TextContent, ToolAnnotations + +from everyrow_mcp.app import mcp +from everyrow_mcp.sheets_client import ( + GoogleSheetsClient, + get_google_token, + records_to_values, + values_to_records, +) +from everyrow_mcp.sheets_models import ( + SheetsCreateInput, + SheetsInfoInput, + SheetsListInput, + SheetsReadInput, + SheetsWriteInput, +) + +logger = logging.getLogger(__name__) + + +@mcp.tool( + name="sheets_list", + annotations=ToolAnnotations( + title="List Google Sheets", + readOnlyHint=True, + destructiveHint=False, + idempotentHint=True, + openWorldHint=True, + ), +) +async def sheets_list(params: SheetsListInput) -> list[TextContent]: + """List the user's Google Sheets, optionally filtered by name.""" + token = await get_google_token() + async with GoogleSheetsClient(token) as client: + files = await client.list_spreadsheets( + query=params.query, max_results=params.max_results + ) + + if not files: + msg = "No spreadsheets found" + if params.query: + msg += f" matching '{params.query}'" + msg += "." + return [TextContent(type="text", text=msg)] + + return [ + TextContent( + type="text", + text=json.dumps(files, ensure_ascii=False), + ) + ] + + +@mcp.tool( + name="sheets_read", + annotations=ToolAnnotations( + title="Read Google Sheet", + readOnlyHint=True, + destructiveHint=False, + idempotentHint=True, + openWorldHint=True, + ), +) +async def sheets_read(params: SheetsReadInput) -> list[TextContent]: + """Read data from a Google Sheet and return it as JSON records. + + Returns a list of dicts where keys are column headers. The output is + directly compatible with everyrow tools' input_json parameter. 
@mcp.tool(
    name="sheets_write",
    annotations=ToolAnnotations(
        title="Write to Google Sheet",
        readOnlyHint=False,
        # The default (non-append) mode overwrites whatever is already in the
        # target range, so this tool is not purely additive and must not be
        # advertised to clients as non-destructive.
        destructiveHint=True,
        idempotentHint=False,
        openWorldHint=True,
    ),
)
async def sheets_write(params: SheetsWriteInput) -> list[TextContent]:
    """Write data to a Google Sheet.

    Accepts a list of dicts (JSON records). Keys become column headers.
    Only the specified range is affected — other cells are untouched.

    To add new columns next to existing data, set range to the first empty
    column (e.g. 'Sheet1!E1') and pass only the new columns. You do NOT
    need to rewrite the entire sheet.

    Use append=True to add rows after existing data instead of overwriting.
    """
    token = await get_google_token()
    # First row of `values` is the header row built from the record keys.
    values = records_to_values(params.data)

    async with GoogleSheetsClient(token) as client:
        if params.append:
            # NOTE(review): `values` still includes the header row here, so
            # appending to a sheet that already has headers adds a second
            # header line — confirm whether that is intended.
            result = await client.append_range(
                params.spreadsheet_id, params.range, values
            )
            # The append response nests its statistics under "updates".
            updates = result.get("updates", {})
            updated_range = updates.get("updatedRange", params.range)
            updated_rows = updates.get("updatedRows", len(params.data))
            return [
                TextContent(
                    type="text",
                    text=f"Appended {updated_rows} rows to {updated_range}.",
                )
            ]

        result = await client.write_range(params.spreadsheet_id, params.range, values)
        updated_range = result.get("updatedRange", params.range)
        # Fallback count is data rows plus the header row.
        updated_rows = result.get("updatedRows", len(params.data) + 1)
        return [
            TextContent(
                type="text",
                text=f"Wrote {updated_rows} rows (including header) to {updated_range}.",
            )
        ]
+ """ + token = await get_google_token() + + async with GoogleSheetsClient(token) as client: + metadata = await client.create_spreadsheet(params.title) + spreadsheet_id = metadata["spreadsheetId"] + url = metadata.get( + "spreadsheetUrl", + f"https://docs.google.com/spreadsheets/d/{spreadsheet_id}", + ) + + # Optionally populate with initial data + if params.data: + values = records_to_values(params.data) + await client.write_range(spreadsheet_id, "Sheet1", values) + + result = { + "spreadsheet_id": spreadsheet_id, + "url": url, + "title": params.title, + } + if params.data: + result["rows_written"] = len(params.data) + + return [ + TextContent( + type="text", + text=json.dumps(result, ensure_ascii=False), + ) + ] + + +@mcp.tool( + name="sheets_info", + annotations=ToolAnnotations( + title="Get Google Sheet Info", + readOnlyHint=True, + destructiveHint=False, + idempotentHint=True, + openWorldHint=True, + ), +) +async def sheets_info(params: SheetsInfoInput) -> list[TextContent]: + """Get metadata about a Google Sheet: title, sheet names, and dimensions.""" + token = await get_google_token() + + async with GoogleSheetsClient(token) as client: + metadata = await client.get_spreadsheet_metadata(params.spreadsheet_id) + + title = metadata.get("properties", {}).get("title", "Unknown") + sheets = [] + for sheet in metadata.get("sheets", []): + props = sheet.get("properties", {}) + grid = props.get("gridProperties", {}) + sheets.append( + { + "name": props.get("title", ""), + "index": props.get("index", 0), + "rows": grid.get("rowCount", 0), + "columns": grid.get("columnCount", 0), + } + ) + + result = { + "spreadsheet_id": params.spreadsheet_id, + "title": title, + "url": f"https://docs.google.com/spreadsheets/d/{params.spreadsheet_id}", + "sheets": sheets, + } + + return [ + TextContent( + type="text", + text=json.dumps(result, ensure_ascii=False), + ) + ] diff --git a/everyrow-mcp/src/everyrow_mcp/tools.py b/everyrow-mcp/src/everyrow_mcp/tools.py index 
async def _write_results_to_sheet(
    df: Any, title: str, preview_size: int = 5
) -> list[TextContent]:
    """Create a new Google Sheet and write the full DataFrame there.

    Args:
        df: Result DataFrame to export.
        title: Title of the spreadsheet to create.
        preview_size: Number of leading rows echoed back in the widget payload.

    Returns:
        Two text items: a JSON widget payload (preview rows, total row count,
        spreadsheet URL) and a human-readable summary with the sheet link.

    Raises:
        ValueError: If a spreadsheet with exactly this title already exists.
    """
    import pandas as pd  # noqa: PLC0415

    from everyrow_mcp.sheets_client import (  # noqa: PLC0415
        GoogleSheetsClient,
        get_google_token,
        records_to_values,
    )

    # Cast to object before masking: on numeric columns the replacement value
    # can otherwise be coerced straight back to NaN (pandas-version dependent),
    # which would surface as "nan" in the sheet and as invalid JSON below.
    cells = df.astype(object)
    present = pd.notna(df)

    token = await get_google_token()
    async with GoogleSheetsClient(token) as client:
        # Guard: check for existing sheets with the same title. The listing is
        # a "name contains" search, so look at a generous number of matches —
        # with only a handful, an exact match can hide behind similarly named
        # sheets and the guard would be skipped.
        existing = await client.list_spreadsheets(query=title, max_results=100)
        for f in existing:
            if f.get("name") == title:
                raise ValueError(
                    f"A spreadsheet named '{title}' already exists "
                    f"(id: {f['id']}). Pick a different title to avoid "
                    f"overwriting existing data."
                )

        # Create and populate
        metadata = await client.create_spreadsheet(title)
        spreadsheet_id = metadata["spreadsheetId"]
        url = metadata.get(
            "spreadsheetUrl",
            f"https://docs.google.com/spreadsheets/d/{spreadsheet_id}",
        )

        # Missing values become empty cells rather than the text "None"/"nan".
        records = cells.where(present, "").to_dict(orient="records")
        values = records_to_values(records)
        await client.write_range(spreadsheet_id, "Sheet1", values)

    total = len(df)
    # For the JSON payload missing values are emitted as null.
    preview = (
        cells.head(preview_size)
        .where(present.head(preview_size), None)
        .to_dict(orient="records")
    )
    summary = f"Created Google Sheet '{title}' with {total} rows.\nURL: {url}"

    widget_data: dict = {
        "preview": preview,
        "total": total,
        "spreadsheet_url": url,
    }

    return [
        TextContent(type="text", text=json.dumps(widget_data)),
        TextContent(type="text", text=summary),
    ]
""" client = _get_client(ctx) task_id = params.task_id @@ -1066,6 +1130,18 @@ async def everyrow_results_stdio( ) ] + # ── Google Sheets output ───────────────────────────────────── + if params.output_spreadsheet_title: + try: + return await _write_results_to_sheet(df, params.output_spreadsheet_title) + except Exception as e: + return [ + TextContent( + type="text", + text=f"Failed to write results to Google Sheet: {e!r}", + ) + ] + output_file = Path(params.output_path) save_result_to_csv(df, output_file) artifact_line = f"\nOutput artifact_id: {artifact_id}" if artifact_id else "" @@ -1087,9 +1163,10 @@ async def everyrow_results_http( """Retrieve results from a completed everyrow task. Only call this after everyrow_progress reports status 'completed'. - The user always has access to all rows via the widget — page_size only - controls how many rows _you_ can read. - After results load, tell the user how many rows you can see vs the total. + Results are returned as a paginated preview with a download link. + Optionally pass output_spreadsheet_title to create a new Google Sheet with + the full results. This always creates a new sheet — it refuses to overwrite + an existing sheet with the same title. """ client = _get_client(ctx) task_id = params.task_id @@ -1148,6 +1225,18 @@ async def everyrow_results_http( ) ] + # ── Google Sheets output ───────────────────────────────────── + if params.output_spreadsheet_title: + try: + return await _write_results_to_sheet(df, params.output_spreadsheet_title) + except Exception as e: + return [ + TextContent( + type="text", + text=f"Failed to write results to Google Sheet: {e!r}", + ) + ] + # output_path is accepted by the schema but ignored in HTTP mode — # the server must not write to its own filesystem on remote request. 
diff --git a/everyrow-mcp/src/everyrow_mcp/utils.py b/everyrow-mcp/src/everyrow_mcp/utils.py index 7bd26e10..4223020b 100644 --- a/everyrow-mcp/src/everyrow_mcp/utils.py +++ b/everyrow-mcp/src/everyrow_mcp/utils.py @@ -144,6 +144,11 @@ async def _validate_url_target(url: str) -> None: await _resolve_and_validate(hostname) +def _is_google_url(url: str) -> bool: + """Check if a URL points to Google Sheets or Drive.""" + return "docs.google.com" in url or "drive.google.com" in url + + def is_url(value: str) -> bool: """Check if a string looks like an HTTP(S) URL.""" return value.startswith("http://") or value.startswith("https://") @@ -278,6 +283,7 @@ async def fetch_csv_from_url(url: str) -> pd.DataFrame: """Fetch CSV data from a URL and return a DataFrame. Automatically normalises Google Sheets URLs to their CSV export endpoint. + Authenticates Google URLs with the user's token when available. Validates that the URL (and any redirects) do not target internal networks. Raises: @@ -287,6 +293,17 @@ async def fetch_csv_from_url(url: str) -> pd.DataFrame: url = _normalise_google_sheets_url(url) await _validate_url_target(url) + # Authenticate Google URLs with the user's OAuth token + headers: dict[str, str] = {} + if _is_google_url(url): + try: + from everyrow_mcp.sheets_client import get_google_token # noqa: PLC0415 + + token = await get_google_token() + headers["Authorization"] = f"Bearer {token}" + except Exception: + logger.debug("No Google token available, fetching without auth") + async with httpx.AsyncClient( transport=_SSRFSafeTransport(), follow_redirects=True, @@ -295,7 +312,7 @@ async def fetch_csv_from_url(url: str) -> pd.DataFrame: event_hooks={"response": [_check_redirect]}, ) as client: # Stream the response to enforce a size limit before buffering - async with client.stream("GET", url) as response: + async with client.stream("GET", url, headers=headers) as response: response.raise_for_status() content_length = response.headers.get("content-length") if 
content_length and int(content_length) > settings.max_fetch_size_bytes: @@ -342,6 +359,7 @@ async def fetch_csv_from_url(url: str) -> pd.DataFrame: ) + def validate_csv_path(path: str) -> None: """Validate that a CSV file exists and is readable. diff --git a/everyrow-mcp/tests/test_mcp_e2e.py b/everyrow-mcp/tests/test_mcp_e2e.py index 660ee564..3055dafa 100644 --- a/everyrow-mcp/tests/test_mcp_e2e.py +++ b/everyrow-mcp/tests/test_mcp_e2e.py @@ -28,7 +28,8 @@ from mcp.shared.memory import create_connected_server_and_client_session from mcp.types import TextContent -# Import tools module to trigger @mcp.tool() registration on the FastMCP instance +# Import tools modules to trigger @mcp.tool() registration on the FastMCP instance +import everyrow_mcp.sheets_tools import everyrow_mcp.tools # noqa: F401 from everyrow_mcp import redis_store from everyrow_mcp.app import mcp as mcp_app @@ -166,7 +167,7 @@ class TestMcpProtocol: @pytest.mark.asyncio async def test_list_tools(self, _http_state): - """list_tools returns all registered tools (including upload_data).""" + """list_tools returns all registered tools.""" async with mcp_client() as session: result = await session.list_tools() tool_names = sorted(t.name for t in result.tools) @@ -188,6 +189,11 @@ async def test_list_tools(self, _http_state): "everyrow_single_agent", "everyrow_upload_data", "everyrow_use_list", + "sheets_list", + "sheets_read", + "sheets_write", + "sheets_create", + "sheets_info", ] ) assert tool_names == expected diff --git a/everyrow-mcp/tests/test_sheets_tools.py b/everyrow-mcp/tests/test_sheets_tools.py new file mode 100644 index 00000000..bc625345 --- /dev/null +++ b/everyrow-mcp/tests/test_sheets_tools.py @@ -0,0 +1,570 @@ +"""Tests for Google Sheets MCP tools. + +All Google Sheets API calls are mocked via httpx responses. 
+""" + +from __future__ import annotations + +import json +from typing import Any +from unittest.mock import AsyncMock, patch + +import httpx +import pytest + +from everyrow_mcp.sheets_client import ( + GoogleSheetsClient, + records_to_values, + values_to_records, +) +from everyrow_mcp.sheets_models import ( + SheetsCreateInput, + SheetsInfoInput, + SheetsListInput, + SheetsReadInput, + SheetsWriteInput, + _extract_spreadsheet_id, +) +from everyrow_mcp.sheets_tools import ( + sheets_create, + sheets_info, + sheets_list, + sheets_read, + sheets_write, +) + +# ── Model validation tests ─────────────────────────────────────────── + + +class TestSpreadsheetIdExtraction: + def test_bare_id(self): + bare = "1BxiMVs0XRA5nFMdKvBdBZjgmUUqptlbs74OgVE2upms" + assert _extract_spreadsheet_id(bare) == bare + + def test_full_url(self): + url = "https://docs.google.com/spreadsheets/d/1BxiMVs0XRA5nFMdKvBdBZjgmUUqptlbs74OgVE2upms/edit#gid=0" + assert ( + _extract_spreadsheet_id(url) + == "1BxiMVs0XRA5nFMdKvBdBZjgmUUqptlbs74OgVE2upms" + ) + + def test_url_without_edit(self): + url = "https://docs.google.com/spreadsheets/d/1BxiMVs0XRA5nFMdKvBdBZjgmUUqptlbs74OgVE2upms" + assert ( + _extract_spreadsheet_id(url) + == "1BxiMVs0XRA5nFMdKvBdBZjgmUUqptlbs74OgVE2upms" + ) + + def test_invalid_id_too_short(self): + with pytest.raises(ValueError, match="Invalid spreadsheet_id"): + _extract_spreadsheet_id("short") + + def test_invalid_id_special_chars(self): + with pytest.raises(ValueError, match="Invalid spreadsheet_id"): + _extract_spreadsheet_id("not a valid id!@#$") + + def test_whitespace_stripped(self): + bare = " 1BxiMVs0XRA5nFMdKvBdBZjgmUUqptlbs74OgVE2upms " + assert ( + _extract_spreadsheet_id(bare) + == "1BxiMVs0XRA5nFMdKvBdBZjgmUUqptlbs74OgVE2upms" + ) + + +class TestSheetsReadInput: + def test_url_extraction(self): + inp = SheetsReadInput( + spreadsheet_id="https://docs.google.com/spreadsheets/d/abc123def456ghi789jkl012mno345pqr678stu901v" + ) + assert inp.spreadsheet_id == 
"abc123def456ghi789jkl012mno345pqr678stu901v" + + def test_default_range(self): + inp = SheetsReadInput( + spreadsheet_id="abc123def456ghi789jkl012mno345pqr678stu901v" + ) + assert inp.range == "Sheet1" + + def test_custom_range(self): + inp = SheetsReadInput( + spreadsheet_id="abc123def456ghi789jkl012mno345pqr678stu901v", + range="Sheet2!A1:D10", + ) + assert inp.range == "Sheet2!A1:D10" + + +class TestSheetsWriteInput: + def test_valid_input(self): + inp = SheetsWriteInput( + spreadsheet_id="abc123def456ghi789jkl012mno345pqr678stu901v", + data=[{"name": "Alice", "age": "30"}], + ) + assert inp.append is False + + def test_append_flag(self): + inp = SheetsWriteInput( + spreadsheet_id="abc123def456ghi789jkl012mno345pqr678stu901v", + data=[{"name": "Alice"}], + append=True, + ) + assert inp.append is True + + def test_empty_data_rejected(self): + with pytest.raises(Exception): + SheetsWriteInput( + spreadsheet_id="abc123def456ghi789jkl012mno345pqr678stu901v", + data=[], + ) + + +class TestSheetsCreateInput: + def test_title_required(self): + with pytest.raises(Exception): + SheetsCreateInput(title="") + + def test_optional_data(self): + inp = SheetsCreateInput(title="My Sheet") + assert inp.data is None + + def test_with_data(self): + inp = SheetsCreateInput(title="My Sheet", data=[{"col": "val"}]) + assert inp.data == [{"col": "val"}] + + +class TestSheetsInfoInput: + def test_url_extraction(self): + inp = SheetsInfoInput( + spreadsheet_id="https://docs.google.com/spreadsheets/d/abc123def456ghi789jkl012mno345pqr678stu901v/edit" + ) + assert inp.spreadsheet_id == "abc123def456ghi789jkl012mno345pqr678stu901v" + + +# ── Converter tests ────────────────────────────────────────────────── + + +class TestValuesToRecords: + def test_basic_conversion(self): + values = [["name", "age"], ["Alice", "30"], ["Bob", "25"]] + records = values_to_records(values) + assert records == [ + {"name": "Alice", "age": "30"}, + {"name": "Bob", "age": "25"}, + ] + + def 
test_empty_sheet(self): + assert values_to_records([]) == [] + + def test_headers_only(self): + assert values_to_records([["name", "age"]]) == [] + + def test_short_rows_padded(self): + values = [["name", "age", "city"], ["Alice"]] + records = values_to_records(values) + assert records == [{"name": "Alice", "age": "", "city": ""}] + + +class TestRecordsToValues: + def test_basic_conversion(self): + records = [{"name": "Alice", "age": 30}] + values = records_to_values(records) + assert values == [["name", "age"], ["Alice", "30"]] + + def test_empty_records(self): + assert records_to_values([]) == [] + + def test_preserves_key_order(self): + records = [{"z": "1", "a": "2"}, {"z": "3", "a": "4"}] + values = records_to_values(records) + assert values[0] == ["z", "a"] + + def test_missing_keys_become_empty(self): + records = [{"a": "1", "b": "2"}, {"a": "3"}] + values = records_to_values(records) + assert values[2] == ["3", ""] + + +# ── Client tests (mocked httpx) ───────────────────────────────────── + + +def _mock_response(data: Any, status: int = 200) -> httpx.Response: + return httpx.Response( + status_code=status, + json=data, + request=httpx.Request("GET", "https://example.com"), + ) + + +class TestGoogleSheetsClient: + @pytest.mark.asyncio + async def test_read_range(self): + expected_values = [["name", "age"], ["Alice", "30"]] + mock_resp = _mock_response({"values": expected_values}) + + with patch.object( + httpx.AsyncClient, "get", new_callable=AsyncMock, return_value=mock_resp + ): + async with GoogleSheetsClient("fake-token") as client: + result = await client.read_range("sheet-id", "Sheet1") + assert result == expected_values + + @pytest.mark.asyncio + async def test_read_range_empty(self): + mock_resp = _mock_response({}) + + with patch.object( + httpx.AsyncClient, "get", new_callable=AsyncMock, return_value=mock_resp + ): + async with GoogleSheetsClient("fake-token") as client: + result = await client.read_range("sheet-id", "Sheet1") + assert result == 
[] + + @pytest.mark.asyncio + async def test_write_range(self): + mock_resp = _mock_response( + { + "updatedRange": "Sheet1!A1:B3", + "updatedRows": 3, + } + ) + + with patch.object( + httpx.AsyncClient, "put", new_callable=AsyncMock, return_value=mock_resp + ): + async with GoogleSheetsClient("fake-token") as client: + result = await client.write_range( + "sheet-id", "Sheet1", [["a", "b"], ["1", "2"]] + ) + assert result["updatedRows"] == 3 + + @pytest.mark.asyncio + async def test_append_range(self): + mock_resp = _mock_response( + { + "updates": { + "updatedRange": "Sheet1!A4:B5", + "updatedRows": 2, + } + } + ) + + with patch.object( + httpx.AsyncClient, "post", new_callable=AsyncMock, return_value=mock_resp + ): + async with GoogleSheetsClient("fake-token") as client: + result = await client.append_range("sheet-id", "Sheet1", [["1", "2"]]) + assert result["updates"]["updatedRows"] == 2 + + @pytest.mark.asyncio + async def test_create_spreadsheet(self): + mock_resp = _mock_response( + { + "spreadsheetId": "new-id-123", + "spreadsheetUrl": "https://docs.google.com/spreadsheets/d/new-id-123", + } + ) + + with patch.object( + httpx.AsyncClient, "post", new_callable=AsyncMock, return_value=mock_resp + ): + async with GoogleSheetsClient("fake-token") as client: + result = await client.create_spreadsheet("Test Sheet") + assert result["spreadsheetId"] == "new-id-123" + + @pytest.mark.asyncio + async def test_get_spreadsheet_metadata(self): + mock_resp = _mock_response( + { + "properties": {"title": "My Sheet"}, + "sheets": [ + { + "properties": { + "title": "Sheet1", + "index": 0, + "gridProperties": {"rowCount": 100, "columnCount": 26}, + } + } + ], + } + ) + + with patch.object( + httpx.AsyncClient, "get", new_callable=AsyncMock, return_value=mock_resp + ): + async with GoogleSheetsClient("fake-token") as client: + result = await client.get_spreadsheet_metadata("sheet-id") + assert result["properties"]["title"] == "My Sheet" + assert 
result["sheets"][0]["properties"]["title"] == "Sheet1" + + +# ── Tool integration tests (mock token + httpx) ───────────────────── + + +@pytest.fixture +def mock_google_token(): + """Patch get_google_token to return a fake token.""" + with patch( + "everyrow_mcp.sheets_tools.get_google_token", + new_callable=AsyncMock, + return_value="fake-google-token", + ) as m: + yield m + + +class TestSheetsReadTool: + @pytest.mark.asyncio + async def test_returns_json_records(self, _mock_google_token): + values = [["name", "age"], ["Alice", "30"], ["Bob", "25"]] + mock_resp = _mock_response({"values": values}) + + with patch.object( + httpx.AsyncClient, "get", new_callable=AsyncMock, return_value=mock_resp + ): + result = await sheets_read( + SheetsReadInput( + spreadsheet_id="abc123def456ghi789jkl012mno345pqr678stu901v" + ) + ) + + assert len(result) == 1 + data = json.loads(result[0].text) + assert data == [{"name": "Alice", "age": "30"}, {"name": "Bob", "age": "25"}] + + @pytest.mark.asyncio + async def test_empty_sheet(self, _mock_google_token): + mock_resp = _mock_response({}) + + with patch.object( + httpx.AsyncClient, "get", new_callable=AsyncMock, return_value=mock_resp + ): + result = await sheets_read( + SheetsReadInput( + spreadsheet_id="abc123def456ghi789jkl012mno345pqr678stu901v" + ) + ) + + assert "empty" in result[0].text.lower() + + @pytest.mark.asyncio + async def test_url_extraction(self, _mock_google_token): + values = [["x"], ["1"]] + mock_resp = _mock_response({"values": values}) + + with patch.object( + httpx.AsyncClient, "get", new_callable=AsyncMock, return_value=mock_resp + ) as mock_get: + inp = SheetsReadInput( + spreadsheet_id="https://docs.google.com/spreadsheets/d/abc123def456ghi789jkl012mno345pqr678stu901v/edit" + ) + await sheets_read(inp) + + # Verify the extracted ID was used in the API call + call_url = mock_get.call_args[0][0] + assert "abc123def456ghi789jkl012mno345pqr678stu901v" in call_url + assert "docs.google.com" not in call_url + + 
+class TestSheetsWriteTool: + @pytest.mark.asyncio + async def test_write_overwrite(self, _mock_google_token): + mock_resp = _mock_response( + { + "updatedRange": "Sheet1!A1:B3", + "updatedRows": 3, + } + ) + + with patch.object( + httpx.AsyncClient, "put", new_callable=AsyncMock, return_value=mock_resp + ): + result = await sheets_write( + SheetsWriteInput( + spreadsheet_id="abc123def456ghi789jkl012mno345pqr678stu901v", + data=[{"name": "Alice"}, {"name": "Bob"}], + ) + ) + + assert "Wrote" in result[0].text + + @pytest.mark.asyncio + async def test_write_append(self, _mock_google_token): + mock_resp = _mock_response( + { + "updates": { + "updatedRange": "Sheet1!A4:B5", + "updatedRows": 2, + } + } + ) + + with patch.object( + httpx.AsyncClient, "post", new_callable=AsyncMock, return_value=mock_resp + ): + result = await sheets_write( + SheetsWriteInput( + spreadsheet_id="abc123def456ghi789jkl012mno345pqr678stu901v", + data=[{"name": "Alice"}], + append=True, + ) + ) + + assert "Appended" in result[0].text + + +class TestSheetsCreateTool: + @pytest.mark.asyncio + async def test_create_empty(self, _mock_google_token): + mock_resp = _mock_response( + { + "spreadsheetId": "new-id-123", + "spreadsheetUrl": "https://docs.google.com/spreadsheets/d/new-id-123", + } + ) + + with patch.object( + httpx.AsyncClient, "post", new_callable=AsyncMock, return_value=mock_resp + ): + result = await sheets_create(SheetsCreateInput(title="Test")) + + data = json.loads(result[0].text) + assert data["spreadsheet_id"] == "new-id-123" + assert "url" in data + assert "rows_written" not in data + + @pytest.mark.asyncio + async def test_create_with_data(self, _mock_google_token): + create_resp = _mock_response( + { + "spreadsheetId": "new-id-456", + "spreadsheetUrl": "https://docs.google.com/spreadsheets/d/new-id-456", + } + ) + write_resp = _mock_response({"updatedRows": 2}) + + with ( + patch.object( + httpx.AsyncClient, + "post", + new_callable=AsyncMock, + return_value=create_resp, + ), 
+ patch.object( + httpx.AsyncClient, + "put", + new_callable=AsyncMock, + return_value=write_resp, + ), + ): + result = await sheets_create( + SheetsCreateInput(title="Test", data=[{"col": "val"}]) + ) + + data = json.loads(result[0].text) + assert data["rows_written"] == 1 + + +class TestSheetsInfoTool: + @pytest.mark.asyncio + async def test_returns_metadata(self, _mock_google_token): + mock_resp = _mock_response( + { + "properties": {"title": "Budget 2024"}, + "sheets": [ + { + "properties": { + "title": "Sheet1", + "index": 0, + "gridProperties": {"rowCount": 100, "columnCount": 10}, + } + }, + { + "properties": { + "title": "Summary", + "index": 1, + "gridProperties": {"rowCount": 50, "columnCount": 5}, + } + }, + ], + } + ) + + with patch.object( + httpx.AsyncClient, "get", new_callable=AsyncMock, return_value=mock_resp + ): + result = await sheets_info( + SheetsInfoInput( + spreadsheet_id="abc123def456ghi789jkl012mno345pqr678stu901v" + ) + ) + + data = json.loads(result[0].text) + assert data["title"] == "Budget 2024" + assert len(data["sheets"]) == 2 + assert data["sheets"][0]["name"] == "Sheet1" + assert data["sheets"][0]["rows"] == 100 + assert data["sheets"][1]["name"] == "Summary" + + +class TestSheetsListTool: + @pytest.mark.asyncio + async def test_returns_files(self, _mock_google_token): + files = [ + { + "id": "abc123", + "name": "Budget 2024", + "modifiedTime": "2024-06-01T12:00:00Z", + "webViewLink": "https://docs.google.com/spreadsheets/d/abc123/edit", + }, + { + "id": "def456", + "name": "Contacts", + "modifiedTime": "2024-05-15T09:00:00Z", + "webViewLink": "https://docs.google.com/spreadsheets/d/def456/edit", + }, + ] + mock_resp = _mock_response({"files": files}) + + with patch.object( + httpx.AsyncClient, "get", new_callable=AsyncMock, return_value=mock_resp + ): + result = await sheets_list(SheetsListInput()) + + data = json.loads(result[0].text) + assert len(data) == 2 + assert data[0]["name"] == "Budget 2024" + assert data[1]["id"] == 
"def456" + + @pytest.mark.asyncio + async def test_empty_results(self, _mock_google_token): + mock_resp = _mock_response({"files": []}) + + with patch.object( + httpx.AsyncClient, "get", new_callable=AsyncMock, return_value=mock_resp + ): + result = await sheets_list(SheetsListInput()) + + assert "No spreadsheets found" in result[0].text + + @pytest.mark.asyncio + async def test_with_query(self, _mock_google_token): + mock_resp = _mock_response({"files": []}) + + with patch.object( + httpx.AsyncClient, "get", new_callable=AsyncMock, return_value=mock_resp + ) as mock_get: + result = await sheets_list(SheetsListInput(query="Budget")) + + assert "Budget" in result[0].text + # Verify the query was included in the Drive API call + call_params = mock_get.call_args[1]["params"] + assert "Budget" in call_params["q"] + + @pytest.mark.asyncio + async def test_max_results(self, _mock_google_token): + mock_resp = _mock_response({"files": []}) + + with patch.object( + httpx.AsyncClient, "get", new_callable=AsyncMock, return_value=mock_resp + ) as mock_get: + await sheets_list(SheetsListInput(max_results=5)) + + call_params = mock_get.call_args[1]["params"] + assert call_params["pageSize"] == "5" From 52d0f721716bf59c178702c47ba7b1ed5790df7a Mon Sep 17 00:00:00 2001 From: Rafael Poyiadzi Date: Mon, 23 Feb 2026 18:03:34 +0000 Subject: [PATCH 02/11] Skip sheets tools in stdio mode, add sheets to manifest Sheets tools require Google OAuth (HTTP mode only), so remove them from the tool registry in stdio mode. Also strip the service account JWT path from sheets_client since it's no longer needed. Add sheets tools to manifest.json and exclude output_spreadsheet_title from stdio schema. 
Co-Authored-By: Claude Opus 4.6 --- everyrow-mcp/manifest.json | 20 ++++ everyrow-mcp/src/everyrow_mcp/server.py | 11 +++ .../src/everyrow_mcp/sheets_client.py | 92 ++----------------- everyrow-mcp/tests/test_sheets_tools.py | 2 +- everyrow-mcp/tests/test_stdio_content.py | 11 +++ 5 files changed, 53 insertions(+), 83 deletions(-) diff --git a/everyrow-mcp/manifest.json b/everyrow-mcp/manifest.json index 2949ddfd..76d990bb 100644 --- a/everyrow-mcp/manifest.json +++ b/everyrow-mcp/manifest.json @@ -92,6 +92,26 @@ { "name": "everyrow_use_list", "description": "Import a reference list into your session and save it as a CSV file." + }, + { + "name": "sheets_list", + "description": "List the user's Google Sheets, optionally filtered by name." + }, + { + "name": "sheets_read", + "description": "Read data from a Google Sheet and return it as JSON records." + }, + { + "name": "sheets_write", + "description": "Write data to a Google Sheet." + }, + { + "name": "sheets_create", + "description": "Create a new Google Sheet, optionally populated with data." + }, + { + "name": "sheets_info", + "description": "Get metadata about a Google Sheet: title, sheet names, and dimensions." } ], "user_config": { diff --git a/everyrow-mcp/src/everyrow_mcp/server.py b/everyrow-mcp/src/everyrow_mcp/server.py index 6f420d39..481361bf 100644 --- a/everyrow-mcp/src/everyrow_mcp/server.py +++ b/everyrow-mcp/src/everyrow_mcp/server.py @@ -136,6 +136,17 @@ def main(): logging.error("Get an API key at https://everyrow.io/api-key") sys.exit(1) + # Sheets tools require HTTP mode (OAuth provides the Google token). + # Remove them from the tool manager so they don't appear in list_tools(). 
+ for name in ( + "sheets_list", + "sheets_read", + "sheets_write", + "sheets_create", + "sheets_info", + ): + mcp._tool_manager._tools.pop(name, None) + mcp.run(transport=transport.value) diff --git a/everyrow-mcp/src/everyrow_mcp/sheets_client.py b/everyrow-mcp/src/everyrow_mcp/sheets_client.py index ca9465c0..26f3d9fb 100644 --- a/everyrow-mcp/src/everyrow_mcp/sheets_client.py +++ b/everyrow-mcp/src/everyrow_mcp/sheets_client.py @@ -1,7 +1,8 @@ """Async Google Sheets API client using httpx. -Handles token resolution for both HTTP mode (Redis-stored OAuth tokens) -and stdio mode (service account JWT exchange). +Handles token resolution for HTTP mode (Redis-stored OAuth tokens obtained +during the Supabase/Google OAuth flow). Sheets tools are not available in +stdio mode. """ from __future__ import annotations @@ -12,17 +13,13 @@ from typing import Any import httpx -import jwt as pyjwt -from everyrow_mcp.config import settings from everyrow_mcp.redis_store import build_key, get_redis_client logger = logging.getLogger(__name__) SHEETS_API_BASE = "https://sheets.googleapis.com/v4/spreadsheets" DRIVE_API_BASE = "https://www.googleapis.com/drive/v3" -GOOGLE_TOKEN_URL = "https://oauth2.googleapis.com/token" -SCOPES = "https://www.googleapis.com/auth/spreadsheets https://www.googleapis.com/auth/drive.readonly" # Google token TTL and refresh buffer GOOGLE_TOKEN_TTL = 3600 # 1 hour @@ -34,22 +31,14 @@ async def get_google_token() -> str: - """Resolve a valid Google access token. - - - HTTP mode: reads from Redis (stored during OAuth flow), auto-refreshes if near expiry. - - stdio mode: generates from service account JSON via JWT assertion. - """ - if settings.is_http: - return await _get_google_token_http() - return await _get_google_token_stdio() - - -async def _get_google_token_http() -> str: - """Get Google token from Redis (HTTP mode). + """Resolve a valid Google access token from Redis. 
The token is stored during the OAuth callback when the user logs in - via Google through Supabase. + via Google through Supabase. Auto-refreshes if near expiry. + + Only available in HTTP mode — sheets tools are removed in stdio mode. """ + redis = get_redis_client() # Try to get the stored token @@ -81,6 +70,8 @@ async def _get_google_token_http() -> str: async def _refresh_google_token_http(refresh_token: str) -> str: """Refresh a Google access token using the Supabase-stored refresh token.""" + from everyrow_mcp.config import settings # noqa: PLC0415 + async with httpx.AsyncClient(timeout=10.0) as client: # Refresh through Supabase which proxies to Google resp = await client.post( @@ -104,69 +95,6 @@ async def _refresh_google_token_http(refresh_token: str) -> str: return provider_token -async def _get_google_token_stdio() -> str: - """Get Google token via service account JWT exchange (stdio mode).""" - creds_json = settings.google_sheets_credentials_json - if not creds_json: - raise RuntimeError( - "GOOGLE_SHEETS_CREDENTIALS_JSON not set. " - "Set it to a path to a service account JSON file or inline JSON." 
- ) - - # Load service account credentials - sa_info = _load_service_account_info(creds_json) - - # Sign JWT assertion - now = int(time.time()) - payload = { - "iss": sa_info["client_email"], - "sub": sa_info["client_email"], - "scope": SCOPES, - "aud": GOOGLE_TOKEN_URL, - "iat": now, - "exp": now + GOOGLE_TOKEN_TTL, - } - - assertion = pyjwt.encode( - payload, - sa_info["private_key"], - algorithm="RS256", - ) - - # Exchange JWT for access token - async with httpx.AsyncClient(timeout=10.0) as client: - resp = await client.post( - GOOGLE_TOKEN_URL, - data={ - "grant_type": "urn:ietf:params:oauth:grant-type:jwt-bearer", - "assertion": assertion, - }, - ) - resp.raise_for_status() - token_data = resp.json() - - return token_data["access_token"] - - -def _load_service_account_info(creds_json: str) -> dict[str, Any]: - """Load service account info from a file path or inline JSON string.""" - import os # noqa: PLC0415 - - # If it looks like a file path, read it - if os.path.isfile(creds_json): - with open(creds_json) as f: - return json.load(f) - - # Otherwise treat as inline JSON - try: - return json.loads(creds_json) - except json.JSONDecodeError as e: - raise ValueError( - f"GOOGLE_SHEETS_CREDENTIALS_JSON is neither a valid file path " - f"nor valid JSON: {e}" - ) from e - - async def store_google_token( user_id: str, access_token: str, diff --git a/everyrow-mcp/tests/test_sheets_tools.py b/everyrow-mcp/tests/test_sheets_tools.py index bc625345..7a44ab53 100644 --- a/everyrow-mcp/tests/test_sheets_tools.py +++ b/everyrow-mcp/tests/test_sheets_tools.py @@ -299,7 +299,7 @@ async def test_get_spreadsheet_metadata(self): @pytest.fixture -def mock_google_token(): +def _mock_google_token(): """Patch get_google_token to return a fake token.""" with patch( "everyrow_mcp.sheets_tools.get_google_token", diff --git a/everyrow-mcp/tests/test_stdio_content.py b/everyrow-mcp/tests/test_stdio_content.py index bc57636a..159cec09 100644 --- a/everyrow-mcp/tests/test_stdio_content.py 
+++ b/everyrow-mcp/tests/test_stdio_content.py @@ -45,6 +45,7 @@ from everyrow_mcp.models import ( AgentInput, DedupeInput, + HttpResultsInput, MergeInput, ProgressInput, RankInput, @@ -579,6 +580,16 @@ async def test_results_api_error(self, tmp_path: Path): class TestToolSchemas: """Verify tool schemas expose the expected fields.""" + def test_http_results_schema_includes_output_spreadsheet_title(self): + """HttpResultsInput schema includes output_spreadsheet_title for Google Sheets export.""" + schema = HttpResultsInput.model_json_schema() + assert "output_spreadsheet_title" in schema["properties"] + + def test_stdio_results_schema_includes_output_spreadsheet_title(self): + """StdioResultsInput schema includes output_spreadsheet_title for Google Sheets export.""" + schema = StdioResultsInput.model_json_schema() + assert "output_spreadsheet_title" in schema["properties"] + @pytest.mark.parametrize( "tool_name,def_name", [ From 760c504f0844f65b239bd337bc179f5cb6327fc8 Mon Sep 17 00:00:00 2001 From: Rafael Poyiadzi Date: Tue, 24 Feb 2026 11:07:10 +0000 Subject: [PATCH 03/11] Address code review: error handling, range rename, public API, token docs - Wrap all 5 sheets tool functions in try/except for httpx.HTTPStatusError with user-friendly messages for 403/404/429 and generic Google API errors - Rename `range` parameter to `cell_range` in GoogleSheetsClient methods to avoid shadowing the Python builtin (model field stays `range` for API) - Use public `remove_tool()` API instead of `_tools.pop()` for stdio mode sheets tool removal in server.py - Document single-tenant assumption on Google token key in sheets_client.py Co-Authored-By: Claude Opus 4.6 --- everyrow-mcp/src/everyrow_mcp/server.py | 2 +- .../src/everyrow_mcp/sheets_client.py | 15 +- everyrow-mcp/src/everyrow_mcp/sheets_tools.py | 138 +++++++++++------- 3 files changed, 95 insertions(+), 60 deletions(-) diff --git a/everyrow-mcp/src/everyrow_mcp/server.py b/everyrow-mcp/src/everyrow_mcp/server.py index 
481361bf..5bc0d309 100644 --- a/everyrow-mcp/src/everyrow_mcp/server.py +++ b/everyrow-mcp/src/everyrow_mcp/server.py @@ -145,7 +145,7 @@ def main(): "sheets_create", "sheets_info", ): - mcp._tool_manager._tools.pop(name, None) + mcp._tool_manager.remove_tool(name) mcp.run(transport=transport.value) diff --git a/everyrow-mcp/src/everyrow_mcp/sheets_client.py b/everyrow-mcp/src/everyrow_mcp/sheets_client.py index 26f3d9fb..4477c9b9 100644 --- a/everyrow-mcp/src/everyrow_mcp/sheets_client.py +++ b/everyrow-mcp/src/everyrow_mcp/sheets_client.py @@ -41,7 +41,8 @@ async def get_google_token() -> str: redis = get_redis_client() - # Try to get the stored token + # NOTE: single-tenant — "current" key is shared. If multi-tenancy is + # needed, key by session/user ID instead. token_key = build_key("google_token", "current") token_data = await redis.get(token_key) if token_data: @@ -147,14 +148,14 @@ async def __aexit__(self, *args: Any) -> None: await self.close() async def read_range( - self, spreadsheet_id: str, range: str = "Sheet1" + self, spreadsheet_id: str, cell_range: str = "Sheet1" ) -> list[list[str]]: """Read values from a spreadsheet range. Returns a 2D list of strings (rows x columns). 
""" resp = await self._client.get( - f"{SHEETS_API_BASE}/{spreadsheet_id}/values/{range}", + f"{SHEETS_API_BASE}/{spreadsheet_id}/values/{cell_range}", params={"valueRenderOption": "FORMATTED_VALUE"}, ) resp.raise_for_status() @@ -164,12 +165,12 @@ async def read_range( async def write_range( self, spreadsheet_id: str, - range: str, + cell_range: str, values: list[list[str]], ) -> dict[str, Any]: """Write values to a spreadsheet range (overwrite).""" resp = await self._client.put( - f"{SHEETS_API_BASE}/{spreadsheet_id}/values/{range}", + f"{SHEETS_API_BASE}/{spreadsheet_id}/values/{cell_range}", params={"valueInputOption": "USER_ENTERED"}, json={"values": values}, ) @@ -179,12 +180,12 @@ async def write_range( async def append_range( self, spreadsheet_id: str, - range: str, + cell_range: str, values: list[list[str]], ) -> dict[str, Any]: """Append values after existing data in a range.""" resp = await self._client.post( - f"{SHEETS_API_BASE}/{spreadsheet_id}/values/{range}:append", + f"{SHEETS_API_BASE}/{spreadsheet_id}/values/{cell_range}:append", params={ "valueInputOption": "USER_ENTERED", "insertDataOption": "INSERT_ROWS", diff --git a/everyrow-mcp/src/everyrow_mcp/sheets_tools.py b/everyrow-mcp/src/everyrow_mcp/sheets_tools.py index 280b8716..6c1aa4cf 100644 --- a/everyrow-mcp/src/everyrow_mcp/sheets_tools.py +++ b/everyrow-mcp/src/everyrow_mcp/sheets_tools.py @@ -9,6 +9,7 @@ import json import logging +import httpx from mcp.types import TextContent, ToolAnnotations from everyrow_mcp.app import mcp @@ -29,6 +30,20 @@ logger = logging.getLogger(__name__) +def _error_message(e: Exception) -> str: + """Format a user-friendly error message from a Google API exception.""" + if isinstance(e, httpx.HTTPStatusError): + status = e.response.status_code + if status == 403: + return "Permission denied. Check that the spreadsheet is shared with you." + if status == 404: + return "Spreadsheet not found. Check the spreadsheet ID or URL." 
+ if status == 429: + return "Rate limited by Google API. Please try again in a moment." + return f"Google API error (HTTP {status}): {e.response.text}" + return f"Error: {e!r}" + + @mcp.tool( name="sheets_list", annotations=ToolAnnotations( @@ -41,11 +56,14 @@ ) async def sheets_list(params: SheetsListInput) -> list[TextContent]: """List the user's Google Sheets, optionally filtered by name.""" - token = await get_google_token() - async with GoogleSheetsClient(token) as client: - files = await client.list_spreadsheets( - query=params.query, max_results=params.max_results - ) + try: + token = await get_google_token() + async with GoogleSheetsClient(token) as client: + files = await client.list_spreadsheets( + query=params.query, max_results=params.max_results + ) + except Exception as e: + return [TextContent(type="text", text=_error_message(e))] if not files: msg = "No spreadsheets found" @@ -83,9 +101,14 @@ async def sheets_read(params: SheetsReadInput) -> list[TextContent]: everyrow_agent(input_json=data, task="Research each company") sheets_write(spreadsheet_id="...", data=enriched_results) """ - token = await get_google_token() - async with GoogleSheetsClient(token) as client: - values = await client.read_range(params.spreadsheet_id, params.range) + try: + token = await get_google_token() + async with GoogleSheetsClient(token) as client: + values = await client.read_range( + params.spreadsheet_id, cell_range=params.range + ) + except Exception as e: + return [TextContent(type="text", text=_error_message(e))] records = values_to_records(values) @@ -127,36 +150,41 @@ async def sheets_write(params: SheetsWriteInput) -> list[TextContent]: Use append=True to add rows after existing data instead of overwriting. 
""" - token = await get_google_token() - values = records_to_values(params.data) - - async with GoogleSheetsClient(token) as client: - if params.append: - result = await client.append_range( - params.spreadsheet_id, params.range, values - ) - updated_range = result.get("updates", {}).get("updatedRange", params.range) - updated_rows = result.get("updates", {}).get( - "updatedRows", len(params.data) - ) - return [ - TextContent( - type="text", - text=f"Appended {updated_rows} rows to {updated_range}.", + try: + token = await get_google_token() + values = records_to_values(params.data) + + async with GoogleSheetsClient(token) as client: + if params.append: + result = await client.append_range( + params.spreadsheet_id, cell_range=params.range, values=values ) - ] - else: - result = await client.write_range( - params.spreadsheet_id, params.range, values - ) - updated_range = result.get("updatedRange", params.range) - updated_rows = result.get("updatedRows", len(params.data) + 1) - return [ - TextContent( - type="text", - text=f"Wrote {updated_rows} rows (including header) to {updated_range}.", + updated_range = result.get("updates", {}).get( + "updatedRange", params.range + ) + updated_rows = result.get("updates", {}).get( + "updatedRows", len(params.data) ) - ] + return [ + TextContent( + type="text", + text=f"Appended {updated_rows} rows to {updated_range}.", + ) + ] + else: + result = await client.write_range( + params.spreadsheet_id, cell_range=params.range, values=values + ) + updated_range = result.get("updatedRange", params.range) + updated_rows = result.get("updatedRows", len(params.data) + 1) + return [ + TextContent( + type="text", + text=f"Wrote {updated_rows} rows (including header) to {updated_range}.", + ) + ] + except Exception as e: + return [TextContent(type="text", text=_error_message(e))] @mcp.tool( @@ -174,20 +202,23 @@ async def sheets_create(params: SheetsCreateInput) -> list[TextContent]: Returns the spreadsheet ID and URL. 
""" - token = await get_google_token() - - async with GoogleSheetsClient(token) as client: - metadata = await client.create_spreadsheet(params.title) - spreadsheet_id = metadata["spreadsheetId"] - url = metadata.get( - "spreadsheetUrl", - f"https://docs.google.com/spreadsheets/d/{spreadsheet_id}", - ) + try: + token = await get_google_token() + + async with GoogleSheetsClient(token) as client: + metadata = await client.create_spreadsheet(params.title) + spreadsheet_id = metadata["spreadsheetId"] + url = metadata.get( + "spreadsheetUrl", + f"https://docs.google.com/spreadsheets/d/{spreadsheet_id}", + ) - # Optionally populate with initial data - if params.data: - values = records_to_values(params.data) - await client.write_range(spreadsheet_id, "Sheet1", values) + # Optionally populate with initial data + if params.data: + values = records_to_values(params.data) + await client.write_range(spreadsheet_id, "Sheet1", values) + except Exception as e: + return [TextContent(type="text", text=_error_message(e))] result = { "spreadsheet_id": spreadsheet_id, @@ -217,10 +248,13 @@ async def sheets_create(params: SheetsCreateInput) -> list[TextContent]: ) async def sheets_info(params: SheetsInfoInput) -> list[TextContent]: """Get metadata about a Google Sheet: title, sheet names, and dimensions.""" - token = await get_google_token() + try: + token = await get_google_token() - async with GoogleSheetsClient(token) as client: - metadata = await client.get_spreadsheet_metadata(params.spreadsheet_id) + async with GoogleSheetsClient(token) as client: + metadata = await client.get_spreadsheet_metadata(params.spreadsheet_id) + except Exception as e: + return [TextContent(type="text", text=_error_message(e))] title = metadata.get("properties", {}).get("title", "Unknown") sheets = [] From b2fd494230112fdc26718a4fe6ddb7c84c66fb29 Mon Sep 17 00:00:00 2001 From: Rafael Poyiadzi Date: Tue, 24 Feb 2026 11:10:59 +0000 Subject: [PATCH 04/11] Fix stdio sheets bug: remove 
output_spreadsheet_title from StdioResultsInput MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit output_spreadsheet_title requires Google OAuth tokens which are only available in HTTP mode. Exposing it in stdio mode caused the function to attempt a Google API call, fail, and return early without writing the CSV — silently breaking the tool's primary functionality. Also bump duplicate-sheet guard from max_results=5 to 50 and document the inherent TOCTOU race condition. Co-Authored-By: Claude Opus 4.6 --- everyrow-mcp/src/everyrow_mcp/models.py | 6 ------ everyrow-mcp/src/everyrow_mcp/tools.py | 17 +++-------------- everyrow-mcp/tests/test_stdio_content.py | 6 +++--- 3 files changed, 6 insertions(+), 23 deletions(-) diff --git a/everyrow-mcp/src/everyrow_mcp/models.py b/everyrow-mcp/src/everyrow_mcp/models.py index 59e9ae73..9caa4c66 100644 --- a/everyrow-mcp/src/everyrow_mcp/models.py +++ b/everyrow-mcp/src/everyrow_mcp/models.py @@ -670,12 +670,6 @@ class StdioResultsInput(BaseModel): ..., description="Full absolute path to the output CSV file (must end in .csv).", ) - output_spreadsheet_title: str | None = Field( - default=None, - description="Create a new Google Sheet with this title and write the full " - "results there. Returns the spreadsheet URL. 
Fails if a sheet with " - "this exact title already exists — pick a unique name.", - ) @field_validator("task_id") @classmethod diff --git a/everyrow-mcp/src/everyrow_mcp/tools.py b/everyrow-mcp/src/everyrow_mcp/tools.py index 9767ce1a..1319af52 100644 --- a/everyrow-mcp/src/everyrow_mcp/tools.py +++ b/everyrow-mcp/src/everyrow_mcp/tools.py @@ -125,8 +125,9 @@ async def _write_results_to_sheet( token = await get_google_token() async with GoogleSheetsClient(token) as client: - # Guard: check for existing sheets with the same title - existing = await client.list_spreadsheets(query=title, max_results=5) + # Best-effort duplicate guard (TOCTOU race is inherent to the + # Drive API — two concurrent creates can both pass this check). + existing = await client.list_spreadsheets(query=title, max_results=50) for f in existing: if f.get("name") == title: raise ValueError( @@ -1130,18 +1131,6 @@ async def everyrow_results_stdio( ) ] - # ── Google Sheets output ───────────────────────────────────── - if params.output_spreadsheet_title: - try: - return await _write_results_to_sheet(df, params.output_spreadsheet_title) - except Exception as e: - return [ - TextContent( - type="text", - text=f"Failed to write results to Google Sheet: {e!r}", - ) - ] - output_file = Path(params.output_path) save_result_to_csv(df, output_file) artifact_line = f"\nOutput artifact_id: {artifact_id}" if artifact_id else "" diff --git a/everyrow-mcp/tests/test_stdio_content.py b/everyrow-mcp/tests/test_stdio_content.py index 159cec09..f04dc739 100644 --- a/everyrow-mcp/tests/test_stdio_content.py +++ b/everyrow-mcp/tests/test_stdio_content.py @@ -585,10 +585,10 @@ def test_http_results_schema_includes_output_spreadsheet_title(self): schema = HttpResultsInput.model_json_schema() assert "output_spreadsheet_title" in schema["properties"] - def test_stdio_results_schema_includes_output_spreadsheet_title(self): - """StdioResultsInput schema includes output_spreadsheet_title for Google Sheets export.""" + 
def test_stdio_results_schema_excludes_output_spreadsheet_title(self): + """StdioResultsInput must not expose output_spreadsheet_title (requires HTTP OAuth).""" schema = StdioResultsInput.model_json_schema() - assert "output_spreadsheet_title" in schema["properties"] + assert "output_spreadsheet_title" not in schema["properties"] @pytest.mark.parametrize( "tool_name,def_name", From 33f1cb2ff67dd0591d4fcf36e30a6305a052fee9 Mon Sep 17 00:00:00 2001 From: Rafael Poyiadzi Date: Tue, 24 Feb 2026 11:22:40 +0000 Subject: [PATCH 05/11] Gate sheets tools behind ENABLE_SHEETS_TOOLS env flag Sheets tools were always registered in HTTP mode and removed in stdio. Now they're opt-in via `enable_sheets_tools` (default False), so HTTP deployments without Google OAuth configured don't expose them either. Also strips `output_spreadsheet_title` from the HTTP results schema when sheets are disabled. Co-Authored-By: Claude Opus 4.6 --- everyrow-mcp/src/everyrow_mcp/config.py | 4 ++++ everyrow-mcp/src/everyrow_mcp/server.py | 23 +++++++++++------------ everyrow-mcp/tests/test_mcp_e2e.py | 6 +++++- 3 files changed, 20 insertions(+), 13 deletions(-) diff --git a/everyrow-mcp/src/everyrow_mcp/config.py b/everyrow-mcp/src/everyrow_mcp/config.py index ebd67974..4cc83ce5 100644 --- a/everyrow-mcp/src/everyrow_mcp/config.py +++ b/everyrow-mcp/src/everyrow_mcp/config.py @@ -125,6 +125,10 @@ class Settings(BaseSettings): description="Upload rate limit sliding window in seconds (1 hour)", ) + enable_sheets_tools: bool = Field( + default=False, + description="Enable Google Sheets tools (requires HTTP mode with Google OAuth)", + ) everyrow_api_key: str | None = Field(default=None, repr=False) google_sheets_credentials_json: str | None = Field( default=None, diff --git a/everyrow-mcp/src/everyrow_mcp/server.py b/everyrow-mcp/src/everyrow_mcp/server.py index 5bc0d309..800c0257 100644 --- a/everyrow-mcp/src/everyrow_mcp/server.py +++ b/everyrow-mcp/src/everyrow_mcp/server.py @@ -8,7 +8,6 @@ from 
pydantic import BaseModel -import everyrow_mcp.sheets_tools import everyrow_mcp.tools # noqa: F401 — registers @mcp.tool() decorators from everyrow_mcp.app import get_instructions, mcp from everyrow_mcp.config import settings @@ -21,6 +20,10 @@ ) from everyrow_mcp.uploads import register_upload_tool +# Only register sheets tools when enabled (requires HTTP mode + Google OAuth) +if settings.enable_sheets_tools: + import everyrow_mcp.sheets_tools # noqa: F401 + class InputArgs(BaseModel): http: bool = False @@ -93,6 +96,13 @@ def main(): meta=_RESULTS_META, )(everyrow_results_http) + # Strip output_spreadsheet_title from results schema when sheets disabled + if not settings.enable_sheets_tools: + tool = mcp._tool_manager.get_tool("everyrow_results") + if tool: + http_def = tool.parameters.get("$defs", {}).get("HttpResultsInput", {}) + http_def.get("properties", {}).pop("output_spreadsheet_title", None) + if input_args.http: # ── HTTP mode logging ────────────────────────────────────── # INFO level so operational events show up in Cloud Logging. @@ -136,17 +146,6 @@ def main(): logging.error("Get an API key at https://everyrow.io/api-key") sys.exit(1) - # Sheets tools require HTTP mode (OAuth provides the Google token). - # Remove them from the tool manager so they don't appear in list_tools(). 
- for name in ( - "sheets_list", - "sheets_read", - "sheets_write", - "sheets_create", - "sheets_info", - ): - mcp._tool_manager.remove_tool(name) - mcp.run(transport=transport.value) diff --git a/everyrow-mcp/tests/test_mcp_e2e.py b/everyrow-mcp/tests/test_mcp_e2e.py index 3055dafa..0cb61a11 100644 --- a/everyrow-mcp/tests/test_mcp_e2e.py +++ b/everyrow-mcp/tests/test_mcp_e2e.py @@ -70,7 +70,11 @@ def _http_state(fake_redis): )(everyrow_results_http) with ( - override_settings(transport="streamable-http", upload_secret="test-secret"), + override_settings( + transport="streamable-http", + upload_secret="test-secret", + enable_sheets_tools=True, + ), patch.object(redis_store, "get_redis_client", return_value=fake_redis), patch("everyrow_mcp.tools.get_access_token", _fake_access_token), patch("everyrow_mcp.tool_helpers.get_access_token", _fake_access_token), From 499c8f9297a444db8d6f558db69667672f579720 Mon Sep 17 00:00:00 2001 From: Rafael Poyiadzi Date: Wed, 25 Feb 2026 09:35:52 +0000 Subject: [PATCH 06/11] Security hardening: Sheets token encryption, user-scoped storage, rate limits, input validation, error sanitization - Encrypt Google tokens at rest in Redis (C1) and key by user_id not "current" (C2) - Add per-user rate limiting on all 5 sheets tools (H3) - Sanitize error messages to avoid leaking response bodies or internal state (H1, L4) - Validate A1 range notation to block path traversal and injection chars (M1) - Sanitize Drive API query to strip non-alphanumeric chars (M6) - Narrow OAuth scope from drive.readonly to drive.metadata.readonly (M3) - Re-raise on token storage failure instead of silently swallowing (M4) - Log only exception type on refresh failure, not full stack trace (M2) - Use server-provided expires_in for token TTL instead of hardcoded constant (L1) - Move sheets import into main() so transport is set before registration (L3) - Mark sheets_write as destructiveHint=True (M5) with audit logging (H4) Co-Authored-By: Claude Opus 4.6 --- 
everyrow-mcp/src/everyrow_mcp/auth.py | 14 +- everyrow-mcp/src/everyrow_mcp/config.py | 6 + everyrow-mcp/src/everyrow_mcp/server.py | 8 +- .../src/everyrow_mcp/sheets_client.py | 70 +++++--- .../src/everyrow_mcp/sheets_models.py | 26 +++ everyrow-mcp/src/everyrow_mcp/sheets_tools.py | 78 ++++++++- everyrow-mcp/src/everyrow_mcp/tools.py | 3 +- everyrow-mcp/tests/test_sheets_tools.py | 160 ++++++++++++++++++ 8 files changed, 328 insertions(+), 37 deletions(-) diff --git a/everyrow-mcp/src/everyrow_mcp/auth.py b/everyrow-mcp/src/everyrow_mcp/auth.py index 54164362..ee674001 100644 --- a/everyrow-mcp/src/everyrow_mcp/auth.py +++ b/everyrow-mcp/src/everyrow_mcp/auth.py @@ -246,7 +246,7 @@ def _supabase_redirect_url(supabase_verifier: str) -> str: 'code_challenge_method': 's256', 'scopes': ( 'https://www.googleapis.com/auth/spreadsheets ' - 'https://www.googleapis.com/auth/drive.readonly' + 'https://www.googleapis.com/auth/drive.metadata.readonly' ), } ) @@ -479,9 +479,15 @@ async def _issue_token_response( if google_access_token: from everyrow_mcp.sheets_client import store_google_token # noqa: PLC0415 - await store_google_token( - "current", google_access_token, google_refresh_token or None - ) + try: + await store_google_token( + jwt_claims.get("sub", "unknown"), + google_access_token, + google_refresh_token or None, + expires_in=expires_in, + ) + except Exception: + logger.warning("Could not store Google token during token issue") rt_str = secrets.token_urlsafe(32) rt = EveryRowRefreshToken( diff --git a/everyrow-mcp/src/everyrow_mcp/config.py b/everyrow-mcp/src/everyrow_mcp/config.py index 4cc83ce5..32009284 100644 --- a/everyrow-mcp/src/everyrow_mcp/config.py +++ b/everyrow-mcp/src/everyrow_mcp/config.py @@ -129,6 +129,12 @@ class Settings(BaseSettings): default=False, description="Enable Google Sheets tools (requires HTTP mode with Google OAuth)", ) + sheets_rate_limit: PositiveInt = Field( + default=60, description="Max sheets ops per user per rate window" + ) + 
sheets_rate_window: PositiveInt = Field( + default=60, description="Sheets rate limit window in seconds" + ) everyrow_api_key: str | None = Field(default=None, repr=False) google_sheets_credentials_json: str | None = Field( default=None, diff --git a/everyrow-mcp/src/everyrow_mcp/server.py b/everyrow-mcp/src/everyrow_mcp/server.py index 800c0257..9ae0d10a 100644 --- a/everyrow-mcp/src/everyrow_mcp/server.py +++ b/everyrow-mcp/src/everyrow_mcp/server.py @@ -20,10 +20,6 @@ ) from everyrow_mcp.uploads import register_upload_tool -# Only register sheets tools when enabled (requires HTTP mode + Google OAuth) -if settings.enable_sheets_tools: - import everyrow_mcp.sheets_tools # noqa: F401 - class InputArgs(BaseModel): http: bool = False @@ -84,6 +80,10 @@ def main(): settings.transport = transport.value mcp._mcp_server.instructions = get_instructions(is_http=input_args.http) + # Register sheets tools after transport is set (they require HTTP mode) + if settings.enable_sheets_tools and settings.is_http: + import everyrow_mcp.sheets_tools # noqa: F401, PLC0415 + # tools.py registers everyrow_results_stdio by default. # Override with the HTTP variant when running in HTTP mode. # ToolManager.add_tool() is a no-op for existing names, so remove first. 
diff --git a/everyrow-mcp/src/everyrow_mcp/sheets_client.py b/everyrow-mcp/src/everyrow_mcp/sheets_client.py index 4477c9b9..52e4c974 100644 --- a/everyrow-mcp/src/everyrow_mcp/sheets_client.py +++ b/everyrow-mcp/src/everyrow_mcp/sheets_client.py @@ -9,12 +9,18 @@ import json import logging +import re import time from typing import Any import httpx -from everyrow_mcp.redis_store import build_key, get_redis_client +from everyrow_mcp.redis_store import ( + build_key, + decrypt_value, + encrypt_value, + get_redis_client, +) logger = logging.getLogger(__name__) @@ -22,15 +28,14 @@ DRIVE_API_BASE = "https://www.googleapis.com/drive/v3" # Google token TTL and refresh buffer -GOOGLE_TOKEN_TTL = 3600 # 1 hour +GOOGLE_TOKEN_TTL_DEFAULT = 3600 # 1 hour GOOGLE_TOKEN_REFRESH_BUFFER = 300 # refresh 5 min before expiry -GOOGLE_TOKEN_REDIS_TTL = 3600 # store for 1 hour in Redis # ── Token resolution ────────────────────────────────────────────────── -async def get_google_token() -> str: +async def get_google_token(user_id: str | None = None) -> str: """Resolve a valid Google access token from Redis. The token is stored during the OAuth callback when the user logs in @@ -38,15 +43,25 @@ async def get_google_token() -> str: Only available in HTTP mode — sheets tools are removed in stdio mode. """ + if user_id is None: + from mcp.server.auth.middleware.auth_context import ( # noqa: PLC0415 + get_access_token, + ) + + access_token = get_access_token() + user_id = access_token.client_id if access_token else None + if not user_id: + raise RuntimeError( + "No authenticated user. The user must log in with Google " + "(with Sheets scopes) to use Google Sheets tools." + ) redis = get_redis_client() - # NOTE: single-tenant — "current" key is shared. If multi-tenancy is - # needed, key by session/user ID instead. 
- token_key = build_key("google_token", "current") - token_data = await redis.get(token_key) - if token_data: - data = json.loads(token_data) + token_key = build_key("google_token", user_id) + raw = await redis.get(token_key) + if raw: + data = json.loads(decrypt_value(raw)) expires_at = data.get("expires_at", 0) if time.time() < expires_at - GOOGLE_TOKEN_REFRESH_BUFFER: return data["access_token"] @@ -55,11 +70,9 @@ async def get_google_token() -> str: refresh_token = data.get("refresh_token") if refresh_token: try: - return await _refresh_google_token_http(refresh_token) - except Exception: - logger.warning( - "Failed to refresh Google token, using existing", exc_info=True - ) + return await _refresh_google_token_http(refresh_token, user_id) + except Exception as e: + logger.warning("Failed to refresh Google token: %s", type(e).__name__) if time.time() < expires_at: return data["access_token"] @@ -69,7 +82,7 @@ async def get_google_token() -> str: ) -async def _refresh_google_token_http(refresh_token: str) -> str: +async def _refresh_google_token_http(refresh_token: str, user_id: str) -> str: """Refresh a Google access token using the Supabase-stored refresh token.""" from everyrow_mcp.config import settings # noqa: PLC0415 @@ -88,11 +101,14 @@ async def _refresh_google_token_http(refresh_token: str) -> str: provider_token = data.get("provider_token", "") provider_refresh_token = data.get("provider_refresh_token", refresh_token) + expires_in = data.get("expires_in") if not provider_token: raise RuntimeError("Supabase refresh did not return a Google provider_token") - await store_google_token("current", provider_token, provider_refresh_token) + await store_google_token( + user_id, provider_token, provider_refresh_token, expires_in=expires_in + ) return provider_token @@ -100,26 +116,31 @@ async def store_google_token( user_id: str, access_token: str, refresh_token: str | None = None, + *, + expires_in: int | None = None, ) -> None: """Store Google access token in 
Redis with TTL.""" try: redis = get_redis_client() except Exception: - return + logger.error("Failed to obtain Redis client for Google token storage") + raise + ttl = expires_in if expires_in and expires_in > 0 else GOOGLE_TOKEN_TTL_DEFAULT try: - data = { + data: dict[str, Any] = { "access_token": access_token, - "expires_at": time.time() + GOOGLE_TOKEN_TTL, + "expires_at": time.time() + ttl, } if refresh_token: data["refresh_token"] = refresh_token await redis.setex( build_key("google_token", user_id), - GOOGLE_TOKEN_REDIS_TTL, - json.dumps(data), + ttl, + encrypt_value(json.dumps(data)), ) except Exception: - logger.warning("Failed to store Google token in Redis for %s", user_id) + logger.error("Failed to store Google token in Redis for %s", user_id) + raise # ── Sheets API client ───────────────────────────────────────────────── @@ -224,8 +245,7 @@ async def list_spreadsheets( """ q = "mimeType='application/vnd.google-apps.spreadsheet' and trashed=false" if query: - # Escape single quotes in the user's query - safe_query = query.replace("'", "\\'") + safe_query = re.sub(r"[^a-zA-Z0-9 ]", "", query) q += f" and name contains '{safe_query}'" resp = await self._client.get( diff --git a/everyrow-mcp/src/everyrow_mcp/sheets_models.py b/everyrow-mcp/src/everyrow_mcp/sheets_models.py index 48778801..b0d46595 100644 --- a/everyrow-mcp/src/everyrow_mcp/sheets_models.py +++ b/everyrow-mcp/src/everyrow_mcp/sheets_models.py @@ -10,6 +10,10 @@ # Matches the 44-char alphanumeric spreadsheet ID in a Google Sheets URL _SHEETS_URL_RE = re.compile(r"/spreadsheets/d/([a-zA-Z0-9_-]+)") +# A1 notation range validation +_A1_RANGE_RE = re.compile(r"^[A-Za-z0-9_' !:$]+$") +_MAX_RANGE_LENGTH = 200 + def _extract_spreadsheet_id(v: str) -> str: """Accept a full Google Sheets URL or a bare spreadsheet ID. 
@@ -31,6 +35,18 @@ def _extract_spreadsheet_id(v: str) -> str: ) +def _validate_a1_range(v: str) -> str: + """Validate an A1 notation range string.""" + if len(v) > _MAX_RANGE_LENGTH: + raise ValueError(f"Range too long ({len(v)} chars, max {_MAX_RANGE_LENGTH})") + if not _A1_RANGE_RE.fullmatch(v): + raise ValueError( + "Invalid range: contains disallowed characters. " + "Use A1 notation (e.g. 'Sheet1!A1:D10')." + ) + return v + + class SheetsReadInput(BaseModel): """Input for the sheets_read tool.""" @@ -53,6 +69,11 @@ class SheetsReadInput(BaseModel): def extract_id(cls, v: str) -> str: return _extract_spreadsheet_id(v) + @field_validator("range") + @classmethod + def validate_range(cls, v: str) -> str: + return _validate_a1_range(v) + class SheetsWriteInput(BaseModel): """Input for the sheets_write tool.""" @@ -84,6 +105,11 @@ class SheetsWriteInput(BaseModel): def extract_id(cls, v: str) -> str: return _extract_spreadsheet_id(v) + @field_validator("range") + @classmethod + def validate_range(cls, v: str) -> str: + return _validate_a1_range(v) + class SheetsCreateInput(BaseModel): """Input for the sheets_create tool.""" diff --git a/everyrow-mcp/src/everyrow_mcp/sheets_tools.py b/everyrow-mcp/src/everyrow_mcp/sheets_tools.py index 6c1aa4cf..5cb6c10e 100644 --- a/everyrow-mcp/src/everyrow_mcp/sheets_tools.py +++ b/everyrow-mcp/src/everyrow_mcp/sheets_tools.py @@ -10,9 +10,12 @@ import logging import httpx +from mcp.server.auth.middleware.auth_context import get_access_token from mcp.types import TextContent, ToolAnnotations from everyrow_mcp.app import mcp +from everyrow_mcp.config import settings +from everyrow_mcp.redis_store import build_key, get_redis_client from everyrow_mcp.sheets_client import ( GoogleSheetsClient, get_google_token, @@ -40,8 +43,48 @@ def _error_message(e: Exception) -> str: return "Spreadsheet not found. Check the spreadsheet ID or URL." if status == 429: return "Rate limited by Google API. Please try again in a moment." 
- return f"Google API error (HTTP {status}): {e.response.text}" - return f"Error: {e!r}" + return f"Google API error (HTTP {status}). Please try again." + return f"Sheets operation failed ({type(e).__name__}). Please try again." + + +async def _check_sheets_rate_limit() -> list[TextContent] | None: + """Enforce per-user rate limiting on sheets operations. + + Returns an error response if the rate limit is exceeded, or ``None`` if OK. + Only active in HTTP mode; always returns ``None`` for stdio. + Fail-open if Redis is unavailable. + """ + if not settings.is_http: + return None + + try: + access_token = get_access_token() + user_id = access_token.client_id if access_token else "anonymous" + redis = get_redis_client() + rl_key = build_key("ratelimit", "sheets", user_id) + async with redis.pipeline() as pipe: + pipe.incr(rl_key) + pipe.expire(rl_key, settings.sheets_rate_window, nx=True) + count, _ = await pipe.execute() + if count > settings.sheets_rate_limit: + return [ + TextContent( + type="text", + text="Sheets rate limit exceeded. 
Please wait before trying again.", + ) + ] + except Exception: + logger.debug("Sheets rate limit check failed (fail-open)", exc_info=True) + return None + + +def _audit_user_id() -> str: + """Best-effort user ID for audit logs.""" + try: + token = get_access_token() + return token.client_id if token else "unknown" + except Exception: + return "unknown" @mcp.tool( @@ -56,6 +99,8 @@ def _error_message(e: Exception) -> str: ) async def sheets_list(params: SheetsListInput) -> list[TextContent]: """List the user's Google Sheets, optionally filtered by name.""" + if denied := await _check_sheets_rate_limit(): + return denied try: token = await get_google_token() async with GoogleSheetsClient(token) as client: @@ -101,6 +146,8 @@ async def sheets_read(params: SheetsReadInput) -> list[TextContent]: everyrow_agent(input_json=data, task="Research each company") sheets_write(spreadsheet_id="...", data=enriched_results) """ + if denied := await _check_sheets_rate_limit(): + return denied try: token = await get_google_token() async with GoogleSheetsClient(token) as client: @@ -133,7 +180,7 @@ async def sheets_read(params: SheetsReadInput) -> list[TextContent]: annotations=ToolAnnotations( title="Write to Google Sheet", readOnlyHint=False, - destructiveHint=False, + destructiveHint=True, idempotentHint=False, openWorldHint=True, ), @@ -150,6 +197,8 @@ async def sheets_write(params: SheetsWriteInput) -> list[TextContent]: Use append=True to add rows after existing data instead of overwriting. 
""" + if denied := await _check_sheets_rate_limit(): + return denied try: token = await get_google_token() values = records_to_values(params.data) @@ -165,6 +214,12 @@ async def sheets_write(params: SheetsWriteInput) -> list[TextContent]: updated_rows = result.get("updates", {}).get( "updatedRows", len(params.data) ) + logger.info( + "AUDIT sheets_write user=%s spreadsheet=%s rows=%s append=true", + _audit_user_id(), + params.spreadsheet_id, + updated_rows, + ) return [ TextContent( type="text", @@ -177,6 +232,12 @@ async def sheets_write(params: SheetsWriteInput) -> list[TextContent]: ) updated_range = result.get("updatedRange", params.range) updated_rows = result.get("updatedRows", len(params.data) + 1) + logger.info( + "AUDIT sheets_write user=%s spreadsheet=%s rows=%s append=false", + _audit_user_id(), + params.spreadsheet_id, + updated_rows, + ) return [ TextContent( type="text", @@ -202,6 +263,8 @@ async def sheets_create(params: SheetsCreateInput) -> list[TextContent]: Returns the spreadsheet ID and URL. 
""" + if denied := await _check_sheets_rate_limit(): + return denied try: token = await get_google_token() @@ -220,6 +283,13 @@ async def sheets_create(params: SheetsCreateInput) -> list[TextContent]: except Exception as e: return [TextContent(type="text", text=_error_message(e))] + logger.info( + "AUDIT sheets_create user=%s spreadsheet=%s rows=%s", + _audit_user_id(), + spreadsheet_id, + len(params.data) if params.data else 0, + ) + result = { "spreadsheet_id": spreadsheet_id, "url": url, @@ -248,6 +318,8 @@ async def sheets_create(params: SheetsCreateInput) -> list[TextContent]: ) async def sheets_info(params: SheetsInfoInput) -> list[TextContent]: """Get metadata about a Google Sheet: title, sheet names, and dimensions.""" + if denied := await _check_sheets_rate_limit(): + return denied try: token = await get_google_token() diff --git a/everyrow-mcp/src/everyrow_mcp/tools.py b/everyrow-mcp/src/everyrow_mcp/tools.py index 1319af52..6e10c790 100644 --- a/everyrow-mcp/src/everyrow_mcp/tools.py +++ b/everyrow-mcp/src/everyrow_mcp/tools.py @@ -1219,10 +1219,11 @@ async def everyrow_results_http( try: return await _write_results_to_sheet(df, params.output_spreadsheet_title) except Exception as e: + logger.exception("Failed to write results to Google Sheet") return [ TextContent( type="text", - text=f"Failed to write results to Google Sheet: {e!r}", + text=f"Failed to write results to Google Sheet ({type(e).__name__}). 
Please try again.", ) ] diff --git a/everyrow-mcp/tests/test_sheets_tools.py b/everyrow-mcp/tests/test_sheets_tools.py index 7a44ab53..b36c153b 100644 --- a/everyrow-mcp/tests/test_sheets_tools.py +++ b/everyrow-mcp/tests/test_sheets_tools.py @@ -26,6 +26,7 @@ _extract_spreadsheet_id, ) from everyrow_mcp.sheets_tools import ( + _error_message, sheets_create, sheets_info, sheets_list, @@ -33,6 +34,18 @@ sheets_write, ) + +@pytest.fixture(autouse=True) +def _no_rate_limit(): + """Disable rate limiting for all tool tests.""" + with patch( + "everyrow_mcp.sheets_tools._check_sheets_rate_limit", + new_callable=AsyncMock, + return_value=None, + ): + yield + + # ── Model validation tests ─────────────────────────────────────────── @@ -568,3 +581,150 @@ async def test_max_results(self, _mock_google_token): call_params = mock_get.call_args[1]["params"] assert call_params["pageSize"] == "5" + + +# ── Range validation tests (M1) ───────────────────────────────────── + + +class TestRangeValidation: + """Test A1 notation range validation on SheetsReadInput and SheetsWriteInput.""" + + _VALID_ID = "abc123def456ghi789jkl012mno345pqr678stu901v" + + def test_simple_range(self): + inp = SheetsReadInput(spreadsheet_id=self._VALID_ID, range="Sheet1!A1:D10") + assert inp.range == "Sheet1!A1:D10" + + def test_sheet_name_only(self): + inp = SheetsReadInput(spreadsheet_id=self._VALID_ID, range="Sheet1") + assert inp.range == "Sheet1" + + def test_quoted_sheet_name(self): + inp = SheetsReadInput(spreadsheet_id=self._VALID_ID, range="'My Sheet'!A1:B5") + assert inp.range == "'My Sheet'!A1:B5" + + def test_absolute_refs(self): + inp = SheetsReadInput(spreadsheet_id=self._VALID_ID, range="Sheet1!$A$1:$D$10") + assert inp.range == "Sheet1!$A$1:$D$10" + + def test_column_range(self): + inp = SheetsReadInput(spreadsheet_id=self._VALID_ID, range="Sheet1!B:B") + assert inp.range == "Sheet1!B:B" + + def test_rejects_url_significant_chars(self): + with pytest.raises(Exception, match="Invalid 
range"): + SheetsReadInput(spreadsheet_id=self._VALID_ID, range="Sheet1/../etc/passwd") + + def test_rejects_path_traversal(self): + with pytest.raises(Exception, match="Invalid range"): + SheetsReadInput(spreadsheet_id=self._VALID_ID, range="../../secret") + + def test_rejects_semicolons(self): + with pytest.raises(Exception, match="Invalid range"): + SheetsReadInput(spreadsheet_id=self._VALID_ID, range="Sheet1;DROP TABLE") + + def test_rejects_too_long(self): + with pytest.raises(Exception, match="Range too long"): + SheetsReadInput(spreadsheet_id=self._VALID_ID, range="A" * 201) + + def test_write_input_validates_too(self): + with pytest.raises(Exception, match="Invalid range"): + SheetsWriteInput( + spreadsheet_id=self._VALID_ID, + range="Sheet1/../hack", + data=[{"a": "1"}], + ) + + def test_write_input_valid(self): + inp = SheetsWriteInput( + spreadsheet_id=self._VALID_ID, + range="Sheet1!A1:B5", + data=[{"a": "1"}], + ) + assert inp.range == "Sheet1!A1:B5" + + +# ── Error message sanitization tests (H1) ──────────────────────────── + + +class TestErrorMessageSanitization: + """Ensure error messages don't leak internal details.""" + + def test_http_500_no_response_body(self): + """HTTP 500 error should not include response body.""" + resp = httpx.Response( + status_code=500, + text="Internal server error with secret details", + request=httpx.Request("GET", "https://sheets.googleapis.com/test"), + ) + exc = httpx.HTTPStatusError("error", request=resp.request, response=resp) + msg = _error_message(exc) + assert "secret details" not in msg + assert "500" in msg + assert "Please try again" in msg + + def test_catchall_no_repr(self): + """Catch-all should not include full repr of the exception.""" + exc = RuntimeError("sensitive internal state: token=abc123") + msg = _error_message(exc) + assert "sensitive internal state" not in msg + assert "token=abc123" not in msg + assert "RuntimeError" in msg + assert "Please try again" in msg + + def 
test_known_statuses_unchanged(self): + """403/404/429 messages should remain user-friendly.""" + for status, keyword in [ + (403, "Permission"), + (404, "not found"), + (429, "Rate limited"), + ]: + resp = httpx.Response( + status_code=status, + text="details", + request=httpx.Request("GET", "https://example.com"), + ) + exc = httpx.HTTPStatusError("err", request=resp.request, response=resp) + msg = _error_message(exc) + assert keyword in msg + assert "details" not in msg + + +# ── Drive query sanitization tests (M6) ────────────────────────────── + + +class TestDriveQuerySanitization: + """Ensure special characters are stripped from Drive API queries.""" + + @pytest.mark.asyncio + async def test_special_chars_stripped(self): + """Quotes and special chars should be removed from the query.""" + mock_resp = _mock_response({"files": []}) + + with patch.object( + httpx.AsyncClient, "get", new_callable=AsyncMock, return_value=mock_resp + ) as mock_get: + async with GoogleSheetsClient("fake-token") as client: + await client.list_spreadsheets(query="Budget' OR 1=1--") + + call_params = mock_get.call_args[1]["params"] + q = call_params["q"] + # Extract just the user query part from: ... 
name contains 'SANITIZED' + # The sanitized result should be "Budget OR 11" (only alphanum + spaces) + assert "name contains 'Budget OR 11'" in q + # Injection chars must not survive + assert "1=1--" not in q + + @pytest.mark.asyncio + async def test_clean_query_passes_through(self): + """Alphanumeric queries with spaces should pass through.""" + mock_resp = _mock_response({"files": []}) + + with patch.object( + httpx.AsyncClient, "get", new_callable=AsyncMock, return_value=mock_resp + ) as mock_get: + async with GoogleSheetsClient("fake-token") as client: + await client.list_spreadsheets(query="Budget 2024") + + call_params = mock_get.call_args[1]["params"] + assert "Budget 2024" in call_params["q"] From eb2067adbb52f0677df4e69d20d4c5090895c4b8 Mon Sep 17 00:00:00 2001 From: Rafael Poyiadzi Date: Wed, 25 Feb 2026 09:44:31 +0000 Subject: [PATCH 07/11] Fix formatting: remove extra blank line in utils.py Co-Authored-By: Claude Opus 4.6 --- everyrow-mcp/src/everyrow_mcp/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/everyrow-mcp/src/everyrow_mcp/utils.py b/everyrow-mcp/src/everyrow_mcp/utils.py index 4223020b..d3820cbe 100644 --- a/everyrow-mcp/src/everyrow_mcp/utils.py +++ b/everyrow-mcp/src/everyrow_mcp/utils.py @@ -359,7 +359,6 @@ async def fetch_csv_from_url(url: str) -> pd.DataFrame: ) - def validate_csv_path(path: str) -> None: """Validate that a CSV file exists and is readable. From f5f4bb389df41beb7e31911f6d437f51dd48d424 Mon Sep 17 00:00:00 2001 From: Rafael Poyiadzi Date: Wed, 25 Feb 2026 09:56:00 +0000 Subject: [PATCH 08/11] Fix None values written as literal "None" in Google Sheets records_to_values used str(record.get(h, "")) which only defaulted to "" for missing keys. When the key existed with value None (from pandas NaN), str(None) produced the string "None" in sheet cells instead of empty. 
Co-Authored-By: Claude Opus 4.6 --- everyrow-mcp/src/everyrow_mcp/sheets_client.py | 7 ++++++- everyrow-mcp/tests/test_sheets_tools.py | 7 +++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/everyrow-mcp/src/everyrow_mcp/sheets_client.py b/everyrow-mcp/src/everyrow_mcp/sheets_client.py index 52e4c974..a26ee968 100644 --- a/everyrow-mcp/src/everyrow_mcp/sheets_client.py +++ b/everyrow-mcp/src/everyrow_mcp/sheets_client.py @@ -301,5 +301,10 @@ def records_to_values(records: list[dict[str, Any]]) -> list[list[str]]: rows = [headers] for record in records: - rows.append([str(record.get(h, "")) for h in headers]) + rows.append( + [ + str(v) if v is not None else "" + for v in (record.get(h, "") for h in headers) + ] + ) return rows diff --git a/everyrow-mcp/tests/test_sheets_tools.py b/everyrow-mcp/tests/test_sheets_tools.py index b36c153b..0694342b 100644 --- a/everyrow-mcp/tests/test_sheets_tools.py +++ b/everyrow-mcp/tests/test_sheets_tools.py @@ -194,6 +194,13 @@ def test_missing_keys_become_empty(self): values = records_to_values(records) assert values[2] == ["3", ""] + def test_none_values_become_empty(self): + """None values (e.g. from pandas NaN) should become empty strings, not 'None'.""" + records = [{"name": "Alice", "age": None}, {"name": None, "age": "30"}] + values = records_to_values(records) + assert values[1] == ["Alice", ""] + assert values[2] == ["", "30"] + # ── Client tests (mocked httpx) ───────────────────────────────────── From 87de872b16c8b32379cb8fb6d7c14f9fa275fe89 Mon Sep 17 00:00:00 2001 From: Rafael Poyiadzi Date: Wed, 25 Feb 2026 09:58:58 +0000 Subject: [PATCH 09/11] Add overwrite guards to sheets_write and sheets_create sheets_write: reads target range before overwriting. If data exists, returns a warning asking the user to confirm_overwrite=True or use append=True. Empty ranges proceed without confirmation. sheets_create: checks Drive for an existing spreadsheet with the same title before creating. 
Returns a warning if a duplicate is found. Co-Authored-By: Claude Opus 4.6 --- .../src/everyrow_mcp/sheets_models.py | 5 ++ everyrow-mcp/src/everyrow_mcp/sheets_tools.py | 32 +++++++ everyrow-mcp/tests/test_sheets_tools.py | 85 ++++++++++++++++++- 3 files changed, 118 insertions(+), 4 deletions(-) diff --git a/everyrow-mcp/src/everyrow_mcp/sheets_models.py b/everyrow-mcp/src/everyrow_mcp/sheets_models.py index b0d46595..3b2c1a55 100644 --- a/everyrow-mcp/src/everyrow_mcp/sheets_models.py +++ b/everyrow-mcp/src/everyrow_mcp/sheets_models.py @@ -99,6 +99,11 @@ class SheetsWriteInput(BaseModel): default=False, description="If True, append after existing data instead of overwriting.", ) + confirm_overwrite: bool = Field( + default=False, + description="Must be set to True to overwrite existing data when append=False. " + "The tool will check if the range has data and warn you first.", + ) @field_validator("spreadsheet_id") @classmethod diff --git a/everyrow-mcp/src/everyrow_mcp/sheets_tools.py b/everyrow-mcp/src/everyrow_mcp/sheets_tools.py index 5cb6c10e..b100d077 100644 --- a/everyrow-mcp/src/everyrow_mcp/sheets_tools.py +++ b/everyrow-mcp/src/everyrow_mcp/sheets_tools.py @@ -227,6 +227,23 @@ async def sheets_write(params: SheetsWriteInput) -> list[TextContent]: ) ] else: + # Pre-check: warn if the target range already has data + if not params.confirm_overwrite: + existing = await client.read_range( + params.spreadsheet_id, cell_range=params.range + ) + if existing: + existing_rows = len(existing) + return [ + TextContent( + type="text", + text=f"The range '{params.range}' already contains {existing_rows} rows " + f"(including headers). Writing will overwrite this data. 
" + f"To proceed, call again with confirm_overwrite=True, " + f"or use append=True to add rows after existing data.", + ) + ] + result = await client.write_range( params.spreadsheet_id, cell_range=params.range, values=values ) @@ -269,6 +286,21 @@ async def sheets_create(params: SheetsCreateInput) -> list[TextContent]: token = await get_google_token() async with GoogleSheetsClient(token) as client: + # Duplicate title guard + existing = await client.list_spreadsheets( + query=params.title, max_results=50 + ) + for f in existing: + if f.get("name") == params.title: + return [ + TextContent( + type="text", + text=f"A spreadsheet named '{params.title}' already exists " + f"(id: {f['id']}). Pick a different title to avoid " + f"creating a duplicate.", + ) + ] + metadata = await client.create_spreadsheet(params.title) spreadsheet_id = metadata["spreadsheetId"] url = metadata.get( diff --git a/everyrow-mcp/tests/test_sheets_tools.py b/everyrow-mcp/tests/test_sheets_tools.py index 0694342b..328f6665 100644 --- a/everyrow-mcp/tests/test_sheets_tools.py +++ b/everyrow-mcp/tests/test_sheets_tools.py @@ -384,7 +384,7 @@ async def test_url_extraction(self, _mock_google_token): class TestSheetsWriteTool: @pytest.mark.asyncio - async def test_write_overwrite(self, _mock_google_token): + async def test_write_overwrite_confirmed(self, _mock_google_token): mock_resp = _mock_response( { "updatedRange": "Sheet1!A1:B3", @@ -399,6 +399,51 @@ async def test_write_overwrite(self, _mock_google_token): SheetsWriteInput( spreadsheet_id="abc123def456ghi789jkl012mno345pqr678stu901v", data=[{"name": "Alice"}, {"name": "Bob"}], + confirm_overwrite=True, + ) + ) + + assert "Wrote" in result[0].text + + @pytest.mark.asyncio + async def test_write_overwrite_warns_if_existing_data(self, _mock_google_token): + """Writing without confirm_overwrite warns when range has data.""" + read_resp = _mock_response({"values": [["name"], ["Alice"]]}) + + with patch.object( + httpx.AsyncClient, "get", 
new_callable=AsyncMock, return_value=read_resp + ): + result = await sheets_write( + SheetsWriteInput( + spreadsheet_id="abc123def456ghi789jkl012mno345pqr678stu901v", + data=[{"name": "Bob"}], + ) + ) + + assert "already contains" in result[0].text + assert "confirm_overwrite" in result[0].text + + @pytest.mark.asyncio + async def test_write_overwrite_proceeds_on_empty_range(self, _mock_google_token): + """Writing without confirm_overwrite proceeds when range is empty.""" + read_resp = _mock_response({}) # empty range + write_resp = _mock_response({"updatedRange": "Sheet1!A1:B2", "updatedRows": 2}) + + with ( + patch.object( + httpx.AsyncClient, "get", new_callable=AsyncMock, return_value=read_resp + ), + patch.object( + httpx.AsyncClient, + "put", + new_callable=AsyncMock, + return_value=write_resp, + ), + ): + result = await sheets_write( + SheetsWriteInput( + spreadsheet_id="abc123def456ghi789jkl012mno345pqr678stu901v", + data=[{"name": "Bob"}], ) ) @@ -432,15 +477,27 @@ async def test_write_append(self, _mock_google_token): class TestSheetsCreateTool: @pytest.mark.asyncio async def test_create_empty(self, _mock_google_token): - mock_resp = _mock_response( + list_resp = _mock_response({"files": []}) # no duplicates + create_resp = _mock_response( { "spreadsheetId": "new-id-123", "spreadsheetUrl": "https://docs.google.com/spreadsheets/d/new-id-123", } ) - with patch.object( - httpx.AsyncClient, "post", new_callable=AsyncMock, return_value=mock_resp + with ( + patch.object( + httpx.AsyncClient, + "get", + new_callable=AsyncMock, + return_value=list_resp, + ), + patch.object( + httpx.AsyncClient, + "post", + new_callable=AsyncMock, + return_value=create_resp, + ), ): result = await sheets_create(SheetsCreateInput(title="Test")) @@ -451,6 +508,7 @@ async def test_create_empty(self, _mock_google_token): @pytest.mark.asyncio async def test_create_with_data(self, _mock_google_token): + list_resp = _mock_response({"files": []}) # no duplicates create_resp = 
_mock_response( { "spreadsheetId": "new-id-456", @@ -460,6 +518,12 @@ async def test_create_with_data(self, _mock_google_token): write_resp = _mock_response({"updatedRows": 2}) with ( + patch.object( + httpx.AsyncClient, + "get", + new_callable=AsyncMock, + return_value=list_resp, + ), patch.object( httpx.AsyncClient, "post", @@ -480,6 +544,19 @@ async def test_create_with_data(self, _mock_google_token): data = json.loads(result[0].text) assert data["rows_written"] == 1 + @pytest.mark.asyncio + async def test_create_rejects_duplicate_title(self, _mock_google_token): + """sheets_create warns when a spreadsheet with the same title exists.""" + list_resp = _mock_response({"files": [{"id": "existing-id", "name": "Budget"}]}) + + with patch.object( + httpx.AsyncClient, "get", new_callable=AsyncMock, return_value=list_resp + ): + result = await sheets_create(SheetsCreateInput(title="Budget")) + + assert "already exists" in result[0].text + assert "existing-id" in result[0].text + class TestSheetsInfoTool: @pytest.mark.asyncio From 6680ceaf329dfd23edc63978faf737b9e1fefb97 Mon Sep 17 00:00:00 2001 From: Rafael Poyiadzi Date: Fri, 27 Feb 2026 17:06:45 +0000 Subject: [PATCH 10/11] Address Claude review: token error logging, dead config, rate limit bypass MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Upgrade silent Google token storage warning to logger.error with exc_info so failures are auditable - Remove unused google_sheets_credentials_json config field (sheets tools are HTTP-only) - Add rate limit check to _write_results_to_sheet so the everyrow_results → output_spreadsheet_title path is rate-limited Co-Authored-By: Claude Opus 4.6 --- everyrow-mcp/src/everyrow_mcp/auth.py | 6 +++++- everyrow-mcp/src/everyrow_mcp/config.py | 5 ----- everyrow-mcp/src/everyrow_mcp/tools.py | 6 +++++- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/everyrow-mcp/src/everyrow_mcp/auth.py b/everyrow-mcp/src/everyrow_mcp/auth.py index 
ee674001..99228e0c 100644 --- a/everyrow-mcp/src/everyrow_mcp/auth.py +++ b/everyrow-mcp/src/everyrow_mcp/auth.py @@ -487,7 +487,11 @@ async def _issue_token_response( expires_in=expires_in, ) except Exception: - logger.warning("Could not store Google token during token issue") + logger.error( + "Google token storage failed for user=%s — Sheets tools will be unavailable", + jwt_claims.get("sub", "unknown"), + exc_info=True, + ) rt_str = secrets.token_urlsafe(32) rt = EveryRowRefreshToken( diff --git a/everyrow-mcp/src/everyrow_mcp/config.py b/everyrow-mcp/src/everyrow_mcp/config.py index 32009284..2cbedb27 100644 --- a/everyrow-mcp/src/everyrow_mcp/config.py +++ b/everyrow-mcp/src/everyrow_mcp/config.py @@ -136,11 +136,6 @@ class Settings(BaseSettings): default=60, description="Sheets rate limit window in seconds" ) everyrow_api_key: str | None = Field(default=None, repr=False) - google_sheets_credentials_json: str | None = Field( - default=None, - description="Path to a Google service account JSON file or inline JSON. " - "Required for Google Sheets tools in stdio mode.", - ) @property def is_http(self) -> bool: diff --git a/everyrow-mcp/src/everyrow_mcp/tools.py b/everyrow-mcp/src/everyrow_mcp/tools.py index 6e10c790..8adb18bc 100644 --- a/everyrow-mcp/src/everyrow_mcp/tools.py +++ b/everyrow-mcp/src/everyrow_mcp/tools.py @@ -122,6 +122,10 @@ async def _write_results_to_sheet( get_google_token, records_to_values, ) + from everyrow_mcp.sheets_tools import _check_sheets_rate_limit # noqa: PLC0415 + + if denied := await _check_sheets_rate_limit(): + return denied token = await get_google_token() async with GoogleSheetsClient(token) as client: @@ -1146,7 +1150,7 @@ async def everyrow_results_stdio( ] -async def everyrow_results_http( +async def everyrow_results_http( # noqa: PLR0911 params: HttpResultsInput, ctx: EveryRowContext ) -> list[TextContent]: """Retrieve results from a completed everyrow task. 
From 8bbcee4e31d35f95d60de9e6319494518356350b Mon Sep 17 00:00:00 2001 From: Rafael Poyiadzi Date: Fri, 27 Feb 2026 17:40:35 +0000 Subject: [PATCH 11/11] Pass ENABLE_SHEETS_TOOLS and EXTRA_ALLOWED_HOSTS through docker-compose.local.yaml Template env vars so they can be overridden at runtime instead of being hardcoded. Required for local tunnel testing with sheets tools. Co-Authored-By: Claude Opus 4.6 --- everyrow-mcp/deploy/docker-compose.local.yaml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/everyrow-mcp/deploy/docker-compose.local.yaml b/everyrow-mcp/deploy/docker-compose.local.yaml index c963e846..3d288716 100644 --- a/everyrow-mcp/deploy/docker-compose.local.yaml +++ b/everyrow-mcp/deploy/docker-compose.local.yaml @@ -1,4 +1,4 @@ -# Local development overrides — NOT for production use. +# Local development overrides. # Usage: docker compose -f docker-compose.yaml -f docker-compose.local.yaml up services: redis: @@ -9,4 +9,5 @@ services: environment: MCP_SERVER_URL: "${MCP_SERVER_URL:-http://localhost:8000}" TRUST_PROXY_HEADERS: "${TRUST_PROXY_HEADERS:-false}" - EXTRA_ALLOWED_HOSTS: "host.docker.internal:*" # local dev only — widens DNS rebinding allowlist + ENABLE_SHEETS_TOOLS: "${ENABLE_SHEETS_TOOLS:-false}" + EXTRA_ALLOWED_HOSTS: "${EXTRA_ALLOWED_HOSTS:-}"