diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 2b3c495d..ec7956cc 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -11,7 +11,7 @@ "name": "everyrow", "source": "./", "description": "Give Claude Code a research team. Forecast, score, classify, or research every row of a dataset.", - "version": "0.4.0" + "version": "0.4.1" } ] } diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index f5c2e2a2..9d3419e3 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "everyrow", "description": "Give Claude Code a research team. Forecast, score, classify, or research every row of a dataset.", - "version": "0.4.0", + "version": "0.4.1", "author": { "name": "FutureSearch" }, diff --git a/README.md b/README.md index 761275e9..96bce6e1 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,7 @@ Spin up a team of: | [**Agents**](https://everyrow.io/docs/reference/RESEARCH) | Research, then analyze | 1–3¢/researcher | 10k rows | | [**Forecasters**](https://everyrow.io/docs/reference/FORECAST) | Predict outcomes | 20-50¢/researcher | 10k rows | | [**Scorers**](https://everyrow.io/docs/reference/RANK) | Research, then score | 1-5¢/researcher | 10k rows | -| [**Classifiers**](https://everyrow.io/docs/reference/SCREEN) | Research, then categorize | 0.1-0.7¢/researcher | 10k rows | +| [**Classifiers**](https://everyrow.io/docs/reference/CLASSIFY) | Research, then categorize | 0.1-0.7¢/researcher | 10k rows | | [**Matchers**](https://everyrow.io/docs/reference/MERGE) | Find matching rows | 0.2-0.5¢/researcher | 20k rows | See the full [API reference](https://everyrow.io/docs/api), [guides](https://everyrow.io/docs/guides), and [case studies](https://everyrow.io/docs/case-studies), (for example, see our [case study](https://everyrow.io/docs/case-studies/llm-web-research-agents-at-scale) running a `Research` task on 10k rows, running agents that used 120k LLM 
calls.) @@ -203,23 +203,23 @@ Requires Python 3.12+. Then you can use the SDK directly: ```python import asyncio import pandas as pd -from everyrow.ops import screen -from pydantic import BaseModel, Field +from everyrow.ops import classify companies = pd.DataFrame([ - {"company": "Airtable",}, {"company": "Vercel",}, {"company": "Notion",} + {"company": "Apple"}, {"company": "JPMorgan Chase"}, {"company": "ExxonMobil"}, + {"company": "Tesla"}, {"company": "Pfizer"}, {"company": "Duke Energy"}, ]) -class JobScreenResult(BaseModel): - qualifies: bool = Field(description="True if company lists jobs with all criteria") - async def main(): - result = await screen( - task="""Qualifies if: 1. Remote-friendly, 2. Senior, and 3. Discloses salary""", + result = await classify( + task="Classify this company by its GICS industry sector", + categories=["Energy", "Materials", "Industrials", "Consumer Discretionary", + "Consumer Staples", "Health Care", "Financials", + "Information Technology", "Communication Services", + "Utilities", "Real Estate"], input=companies, - response_model=JobScreenResult, ) - print(result.data.head()) + print(result.data[["company", "classification"]]) asyncio.run(main()) ``` diff --git a/docs/api.md b/docs/api.md index f777fa75..0349dc20 100644 --- a/docs/api.md +++ b/docs/api.md @@ -1,11 +1,11 @@ --- title: API Reference -description: Complete API reference for everyrow — screen, rank, dedupe, merge, forecast, and research operations powered by LLM web research agents. +description: Complete API reference for everyrow — screen, rank, dedupe, merge, classify, forecast, and research operations powered by LLM web research agents. --- # API Reference -Six operations for processing data with LLM-powered web research agents. Each takes a DataFrame and a natural-language instruction. +Seven operations for processing data with LLM-powered web research agents. Each takes a DataFrame and a natural-language instruction. 
## screen @@ -55,6 +55,20 @@ result = await merge(task=..., left_table=df1, right_table=df2) Guides: [Fuzzy Join Without Matching Keys](/docs/fuzzy-join-without-keys) Case Studies: [LLM Merging at Scale](/docs/case-studies/llm-powered-merging-at-scale), [Match Software Vendors to Requirements](/docs/case-studies/match-software-vendors-to-requirements) +## classify + +```python +result = await classify( + task="Classify each company by its primary industry sector", + categories=["Technology", "Finance", "Healthcare", "Energy"], + input=companies_df, +) +``` + +`classify` assigns each row in a DataFrame to one of the provided categories using web research that scales to the difficulty of the classification. Supports binary (yes/no) and multi-category classification with optional reasoning output. + +[Full reference →](/docs/reference/CLASSIFY) + ## forecast ```python diff --git a/docs/case-studies.md b/docs/case-studies.md index c09d02c5..ee98bf1f 100644 --- a/docs/case-studies.md +++ b/docs/case-studies.md @@ -1,12 +1,16 @@ --- title: Case Studies -description: Runnable case studies demonstrating everyrow operations on real datasets — screen, rank, dedupe, merge, and research with LLM-powered agents. +description: Runnable case studies demonstrating everyrow operations on real datasets — classify, screen, rank, dedupe, merge, forecast, and research with LLM-powered agents. --- # Case Studies Runnable case studies with real datasets. Each case study demonstrates an everyrow operation end-to-end with output you can inspect. +## Classify + +- [Classify and Label Data](/docs/classify-dataframe-rows-llm) + ## Screen - [Screen 10,000 Rows](/docs/case-studies/llm-powered-screening-at-scale) @@ -33,6 +37,10 @@ Runnable case studies with real datasets. 
Each case study demonstrates an everyr - [Link Records Across Medical Datasets](/docs/case-studies/match-clinical-trials-to-papers) - [Merge Costs and Speed](/docs/case-studies/understanding-costs-and-speed-for-merge) +## Forecast + +- [Automating Forecasting Questions](https://futuresearch.ai/automating-forecasting-questions/) + ## Multi-Method - [Multi-Stage Lead Qualification](/docs/case-studies/multi-stage-lead-qualification) diff --git a/docs/getting-started.md b/docs/getting-started.md index 1361ea62..c998186a 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -150,13 +150,15 @@ df = await fetch_task_data("12345678-1234-1234-1234-123456789abc") ## Operations -| Operation | Description | -| ------------------------------- | ------------------------------------------ | -| [Screen](/reference/SCREEN) | Filter rows by criteria requiring judgment | -| [Rank](/reference/RANK) | Score rows by qualitative factors | -| [Dedupe](/reference/DEDUPE) | Deduplicate when fuzzy matching fails | -| [Merge](/reference/MERGE) | Join tables when keys don't match exactly | -| [Research](/reference/RESEARCH) | Run web agents to research each row | +| Operation | Description | +| --------------------------------- | ------------------------------------------ | +| [Classify](/reference/CLASSIFY) | Categorize rows into predefined classes | +| [Screen](/reference/SCREEN) | Filter rows by criteria requiring judgment | +| [Rank](/reference/RANK) | Score rows by qualitative factors | +| [Dedupe](/reference/DEDUPE) | Deduplicate when fuzzy matching fails | +| [Merge](/reference/MERGE) | Join tables when keys don't match exactly | +| [Forecast](/reference/FORECAST) | Predict probabilities for binary questions | +| [Research](/reference/RESEARCH) | Run web agents to research each row | ## See Also diff --git a/docs/guides.md b/docs/guides.md index 9b513caf..7167b3d2 100644 --- a/docs/guides.md +++ b/docs/guides.md @@ -1,12 +1,16 @@ --- title: Guides -description: 
Step-by-step tutorials for using everyrow to screen, rank, dedupe, merge, and research data with LLM-powered agents. +description: Step-by-step tutorials for using everyrow to classify, screen, rank, dedupe, merge, forecast, and research data with LLM-powered agents. --- # Guides Practical walkthroughs that show you how to use everyrow for common data processing tasks. Each guide covers a single operation end-to-end with working code. +## Classify + +- [Classify and Label Rows](/docs/classify-dataframe-rows-llm) + ## Screen - [Filter a Dataset Intelligently](/docs/filter-dataframe-with-llm) @@ -25,8 +29,11 @@ Practical walkthroughs that show you how to use everyrow for common data process - [Join Tables Without Shared Keys](/docs/fuzzy-join-without-keys) +## Forecast + +- [Forecast Binary Questions](/docs/reference/FORECAST) + ## Research - [Add a Column via Web Research](/docs/add-column-web-lookup) -- [Classify and Label Rows](/docs/classify-dataframe-rows-llm) - [LLM-Powered Data Labeling](/docs/active-learning-llm-oracle) diff --git a/docs/mcp-server.md b/docs/mcp-server.md index 932aab75..658fcc79 100644 --- a/docs/mcp-server.md +++ b/docs/mcp-server.md @@ -65,6 +65,29 @@ Join two CSVs using intelligent entity matching (LEFT JOIN semantics). Returns `task_id` and `session_url`. Call `everyrow_progress` to monitor. +### everyrow_classify + +Classify each row into one of the provided categories. + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| `task` | string | Yes | Classification instructions. | +| `categories` | list[string] | Yes | Allowed categories (minimum 2). Each row is assigned exactly one. | +| `classification_field` | string | No | Output column name (default: `"classification"`). | +| `include_reasoning` | boolean | No | Include a reasoning column (default: false). | + +Returns `task_id` and `session_url`. Call `everyrow_progress` to monitor. 
+ +### everyrow_forecast + +Forecast the probability of binary questions. + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| `context` | string | No | Optional batch-level context for all questions. | + +Returns `task_id` and `session_url`. Call `everyrow_progress` to monitor. + ### everyrow_agent Run web research agents on each row. diff --git a/docs/reference/CLASSIFY.md b/docs/reference/CLASSIFY.md new file mode 100644 index 00000000..e59ad62a --- /dev/null +++ b/docs/reference/CLASSIFY.md @@ -0,0 +1,113 @@ +--- +title: classify +description: API reference for the EveryRow classify tool, which assigns each row of a dataset into one of the provided categories using web research. +--- + +# Classify + +`classify` takes a DataFrame and a list of allowed categories, then assigns each row to exactly one category using web research that scales to the difficulty of the classification. Supports binary (yes/no) and multi-category classification with optional reasoning output. 
+ +## Examples + +### GICS sector classification + +```python +from pandas import DataFrame +from everyrow.ops import classify + +companies = DataFrame([ + {"company": "Apple"}, + {"company": "JPMorgan Chase"}, + {"company": "ExxonMobil"}, + {"company": "Pfizer"}, + {"company": "Procter & Gamble"}, + {"company": "Tesla"}, + {"company": "AT&T"}, + {"company": "Caterpillar"}, + {"company": "Duke Energy"}, + {"company": "Simon Property Group"}, +]) + +result = await classify( + task="Classify this company by its GICS industry sector", + categories=[ + "Energy", "Materials", "Industrials", "Consumer Discretionary", + "Consumer Staples", "Health Care", "Financials", + "Information Technology", "Communication Services", + "Utilities", "Real Estate", + ], + input=companies, +) +print(result.data[["company", "classification"]]) +``` + +Output: + +| company | classification | +|----------------------|------------------------| +| Apple | Information Technology | +| JPMorgan Chase | Financials | +| ExxonMobil | Energy | +| Pfizer | Health Care | +| Procter & Gamble | Consumer Staples | +| Tesla | Consumer Discretionary | +| AT&T | Communication Services | +| Caterpillar | Industrials | +| Duke Energy | Utilities | +| Simon Property Group | Real Estate | + +### Binary classification + +For yes/no questions, use two categories: + +```python +result = await classify( + task="Is this company founder-led?", + categories=["yes", "no"], + input=companies, +) +``` + +### Custom output column and reasoning + +```python +result = await classify( + task="Classify each company by its primary industry sector", + categories=["Technology", "Finance", "Healthcare", "Energy"], + input=companies, + classification_field="sector", + include_reasoning=True, +) +print(result.data[["company", "sector", "reasoning"]]) +``` + +## Parameters + +| Name | Type | Default | Description | +|------|------|---------|-------------| +| `task` | str | required | Natural-language instructions describing how to 
classify each row | +| `categories` | list[str] | required | Allowed category values (minimum 2). Each row is assigned exactly one. | +| `input` | DataFrame | required | Rows to classify | +| `classification_field` | str | `"classification"` | Name of the output column for the assigned category | +| `include_reasoning` | bool | `False` | If True, adds a `reasoning` column with the agent's justification | +| `session` | Session | `None` | Optional session, auto-created if omitted | + +## Output + +One column is added to each input row (name controlled by `classification_field`): + +| Column | Type | Description | +|--------|------|-------------| +| `classification` | str | One of the provided `categories` values | +| `reasoning` | str | Agent's justification (only if `include_reasoning=True`) | + +## Via MCP + +MCP tool: `everyrow_classify` + +| Parameter | Type | Description | +|-----------|------|-------------| +| `task` | string | Classification instructions | +| `categories` | list[string] | Allowed categories (minimum 2) | +| `classification_field` | string | Output column name (default: `"classification"`) | +| `include_reasoning` | boolean | Include reasoning column (default: false) | diff --git a/everyrow-mcp/README.md b/everyrow-mcp/README.md index adfbe727..0fe18aad 100644 --- a/everyrow-mcp/README.md +++ b/everyrow-mcp/README.md @@ -2,7 +2,7 @@ MCP (Model Context Protocol) server for [everyrow](https://everyrow.io): agent ops at spreadsheet scale. -This server exposes everyrow's 5 core operations as MCP tools, allowing LLM applications to screen, rank, dedupe, merge, and run agents on CSV files. +This server exposes everyrow's core operations as MCP tools, allowing LLM applications to classify, screen, rank, dedupe, merge, forecast, and run agents on CSV files. **All tools operate on local CSV files.** Provide absolute file paths as input, and transformed results are written to new CSV files at your specified output path. 
@@ -116,6 +116,31 @@ Parameters: Example: Match software products (left, enriched) to parent companies (right, lookup): Photoshop -> Adobe +### everyrow_classify + +Classify each row into one of the provided categories. + +``` +Parameters: +- task: Natural language classification instructions +- categories: Allowed categories (minimum 2) +- classification_field: (optional) Output column name (default: "classification") +- include_reasoning: (optional) Include reasoning column (default: false) +``` + +Example: Classify companies by GICS sector with categories ["Energy", "Financials", "Information Technology", ...] + +### everyrow_forecast + +Forecast the probability of binary questions. + +``` +Parameters: +- context: (optional) Batch-level context for all questions +``` + +Example: "Will the US Federal Reserve cut rates before July 2027?" + ### everyrow_agent Run web research agents on each row of a CSV. diff --git a/everyrow-mcp/manifest.json b/everyrow-mcp/manifest.json index f416028c..481a47d4 100644 --- a/everyrow-mcp/manifest.json +++ b/everyrow-mcp/manifest.json @@ -2,7 +2,7 @@ "manifest_version": "0.4", "name": "everyrow-mcp", "display_name": "Everyrow MCP Server", - "version": "0.4.0", + "version": "0.4.1", "description": "Give your AI a research team. Forecast, score, classify, or research every row of a dataset.", "long_description": "MCP server for everyrow: give your AI a research team. Each operation dispatches web research agents across a dataset to forecast, score, classify, deduplicate, merge, or research at scale.", "author": { @@ -53,6 +53,10 @@ "name": "everyrow_forecast", "description": "Forecast the probability of binary questions from a CSV file." }, + { + "name": "everyrow_classify", + "description": "Classify each row of a dataset into one of the provided categories." + }, { "name": "everyrow_single_agent", "description": "Run a single web research agent on a task, optionally with context data." 
@@ -97,7 +101,7 @@ "python": ">=3.12" } }, - "keywords": ["everyrow", "dataframe", "csv", "ai", "data-processing", "dedupe", "merge", "rank", "screen"], + "keywords": ["everyrow", "dataframe", "csv", "ai", "data-processing", "classify", "dedupe", "merge", "rank", "screen", "forecast"], "license": "MIT", "privacy_policies": ["https://futuresearch.ai/privacy/"] } diff --git a/everyrow-mcp/pyproject.toml b/everyrow-mcp/pyproject.toml index 60a87dd6..87a72b70 100644 --- a/everyrow-mcp/pyproject.toml +++ b/everyrow-mcp/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "everyrow-mcp" -version = "0.4.0" +version = "0.4.1" description = "MCP server for everyrow: a researcher for every row" readme = "README.md" requires-python = ">=3.12" diff --git a/everyrow-mcp/server.json b/everyrow-mcp/server.json index 7605a8ac..3989200b 100644 --- a/everyrow-mcp/server.json +++ b/everyrow-mcp/server.json @@ -9,13 +9,13 @@ "subfolder": "everyrow-mcp" }, "websiteUrl": "https://github.com/futuresearch/everyrow-sdk/tree/main/everyrow-mcp", - "version": "0.4.0", + "version": "0.4.1", "packages": [ { "registryType": "pypi", "registryBaseUrl": "https://pypi.org", "identifier": "everyrow-mcp", - "version": "0.4.0", + "version": "0.4.1", "runtimeHint": "uvx", "transport": { "type": "stdio" diff --git a/everyrow-mcp/src/everyrow_mcp/models.py b/everyrow-mcp/src/everyrow_mcp/models.py index 468f71a2..da3b6fcb 100644 --- a/everyrow-mcp/src/everyrow_mcp/models.py +++ b/everyrow-mcp/src/everyrow_mcp/models.py @@ -465,6 +465,31 @@ class ForecastInput(_SingleSourceInput): ) +class ClassifyInput(_SingleSourceInput): + """Input for the classify operation.""" + + model_config = ConfigDict(str_strip_whitespace=True, extra="forbid") + + task: str = Field( + ..., + description="Natural language instructions describing how to classify each row.", + min_length=1, + ) + categories: list[str] = Field( + ..., + description="Allowed category values (minimum 2). 
Each row will be assigned one of these.", + min_length=2, + ) + classification_field: str = Field( + default="classification", + description="Name of the output column that will contain the assigned category.", + ) + include_reasoning: bool = Field( + default=False, + description="If true, adds a 'reasoning' column with the agent's justification.", + ) + + class UploadDataInput(BaseModel): """Input for the upload_data tool.""" diff --git a/everyrow-mcp/src/everyrow_mcp/tools.py b/everyrow-mcp/src/everyrow_mcp/tools.py index c9572e07..3100bc38 100644 --- a/everyrow-mcp/src/everyrow_mcp/tools.py +++ b/everyrow-mcp/src/everyrow_mcp/tools.py @@ -16,6 +16,7 @@ from everyrow.generated.models.public_task_type import PublicTaskType from everyrow.ops import ( agent_map_async, + classify_async, create_table_artifact, dedupe_async, forecast_async, @@ -36,6 +37,7 @@ from everyrow_mcp.models import ( AgentInput, CancelInput, + ClassifyInput, DedupeInput, ForecastInput, HttpResultsInput, @@ -697,6 +699,86 @@ async def everyrow_forecast( ) +@mcp.tool( + name="everyrow_classify", + structured_output=False, + annotations=ToolAnnotations( + title="Classify Rows", + readOnlyHint=False, + destructiveHint=False, + idempotentHint=False, + openWorldHint=True, + ), +) +async def everyrow_classify( + params: ClassifyInput, ctx: EveryRowContext +) -> list[TextContent]: + """Classify each row of a dataset into one of the provided categories. + + Uses web research that scales to the difficulty of the classification. + Each row is assigned exactly one of the provided categories. + + Examples: + - "Classify each company by its primary industry sector" with categories ["Technology", "Finance", "Healthcare", "Energy"] + - "Is this company founder-led?" 
with categories ["yes", "no"] + - "Classify by Koppen climate zone" with categories ["tropical", "arid", "temperate", "continental", "polar"] + + Output columns added: the ``classification_field`` column (default: ``classification``) + containing the assigned category. Optionally a ``reasoning`` column if ``include_reasoning`` is true. + + This function submits the task and returns immediately with a task_id and session_url. + After receiving a result from this tool, share the session_url with the user. + Then immediately call everyrow_progress(task_id) to monitor. + Once the task is completed, call everyrow_results to save the output. + """ + logger.info( + "everyrow_classify: task=%.80s categories=%s rows=%s", + params.task, + params.categories, + len(params.data) if params.data else "artifact", + ) + log_client_info(ctx, "everyrow_classify") + client = _get_client(ctx) + + _clear_task_state() + input_data = params._aid_or_dataframe + + async with create_session( + client=client, session_id=params.session_id, name=params.session_name + ) as session: + session_url = session.get_url() + session_id_str = str(session.session_id) + cohort_task = await classify_async( + task=params.task, + categories=params.categories, + session=session, + input=input_data, + classification_field=params.classification_field, + include_reasoning=params.include_reasoning, + ) + task_id = str(cohort_task.task_id) + total = len(input_data) if isinstance(input_data, pd.DataFrame) else 0 + write_initial_task_state( + task_id, + task_type=PublicTaskType.CLASSIFY, + session_url=session_url, + total=total, + input_source=params._input_data_mode.value, + ) + + return await create_tool_response( + task_id=task_id, + session_url=session_url, + label=f"Submitted: {total} rows for classification into {len(params.categories)} categories." 
+ if total + else f"Submitted: artifact for classification into {len(params.categories)} categories.", + token=client.token, + total=total, + mcp_server_url=ctx.request_context.lifespan_context.mcp_server_url, + session_id=session_id_str, + ) + + @mcp.tool( name="everyrow_upload_data", structured_output=False, @@ -714,7 +796,7 @@ async def everyrow_upload_data( """Upload data from a URL or local file. Returns an artifact_id for use in processing tools. Use this tool to ingest data before calling everyrow_agent, everyrow_screen, - everyrow_rank, everyrow_dedupe, everyrow_merge, or everyrow_forecast. + everyrow_rank, everyrow_dedupe, everyrow_merge, everyrow_classify, or everyrow_forecast. Supported sources: - HTTP(S) URLs (including Google Sheets — auto-converted to CSV export) diff --git a/everyrow-mcp/tests/test_http_transport.py b/everyrow-mcp/tests/test_http_transport.py index c87516c2..7f194d37 100644 --- a/everyrow-mcp/tests/test_http_transport.py +++ b/everyrow-mcp/tests/test_http_transport.py @@ -207,13 +207,14 @@ async def test_health_endpoint(self, http_client: httpx.AsyncClient): assert r.json() == {"status": "ok"} async def test_list_tools(self, mcp_server: str): - """session.list_tools() returns all 8 registered tools.""" + """session.list_tools() returns all registered tools.""" async with open_mcp_session(mcp_server) as session: resp = await session.list_tools() tool_names = sorted(t.name for t in resp.tools) expected = sorted( [ "everyrow_agent", + "everyrow_classify", "everyrow_single_agent", "everyrow_rank", "everyrow_screen", diff --git a/everyrow-mcp/tests/test_mcp_e2e.py b/everyrow-mcp/tests/test_mcp_e2e.py index 7eaeb2cc..a6d32b33 100644 --- a/everyrow-mcp/tests/test_mcp_e2e.py +++ b/everyrow-mcp/tests/test_mcp_e2e.py @@ -175,6 +175,7 @@ async def test_list_tools(self, _http_state): "everyrow_agent", "everyrow_balance", "everyrow_cancel", + "everyrow_classify", "everyrow_dedupe", "everyrow_forecast", "everyrow_list_sessions", diff --git 
a/gemini-extension.json b/gemini-extension.json index 6c00536e..a1803c4d 100644 --- a/gemini-extension.json +++ b/gemini-extension.json @@ -1,7 +1,7 @@ { "name": "everyrow", "description": "Gemini extension for the everyrow SDK - AI-powered data processing utilities for transforming, deduping, merging, ranking, and screening dataframes", - "version": "0.4.0", + "version": "0.4.1", "mcpServers": { "everyrow": { "command": "uvx", diff --git a/pyproject.toml b/pyproject.toml index 3e9efb51..43444d0b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ everyrow = { workspace = true } [project] name = "everyrow" -version = "0.4.0" +version = "0.4.1" description = "A researcher for every row. Forecast, score, classify, or research entire datasets." readme = "README.md" requires-python = ">=3.12" diff --git a/skills/everyrow-sdk/SKILL.md b/skills/everyrow-sdk/SKILL.md index f5fd7b49..ab6f6873 100644 --- a/skills/everyrow-sdk/SKILL.md +++ b/skills/everyrow-sdk/SKILL.md @@ -12,10 +12,12 @@ everyrow gives Claude a research team for your data. Use this skill when writing > - GitHub: [github.com/futuresearch/everyrow-sdk](https://github.com/futuresearch/everyrow-sdk) **Operations:** +- Classify rows into predefined categories - Rank/score rows based on qualitative criteria - Deduplicate data using semantic understanding - Merge tables using AI-powered matching - Screen/filter rows based on research-intensive criteria +- Forecast probabilities for binary questions - Run AI agents over dataframe rows ## Installation @@ -28,7 +30,7 @@ pip install everyrow ### MCP Server (for Claude Code, Claude Desktop, Cursor, etc.) -If an MCP server is available (`everyrow_screen`, `everyrow_rank`, etc. tools), you can use it directly without writing Python code. The MCP server operates on local CSV files. +If an MCP server is available (`everyrow_classify`, `everyrow_screen`, `everyrow_rank`, etc. tools), you can use it directly without writing Python code. 
The MCP server operates on local CSV files. To install the MCP server, add to your MCP config: @@ -84,6 +86,16 @@ export EVERYROW_API_KEY= If you have the everyrow MCP server configured, these tools operate directly on CSV files. +### everyrow_classify +Classify each row into one of the provided categories. +``` +Parameters: +- task: Natural language classification instructions +- categories: Allowed categories (minimum 2) +- classification_field: (optional) Output column name (default: "classification") +- include_reasoning: (optional) Include reasoning column (default: false) +``` + ### everyrow_screen Filter CSV rows based on criteria that require judgment. ``` @@ -129,6 +141,13 @@ Parameters: - use_web_search: (optional) "auto" (default), "yes", or "no" ``` +### everyrow_forecast +Forecast the probability of binary questions. +``` +Parameters: +- context: (optional) Batch-level context for all questions +``` + ### everyrow_agent Run web research agents on each row of a CSV. ``` @@ -244,6 +263,48 @@ print(result.data.head()) Parameters: `task`, `left_table`, `right_table`, `merge_on_left`, `merge_on_right`, `relationship_type`, `use_web_search`, `session` +### classify - Categorize rows + +Assign each row to one of the provided categories: + +```python +from everyrow.ops import classify + +result = await classify( + task="Classify this company by its GICS industry sector", + categories=["Energy", "Materials", "Industrials", "Consumer Discretionary", + "Consumer Staples", "Health Care", "Financials", + "Information Technology", "Communication Services", + "Utilities", "Real Estate"], + input=companies, +) +print(result.data[["company", "classification"]]) +``` + +**Binary classification** - for yes/no questions, use two categories: + +```python +result = await classify( + task="Is this company founder-led?", + categories=["yes", "no"], + input=companies, +) +``` + +**With reasoning** - understand why each row was classified: + +```python +result = await classify( 
+    task="Classify each company by its primary industry sector", +    categories=["Technology", "Finance", "Healthcare", "Energy"], +    input=companies, +    classification_field="sector", +    include_reasoning=True, +) +``` + +Parameters: `task`, `categories`, `input`, `classification_field` (default: "classification"), `include_reasoning` (default: False), `session` + ### screen - Evaluate and filter rows Filter rows based on criteria that require research: @@ -284,6 +345,25 @@ result = await screen( Parameters: `task`, `input`, `response_model`, `session` +### forecast - Predict probabilities + +Produce calibrated probability estimates for binary questions: + +```python +from pandas import DataFrame +from everyrow.ops import forecast + +result = await forecast( +    input=DataFrame([ +        {"question": "Will the US Federal Reserve cut rates by at least 25bp before July 1, 2027?", +         "resolution_criteria": "Resolves YES if the Fed announces at least one rate cut of 25bp or more."}, +    ]), +) +print(result.data[["question", "probability", "rationale"]]) +``` + +Parameters: `input`, `context`, `session` + ### single_agent - Single input task Run an AI agent on a single input: @@ -412,7 +491,7 @@ df = await fetch_task_data("12345678-1234-1234-1234-123456789abc") ## Everyrow Long-Running Operations (MCP) -Everyrow operations (screen, rank, dedupe, merge, agent) take 1-10+ minutes. +Everyrow operations (classify, screen, rank, dedupe, merge, forecast, agent) take 1-10+ minutes. All MCP tools use an async pattern: 1. 
Call the operation tool (e.g., `everyrow_agent(...)`) to get task_id and session_url diff --git a/src/everyrow/ops.py b/src/everyrow/ops.py index 0f3ba2e0..e9b9283f 100644 --- a/src/everyrow/ops.py +++ b/src/everyrow/ops.py @@ -10,6 +10,7 @@ from everyrow.generated.api.artifacts import create_artifact_artifacts_post from everyrow.generated.api.operations import ( agent_map_operations_agent_map_post, + classify_operations_classify_post, dedupe_operations_dedupe_post, forecast_operations_forecast_post, merge_operations_merge_post, @@ -21,6 +22,8 @@ AgentMapOperation, AgentMapOperationInputType1Item, AgentMapOperationResponseSchemaType0, + ClassifyOperation, + ClassifyOperationInputType1Item, CreateArtifactRequest, CreateArtifactRequestDataType0Item, CreateArtifactRequestDataType1, @@ -853,3 +856,100 @@ async def forecast_async( ) cohort_task.set_submitted(response.task_id, response.session_id, session.client) return cohort_task + + +# --- Classify --- + + +async def classify( + task: str, + categories: list[str], + input: DataFrame | UUID | TableResult, + classification_field: str = "classification", + include_reasoning: bool = False, + session: Session | None = None, +) -> TableResult: + """Classify each row of a table into one of the provided categories. + + Uses a two-phase approach: Phase 1 attempts fast batch classification using + web research, and Phase 2 follows up with deeper research on ambiguous rows. + Each row is assigned exactly one of the provided categories. + + Args: + task: Natural-language instructions describing how to classify each row. + categories: Allowed category values (minimum 2). Each row will be + assigned exactly one of these. + input: The input table. Each row is classified independently. + classification_field: Name of the output column that will contain the + assigned category. Default: ``"classification"``. + include_reasoning: If True, adds a ``reasoning`` column with the + agent's justification for the classification. 
+ session: Optional session. If not provided, one will be created + automatically. + + Returns: + TableResult with a ``classification_field`` column (and optionally + ``reasoning``) added to each input row. + """ + if session is None: + async with create_session() as internal_session: + cohort_task = await classify_async( + task=task, + categories=categories, + session=internal_session, + input=input, + classification_field=classification_field, + include_reasoning=include_reasoning, + ) + result = await cohort_task.await_result(on_progress=print_progress) + if isinstance(result, TableResult): + return result + raise EveryrowError("Classify task did not return a table result") + cohort_task = await classify_async( + task=task, + categories=categories, + session=session, + input=input, + classification_field=classification_field, + include_reasoning=include_reasoning, + ) + result = await cohort_task.await_result(on_progress=print_progress) + if isinstance(result, TableResult): + return result + raise EveryrowError("Classify task did not return a table result") + + +async def classify_async( + task: str, + categories: list[str], + session: Session, + input: DataFrame | UUID | TableResult, + classification_field: str = "classification", + include_reasoning: bool = False, +) -> EveryrowTask[BaseModel]: + """Submit a classify task asynchronously. + + Returns: + EveryrowTask that resolves to a TableResult with a classification column. 
+ """ + input_data = _prepare_table_input(input, ClassifyOperationInputType1Item) + + body = ClassifyOperation( + input_=input_data, # type: ignore + task=task, + categories=categories, + session_id=session.session_id, + classification_field=classification_field, + include_reasoning=include_reasoning, + ) + + response = await classify_operations_classify_post.asyncio( + client=session.client, body=body + ) + response = handle_response(response) + + cohort_task: EveryrowTask[BaseModel] = EveryrowTask( + response_model=BaseModel, is_map=True, is_expand=False + ) + cohort_task.set_submitted(response.task_id, response.session_id, session.client) + return cohort_task diff --git a/tests/integration/test_classify.py b/tests/integration/test_classify.py new file mode 100644 index 00000000..10fc6ecf --- /dev/null +++ b/tests/integration/test_classify.py @@ -0,0 +1,68 @@ +"""Integration tests for classify operation.""" + +import pandas as pd +import pytest + +from everyrow.ops import classify +from everyrow.result import TableResult + +pytestmark = [pytest.mark.integration, pytest.mark.asyncio] + + +async def test_classify_assigns_categories(): + """Test that classify returns a TableResult with correct categories.""" + input_df = pd.DataFrame( + [ + { + "company": "Apple Inc.", + "description": "Consumer electronics and software", + }, + { + "company": "JPMorgan Chase", + "description": "Investment banking and financial services", + }, + { + "company": "ExxonMobil", + "description": "Oil and gas exploration and production", + }, + ] + ) + categories = ["Technology", "Finance", "Energy", "Healthcare"] + + result = await classify( + task="Classify each company by its primary industry sector", + categories=categories, + input=input_df, + ) + + assert isinstance(result, TableResult) + assert result.artifact_id is not None + assert "classification" in result.data.columns + assert len(result.data) == 3 + + for _, row in result.data.iterrows(): + assert row["classification"] in 
categories, ( + f"Invalid classification '{row['classification']}' for {row.get('company')}" + ) + + +async def test_classify_custom_field_and_reasoning(): + """Test custom classification_field and include_reasoning.""" + input_df = pd.DataFrame( + [ + {"company": "Tesla", "description": "Electric vehicles and clean energy"}, + ] + ) + categories = ["Technology", "Automotive", "Energy"] + + result = await classify( + task="Classify each company by its primary industry sector", + categories=categories, + input=input_df, + classification_field="sector", + include_reasoning=True, + ) + + assert isinstance(result, TableResult) + assert "sector" in result.data.columns + assert result.data["sector"].iloc[0] in categories