From c76ef98058362a5cabe2d3234c697311ab92338f Mon Sep 17 00:00:00 2001 From: Dan Schwarz Date: Sun, 22 Feb 2026 08:44:12 -0800 Subject: [PATCH 1/3] Add Claude Code tabs to all guides and case studies Co-Authored-By: Claude Opus 4.6 --- docs-site/src/app/[...slug]/page.tsx | 6 +- .../src/app/case-studies/[slug]/page.tsx | 43 +++- docs-site/src/app/globals.css | 29 +++ docs-site/src/components/GuideTabs.tsx | 62 +++++ docs-site/src/components/MDXContent.tsx | 3 + docs-site/src/utils/docs.ts | 8 +- docs-site/src/utils/notebooks.ts | 70 +++++- docs/active-learning-llm-oracle.md | 118 --------- docs/active-learning-llm-oracle.mdx | 175 ++++++++++++++ docs/add-column-web-lookup.md | 85 ------- docs/add-column-web-lookup.mdx | 169 +++++++++++++ docs/case-studies.md | 27 +-- .../dedupe-crm-company-records/content.mdx | 123 ++++++++++ .../content.mdx | 100 ++++++++ .../llm-powered-merging-at-scale/content.mdx | 125 ++++++++++ .../content.mdx | 131 ++++++++++ .../content.mdx | 141 +++++++++++ .../content.mdx | 131 ++++++++++ .../content.mdx | 121 ++++++++++ .../content.mdx | 126 ++++++++++ .../content.mdx | 139 +++++++++++ .../content.mdx | 125 ++++++++++ .../content.mdx | 120 ++++++++++ .../content.mdx | 124 ++++++++++ .../content.mdx | 132 +++++++++++ .../content.mdx | 156 ++++++++++++ .../content.mdx | 148 ++++++++++++ .../content.mdx | 142 +++++++++++ docs/classify-dataframe-rows-llm.md | 135 ----------- docs/classify-dataframe-rows-llm.mdx | 193 +++++++++++++++ docs/deduplicate-training-data-ml.md | 101 -------- docs/deduplicate-training-data-ml.mdx | 186 +++++++++++++++ docs/filter-dataframe-with-llm.md | 107 --------- docs/filter-dataframe-with-llm.mdx | 193 +++++++++++++++ docs/fuzzy-join-without-keys.md | 74 ------ docs/fuzzy-join-without-keys.mdx | 150 ++++++++++++ docs/guides.md | 14 +- docs/rank-by-external-metric.md | 103 -------- docs/rank-by-external-metric.mdx | 223 ++++++++++++++++++ docs/resolve-entities-python.md | 63 ----- docs/resolve-entities-python.mdx 
| 144 +++++++++++ docs/scale-deduplication-20k-rows.md | 115 --------- docs/scale-deduplication-20k-rows.mdx | 149 ++++++++++++ 43 files changed, 3888 insertions(+), 941 deletions(-) create mode 100644 docs-site/src/components/GuideTabs.tsx delete mode 100644 docs/active-learning-llm-oracle.md create mode 100644 docs/active-learning-llm-oracle.mdx delete mode 100644 docs/add-column-web-lookup.md create mode 100644 docs/add-column-web-lookup.mdx create mode 100644 docs/case_studies/dedupe-crm-company-records/content.mdx create mode 100644 docs/case_studies/deep-research-bench-pareto-analysis/content.mdx create mode 100644 docs/case_studies/llm-powered-merging-at-scale/content.mdx create mode 100644 docs/case_studies/llm-powered-screening-at-scale/content.mdx create mode 100644 docs/case_studies/match-clinical-trials-to-papers/content.mdx create mode 100644 docs/case_studies/match-software-vendors-to-requirements/content.mdx create mode 100644 docs/case_studies/merge-contacts-with-company-data/content.mdx create mode 100644 docs/case_studies/merge-overlapping-contact-lists/content.mdx create mode 100644 docs/case_studies/multi-stage-lead-qualification/content.mdx create mode 100644 docs/case_studies/research-and-rank-permit-times/content.mdx create mode 100644 docs/case_studies/score-leads-from-fragmented-data/content.mdx create mode 100644 docs/case_studies/score-leads-without-crm-history/content.mdx create mode 100644 docs/case_studies/screen-job-postings-by-criteria/content.mdx create mode 100644 docs/case_studies/screen-stocks-by-investment-thesis/content.mdx create mode 100644 docs/case_studies/screen-stocks-by-margin-sensitivity/content.mdx create mode 100644 docs/case_studies/understanding-costs-and-speed-for-merge/content.mdx delete mode 100644 docs/classify-dataframe-rows-llm.md create mode 100644 docs/classify-dataframe-rows-llm.mdx delete mode 100644 docs/deduplicate-training-data-ml.md create mode 100644 docs/deduplicate-training-data-ml.mdx delete mode 
100644 docs/filter-dataframe-with-llm.md create mode 100644 docs/filter-dataframe-with-llm.mdx delete mode 100644 docs/fuzzy-join-without-keys.md create mode 100644 docs/fuzzy-join-without-keys.mdx delete mode 100644 docs/rank-by-external-metric.md create mode 100644 docs/rank-by-external-metric.mdx delete mode 100644 docs/resolve-entities-python.md create mode 100644 docs/resolve-entities-python.mdx delete mode 100644 docs/scale-deduplication-20k-rows.md create mode 100644 docs/scale-deduplication-20k-rows.mdx diff --git a/docs-site/src/app/[...slug]/page.tsx b/docs-site/src/app/[...slug]/page.tsx index 5ddc2940..8c8c0767 100644 --- a/docs-site/src/app/[...slug]/page.tsx +++ b/docs-site/src/app/[...slug]/page.tsx @@ -26,14 +26,16 @@ export async function generateMetadata({ params }: PageProps) { const canonicalUrl = `https://everyrow.io/docs/${slugPath}`; + const pageTitle = doc.metadataTitle || doc.title; + return { - title: doc.title, + title: pageTitle, description: doc.description, alternates: { canonical: canonicalUrl, }, openGraph: { - title: doc.title, + title: pageTitle, description: doc.description, url: canonicalUrl, images: [{ url: "https://everyrow.io/everyrow-og.png" }], diff --git a/docs-site/src/app/case-studies/[slug]/page.tsx b/docs-site/src/app/case-studies/[slug]/page.tsx index 2bdf422a..4b9811cc 100644 --- a/docs-site/src/app/case-studies/[slug]/page.tsx +++ b/docs-site/src/app/case-studies/[slug]/page.tsx @@ -1,8 +1,13 @@ import { notFound } from "next/navigation"; import { DocsLayout } from "@/components/DocsLayout"; import { NotebookActions } from "@/components/NotebookActions"; +import { MDXContent } from "@/components/MDXContent"; import { getNavigation } from "@/utils/docs"; -import { getNotebookBySlug, getNotebookSlugs } from "@/utils/notebooks"; +import { + getCaseStudyMdx, + getNotebookBySlug, + getNotebookSlugs, +} from "@/utils/notebooks"; interface PageProps { params: Promise<{ slug: string }>; @@ -15,8 +20,26 @@ export async 
function generateStaticParams() { export async function generateMetadata({ params }: PageProps) { const { slug } = await params; - const notebook = getNotebookBySlug(slug); + const mdx = getCaseStudyMdx(slug); + if (mdx) { + const canonicalUrl = `https://everyrow.io/docs/case-studies/${slug}`; + const pageTitle = mdx.metadataTitle || mdx.title; + const pageDescription = mdx.description || `Case study: ${mdx.title}`; + return { + title: pageTitle, + description: pageDescription, + alternates: { canonical: canonicalUrl }, + openGraph: { + title: pageTitle, + description: pageDescription, + url: canonicalUrl, + images: [{ url: "https://everyrow.io/everyrow-og.png" }], + }, + }; + } + + const notebook = getNotebookBySlug(slug); if (!notebook) { return { title: "Not Found" }; } @@ -27,9 +50,7 @@ export async function generateMetadata({ params }: PageProps) { return { title: notebook.title, description, - alternates: { - canonical: canonicalUrl, - }, + alternates: { canonical: canonicalUrl }, openGraph: { title: notebook.title, description, @@ -41,8 +62,18 @@ export async function generateMetadata({ params }: PageProps) { export default async function NotebookPage({ params }: PageProps) { const { slug } = await params; - const notebook = getNotebookBySlug(slug); + const mdx = getCaseStudyMdx(slug); + if (mdx) { + const navigation = getNavigation(); + return ( + + + + ); + } + + const notebook = getNotebookBySlug(slug); if (!notebook) { notFound(); } diff --git a/docs-site/src/app/globals.css b/docs-site/src/app/globals.css index ea80cb5c..e639da2a 100644 --- a/docs-site/src/app/globals.css +++ b/docs-site/src/app/globals.css @@ -459,6 +459,35 @@ a.docs-sidebar-section-title:hover { margin-bottom: 0.25rem; } +/* Guide tabs (Claude Code / Python toggle) */ +.guide-tabs { + margin-top: 1.5rem; +} + +.guide-tab-selector { + display: flex; + gap: 0.5rem; + margin-bottom: 1.5rem; +} + +/* No-JS fallback: show all guide tab contents */ +@supports (scripting: none) { + 
.guide-tab-selector { + display: none; + } + .guide-tabs .tab-content { + display: block; + padding-top: 1.5rem; + border-top: 1px solid var(--border); + margin-top: 1.5rem; + } + .guide-tabs .tab-content:first-child { + border-top: none; + margin-top: 0; + padding-top: 0; + } +} + /* Installation tabs */ .installation-tabs { margin-top: 1.5rem; diff --git a/docs-site/src/components/GuideTabs.tsx b/docs-site/src/components/GuideTabs.tsx new file mode 100644 index 00000000..3dc487ab --- /dev/null +++ b/docs-site/src/components/GuideTabs.tsx @@ -0,0 +1,62 @@ +"use client"; + +import { useState, createContext, useContext, ReactNode } from "react"; + +type GuideTab = "claude-code" | "python"; + +const GuideTabContext = createContext(null); + +const TABS: { id: GuideTab; label: string }[] = [ + { id: "claude-code", label: "Claude Code" }, + { id: "python", label: "Python" }, +]; + +interface GuideTabsProps { + children: ReactNode; +} + +export function GuideTabs({ children }: GuideTabsProps) { + const [selected, setSelected] = useState("claude-code"); + + return ( + +
+
+ {TABS.map((tab) => ( + + ))} +
+
+ {children} +
+
+
+ ); +} + +interface GuideTabContentProps { + tab: GuideTab; + children: ReactNode; +} + +export function GuideTabContent({ tab, children }: GuideTabContentProps) { + const selected = useContext(GuideTabContext); + + // During SSR or no context, show all content (static fallback) + const isActive = selected === null || selected === tab; + + return ( +
+ {children} +
+ ); +} diff --git a/docs-site/src/components/MDXContent.tsx b/docs-site/src/components/MDXContent.tsx index 37c09cc4..40f1f6e5 100644 --- a/docs-site/src/components/MDXContent.tsx +++ b/docs-site/src/components/MDXContent.tsx @@ -1,6 +1,7 @@ import { MDXRemote } from "next-mdx-remote/rsc"; import { InstallationTabs, TabContent } from "./InstallationTabs"; import { ChainedOpsTabs, StepContent } from "./ChainedOpsTabs"; +import { GuideTabs, GuideTabContent } from "./GuideTabs"; import rehypeHighlight from "rehype-highlight"; import remarkGfm from "remark-gfm"; @@ -9,6 +10,8 @@ const components = { TabContent, ChainedOpsTabs, StepContent, + GuideTabs, + GuideTabContent, }; interface MDXContentProps { diff --git a/docs-site/src/utils/docs.ts b/docs-site/src/utils/docs.ts index 83c4f983..2c047f7f 100644 --- a/docs-site/src/utils/docs.ts +++ b/docs-site/src/utils/docs.ts @@ -9,6 +9,7 @@ const DOCS_DIR = path.join(process.cwd(), "..", "docs"); export interface DocMeta { slug: string; title: string; + metadataTitle?: string; description?: string; category: string; format: "md" | "mdx"; @@ -43,8 +44,8 @@ export function getAllDocs(): DocMeta[] { const fullPath = path.join(dir, entry.name); if (entry.isDirectory()) { - // Skip data directory - if (entry.name === "data") continue; + // Skip directories served by other routes or not documentation + if (["data", "case_studies", "claude-code-runs"].includes(entry.name)) continue; scanDir(fullPath, path.join(prefix, entry.name)); } else if (entry.name.endsWith(".md") || entry.name.endsWith(".mdx")) { const isMdx = entry.name.endsWith(".mdx"); @@ -56,6 +57,7 @@ export function getAllDocs(): DocMeta[] { docs.push({ slug, title: data.title || slugToTitle(path.basename(slug)), + metadataTitle: data.metadataTitle, description: data.description, category: getCategory(relativePath), format: isMdx ? 
"mdx" : "md", @@ -82,6 +84,7 @@ export function getDocBySlug(slug: string): Doc | null { return { slug: baseSlug, title: data.title || slugToTitle(path.basename(baseSlug)), + metadataTitle: data.metadataTitle, description: data.description, category: getCategory(baseSlug), format: ext === ".mdx" ? "mdx" : "md", @@ -152,6 +155,7 @@ export function getNavigation(): NavSection[] { "guides", "notebooks", "api", + "case-studies", ].includes(d.slug)) .map((d) => ({ slug: d.slug, title: d.title })), }, diff --git a/docs-site/src/utils/notebooks.ts b/docs-site/src/utils/notebooks.ts index 04bcac56..93abf0a2 100644 --- a/docs-site/src/utils/notebooks.ts +++ b/docs-site/src/utils/notebooks.ts @@ -1,5 +1,6 @@ import fs from "fs"; import path from "path"; +import matter from "gray-matter"; const NOTEBOOKS_DIR = path.join(process.cwd(), "src", "notebooks"); const SOURCE_NOTEBOOKS_DIR = path.join(process.cwd(), "..", "docs", "case_studies"); @@ -71,19 +72,43 @@ function slugToTitle(slug: string): string { } export function getAllNotebooks(): NotebookMeta[] { - if (!fs.existsSync(NOTEBOOKS_DIR)) { - return []; + const slugSet = new Set(); + const results: NotebookMeta[] = []; + + // Discover from HTML notebook files + if (fs.existsSync(NOTEBOOKS_DIR)) { + const files = fs.readdirSync(NOTEBOOKS_DIR); + for (const f of files) { + if (!f.endsWith(".html")) continue; + const slug = f.replace(/\.html$/, ""); + slugSet.add(slug); + } } - const files = fs.readdirSync(NOTEBOOKS_DIR); - return files - .filter((f) => f.endsWith(".html")) - .map((f) => { - const slug = f.replace(/\.html$/, ""); + // Discover from case study directories with content.mdx + if (fs.existsSync(SOURCE_NOTEBOOKS_DIR)) { + const dirs = fs.readdirSync(SOURCE_NOTEBOOKS_DIR, { withFileTypes: true }); + for (const dir of dirs) { + if (!dir.isDirectory()) continue; + const mdxPath = path.join(SOURCE_NOTEBOOKS_DIR, dir.name, "content.mdx"); + if (fs.existsSync(mdxPath)) { + slugSet.add(dir.name); + } + } + } + + for 
(const slug of slugSet) { + // Prefer MDX frontmatter for title/description + const mdx = getCaseStudyMdx(slug); + if (mdx) { + results.push({ slug, title: mdx.title, description: mdx.description }); + } else { const { title, description } = extractMetadataFromSource(slug); - return { slug, title, description }; - }) - .sort((a, b) => a.title.localeCompare(b.title)); + results.push({ slug, title, description }); + } + } + + return results.sort((a, b) => a.title.localeCompare(b.title)); } export function getNotebookBySlug(slug: string): Notebook | null { @@ -107,3 +132,28 @@ export function getNotebookBySlug(slug: string): Notebook | null { export function getNotebookSlugs(): string[] { return getAllNotebooks().map((n) => n.slug); } + +export interface CaseStudyMdx { + title: string; + metadataTitle?: string; + description: string; + content: string; +} + +export function getCaseStudyMdx(slug: string): CaseStudyMdx | null { + const mdxPath = path.join(SOURCE_NOTEBOOKS_DIR, slug, "content.mdx"); + + if (!fs.existsSync(mdxPath)) { + return null; + } + + const fileContent = fs.readFileSync(mdxPath, "utf-8"); + const { data, content } = matter(fileContent); + + return { + title: data.title || slugToTitle(slug), + metadataTitle: data.metadataTitle, + description: data.description || "", + content, + }; +} diff --git a/docs/active-learning-llm-oracle.md b/docs/active-learning-llm-oracle.md deleted file mode 100644 index efddff76..00000000 --- a/docs/active-learning-llm-oracle.md +++ /dev/null @@ -1,118 +0,0 @@ ---- -title: How to replace human data annotators with LLMs in active learning -description: Use everyrow's agent_map as an LLM oracle in an active learning loop. 200 labels in under 5 minutes for $0.26, matching human annotation accuracy within 0.1% across 10 controlled repeats on DBpedia-14. 
---- - -# How to Replace Human Data Annotators with LLMs in Active Learning - -![Active Learning: Ground Truth vs LLM Oracle](https://media.githubusercontent.com/media/futuresearch/everyrow-sdk/refs/heads/main/docs-site/public/images/learning_curve_accuracy.png) - -Human data labeling is slow and expensive. We replaced the human annotator with an LLM oracle in an active learning loop and achieved identical classifier performance — 200 labels in under 5 minutes for $0.26. - -## Install - -```bash -pip install everyrow -export EVERYROW_API_KEY=your_key_here # Get one at everyrow.io/api-key -``` - -## Experiment design - -[Active learning](https://en.wikipedia.org/wiki/Active_learning_(machine_learning)) reduces labeling costs by letting the model choose which examples to label next, focusing on the ones it is most uncertain about. But you still need an oracle to provide those labels, traditionally a human annotator. - -We used a TF-IDF + LightGBM classifier with entropy based uncertainty sampling. Each iteration selects the 20 most uncertain examples, sends them to the LLM for annotation, and retrains. 10 iterations, 200 labels total. - -We ran 10 independent repeats with different seeds, each time running both a ground truth oracle (human labels) and the LLM oracle with the same seed, a direct, controlled comparison. 
- -```python -from typing import Literal - -import pandas as pd -from pydantic import BaseModel, Field - -from everyrow import create_session -from everyrow.ops import agent_map -from everyrow.task import EffortLevel - - -LABEL_NAMES = { - 0: "Company", 1: "Educational Institution", 2: "Artist", - 3: "Athlete", 4: "Office Holder", 5: "Mean Of Transportation", - 6: "Building", 7: "Natural Place", 8: "Village", - 9: "Animal", 10: "Plant", 11: "Album", 12: "Film", 13: "Written Work", -} -CATEGORY_TO_ID = {v: k for k, v in LABEL_NAMES.items()} - - -class DBpediaClassification(BaseModel): - category: Literal[ - "Company", "Educational Institution", "Artist", - "Athlete", "Office Holder", "Mean Of Transportation", - "Building", "Natural Place", "Village", - "Animal", "Plant", "Album", "Film", "Written Work", - ] = Field(description="The DBpedia ontology category") - - -async def query_llm_oracle(texts_df: pd.DataFrame) -> list[int]: - async with create_session(name="Active Learning Oracle") as session: - result = await agent_map( - session=session, - task="Classify this text into exactly one DBpedia ontology category.", - input=texts_df[["text"]], - response_model=DBpediaClassification, - effort_level=EffortLevel.LOW, - ) - return [CATEGORY_TO_ID.get(result.data["category"].iloc[i], -1) - for i in range(len(texts_df))] -``` - -## Results - -| Metric | Value | -| -------------------------- | ------------------ | -| Labels per run | 200 | -| Cost per run | $0.26 | -| Cost per labeled item | $0.0013 | -| Final accuracy (LLM) | 80.7% ± 0.8% | -| Final accuracy (human) | 80.6% ± 1.0% | -| LLM–human label agreement | 96.1% ± 1.6% | -| Repeats | 10 | -| Dataset | DBpedia-14 (14-class text classification) | - -The learning curves overlap almost perfectly. The shaded bands show ±1 standard deviation across 10 repeats — the LLM oracle tracks the ground truth oracle at every iteration. 
- -Final test accuracies averaged over 10 repeats: - -| Data Labeling Method | Final Accuracy (mean ± std) | -| -------------------------------- | --------------------------- | -| Human annotation (ground truth) | 80.6% ± 1.0% | -| LLM annotation (everyrow) | 80.7% ± 0.8% | - -The LLM oracle is within noise of the ground truth baseline. Automated data labeling produces classifiers just as good as human labeled data. - -The LLM agreed with ground truth labels 96.1% ± 1.6% of the time. Roughly 1 in 25 labels disagrees with the human annotation, but that does not hurt the downstream classifier. - -| Metric | Value | -| ------------------------- | ------- | -| Cost per run (200 labels) | $0.26 | -| Cost per labeled item | $0.0013 | -| Total (10 repeats) | $2.58 | - -200 labels in under 5 minutes for $0.26, fully automated. - -## Limitations - -We tested on one dataset with well separated categories. More ambiguous labeling tasks may see a gap between human and LLM annotation quality. We used a simple classifier (TF-IDF + LightGBM); neural models that overfit individual examples may be less noise tolerant. - -The low cost in this experiment comes from using `EffortLevel.LOW`, which selects a small, fast model and doesn't use web research to improve the label quality. For simple classification tasks with well separated categories, this is sufficient. - -For more ambiguous labeling tasks, you can use `EffortLevel.MEDIUM` or `EffortLevel.HIGH` to get higher quality labels from smarter models using the web. The cost scales accordingly, but even at higher effort levels, LLM labeling remains cheaper and faster than human annotation. - -## Reproduce this experiment - -The full pipeline is available as a [companion notebook on Kaggle](https://www.kaggle.com/code/rafaelpoyiadzi/active-learning-with-an-llm-oracle). The experiment uses the [DBpedia-14 dataset](https://huggingface.co/datasets/fancyzhx/dbpedia_14), a 14-class text classification benchmark. 
See also the [full blog post](https://futuresearch.ai/active-learning-llm-oracle) for additional discussion. - -## Related - -- [How to Classify DataFrame Rows with an LLM](/classify-dataframe-rows-llm) — label data at scale with `agent_map` -- [How to Deduplicate Training Data in Python](/deduplicate-training-data-ml) — clean ML datasets before training diff --git a/docs/active-learning-llm-oracle.mdx b/docs/active-learning-llm-oracle.mdx new file mode 100644 index 00000000..81efe815 --- /dev/null +++ b/docs/active-learning-llm-oracle.mdx @@ -0,0 +1,175 @@ +--- +title: LLM-Powered Data Labeling +metadataTitle: Using Claude Code to Label Data Instead of Hiring Annotators +description: High-quality labels for classification, tagging, and annotation tasks. LLM-powered labeling that matches human annotator accuracy at a fraction of the time and cost. +--- + +# LLM-Powered Data Labeling + + + + +Claude Code's interactive classification works for labeling a dozen items in conversation. When an active learning loop requests hundreds of labels programmatically, with consistent schema and structured output every time, you need a labeling service that can run on demand. + +Here, we get Claude Code to label 200 text samples from the DBpedia-14 dataset into 14 ontology categories, achieving 98.5% accuracy. + +| Metric | Value | +| --------------------- | ------------ | +| Labels produced | 200 | +| Strict accuracy | 96.0% | +| Normalized accuracy | 98.5% | +| Time | 4.7 minutes | +| Cost | $3.35 | + +First, install the [everyrow](https://github.com/futuresearch/everyrow-sdk) plugin for Claude Code: + +```bash +claude plugin marketplace add futuresearch/everyrow-sdk +claude plugin install everyrow@futuresearch +``` + +Set your API key before launching Claude Code: + +```bash +export EVERYROW_API_KEY=your_key_here # Get one at everyrow.io/api-key +``` + +Prepare a CSV with 200 text samples from the [DBpedia-14 dataset](https://huggingface.co/datasets/fancyzhx/dbpedia_14). 
Tell Claude: + +``` +Classify each text in dbpedia_samples.csv into exactly one DBpedia ontology +category: Company, Educational Institution, Artist, Athlete, Office Holder, +Mean Of Transportation, Building, Natural Place, Village, Animal, Plant, +Album, Film, or Written Work. +``` + +Claude calls everyrow's `agent` MCP tool with the classification schema: + +``` +Tool: everyrow_agent +├─ task: "Classify this text into exactly one DBpedia ontology category." +├─ input_csv: "/Users/you/dbpedia_samples.csv" +└─ response_schema: {"category": "enum of 14 DBpedia categories"} + +→ Submitted: 200 rows for processing. + Session: https://everyrow.io/sessions/5f5a052a-c240-43d8-91a4-ad7ad274f6e1 + Task ID: 5f5a... + +Tool: everyrow_progress +├─ task_id: "5f5a..." +→ Running: 0/200 complete, 200 running (15s elapsed) + +... + +Tool: everyrow_progress +→ Completed: 200/200 (0 failed) in 279s. + +Tool: everyrow_results +├─ task_id: "5f5a..." +├─ output_path: "/Users/you/dbpedia_classified.csv" +→ Saved 200 rows to /Users/you/dbpedia_classified.csv +``` + +200 labels in 4.7 minutes. [View the session](https://everyrow.io/sessions/5f5a052a-c240-43d8-91a4-ad7ad274f6e1). + +| Category | Count | +|----------|-------| +| Building | 22 | +| Artist | 20 | +| Mean Of Transportation | 18 | +| Animal | 15 | +| Educational Institution | 15 | +| Company | 13 | +| Album | 13 | +| Office Holder | 12 | +| Film | 12 | +| Natural Place | 11 | + +Of the 8 "strict" mismatches against ground truth, 5 were formatting variants (e.g., "WrittenWork" vs "Written Work"), not true errors. Only 3 were genuinely incorrect classifications: a Village labeled as Settlement, an Educational Institution labeled as University, and an Artist labeled as Writer. These are semantic near-misses, not random errors. + + + + +Human data labeling is slow and expensive. The everyrow SDK can replace the human annotator in an active learning loop, producing structured labels at scale. 
200 labels in under 5 minutes for $0.26. + +![Active Learning: Ground Truth vs LLM Oracle](https://media.githubusercontent.com/media/futuresearch/everyrow-sdk/refs/heads/main/docs-site/public/images/learning_curve_accuracy.png) + +| Metric | Value | +| -------------------------- | ------------------ | +| Labels per run | 200 | +| Cost per run | $0.26 | +| Cost per labeled item | $0.0013 | +| Final accuracy (LLM) | 80.7% ± 0.8% | +| Final accuracy (human) | 80.6% ± 1.0% | +| LLM-human label agreement | 96.1% ± 1.6% | +| Repeats | 10 | +| Dataset | DBpedia-14 (14-class text classification) | + +```bash +pip install everyrow +export EVERYROW_API_KEY=your_key_here # Get one at everyrow.io/api-key +``` + +We used a TF-IDF + LightGBM classifier with entropy-based uncertainty sampling. Each iteration selects the 20 most uncertain examples, sends them to the LLM for annotation, and retrains. 10 iterations, 200 labels total. We ran 10 independent repeats with different seeds, comparing the LLM oracle against ground truth labels. 
+ +```python +from typing import Literal + +import pandas as pd +from pydantic import BaseModel, Field + +from everyrow import create_session +from everyrow.ops import agent_map +from everyrow.task import EffortLevel + + +LABEL_NAMES = { + 0: "Company", 1: "Educational Institution", 2: "Artist", + 3: "Athlete", 4: "Office Holder", 5: "Mean Of Transportation", + 6: "Building", 7: "Natural Place", 8: "Village", + 9: "Animal", 10: "Plant", 11: "Album", 12: "Film", 13: "Written Work", +} +CATEGORY_TO_ID = {v: k for k, v in LABEL_NAMES.items()} + + +class DBpediaClassification(BaseModel): + category: Literal[ + "Company", "Educational Institution", "Artist", + "Athlete", "Office Holder", "Mean Of Transportation", + "Building", "Natural Place", "Village", + "Animal", "Plant", "Album", "Film", "Written Work", + ] = Field(description="The DBpedia ontology category") + + +async def query_llm_oracle(texts_df: pd.DataFrame) -> list[int]: + async with create_session(name="Active Learning Oracle") as session: + result = await agent_map( + session=session, + task="Classify this text into exactly one DBpedia ontology category.", + input=texts_df[["text"]], + response_model=DBpediaClassification, + effort_level=EffortLevel.LOW, + ) + return [CATEGORY_TO_ID.get(result.data["category"].iloc[i], -1) + for i in range(len(texts_df))] +``` + +The learning curves overlap almost perfectly. Final test accuracies averaged over 10 repeats: + +| Data Labeling Method | Final Accuracy (mean ± std) | +| -------------------------------- | --------------------------- | +| Human annotation (ground truth) | 80.6% ± 1.0% | +| LLM annotation (everyrow) | 80.7% ± 0.8% | + +The LLM oracle is within noise of the ground truth baseline. The LLM agreed with ground truth labels 96.1% ± 1.6% of the time. Roughly 1 in 25 labels disagrees, but that does not hurt the downstream classifier. + +The low cost ($0.26 per run) comes from using `EffortLevel.LOW`, which selects a small, fast model without web research. 
For more ambiguous tasks, use `EffortLevel.MEDIUM` or `EffortLevel.HIGH` for higher quality labels. + +The full pipeline is available as a [companion notebook on Kaggle](https://www.kaggle.com/code/rafaelpoyiadzi/active-learning-with-an-llm-oracle). See also the [full blog post](https://futuresearch.ai/active-learning-llm-oracle). + + + + +--- + +Built with [everyrow](https://github.com/futuresearch/everyrow-sdk). Related guides: [Classify DataFrame Rows](/classify-dataframe-rows-llm) (label data at scale), [Deduplicate Training Data](/deduplicate-training-data-ml) (clean ML datasets before training). diff --git a/docs/add-column-web-lookup.md b/docs/add-column-web-lookup.md deleted file mode 100644 index a783b1fa..00000000 --- a/docs/add-column-web-lookup.md +++ /dev/null @@ -1,85 +0,0 @@ ---- -title: How to Add A Column to a DataFrame with Web Research -description: Step-by-step guide to enriching a pandas DataFrame with new columns using LLM-powered web research agents to find and add any data. ---- - -# How to Add a Column to a DataFrame Using Web Lookup - -`pandas.apply()` runs a local function on each row. But it can't use LLM judgment or do web research to find new values. And doing this by hand can be very slow or expensive. EveryRow provides a one-line utility to do this cheaply and at scale. - -This guide shows how to add a column for price, for 246 common software products, in a single method call on your pandas dataframe. - -| Metric | Value | -| ------------ | ------------------------------------------------------------------------- | -| Rows | 246 | -| Cost | $6.68 | -| Time | 15.7 minutes | -| Success rate | 99.6% (1 failed) | -| Session | [view](https://everyrow.io/sessions/e09de4e8-1e0d-44af-8d1a-a25620565ed4) | - -```bash -pip install everyrow -export EVERYROW_API_KEY=your_key_here # Get one at everyrow.io/api-key -``` - -The dataset is a list of 246 SaaS and developer tools like Slack, Notion, Asana. 
Download [saas_products.csv](https://media.githubusercontent.com/media/futuresearch/everyrow-sdk/refs/heads/main/docs/data/saas_products.csv) to follow along. We find the annual price of each product's lowest paid tier, which isn't available through any structured API; it requires visiting pricing pages that change frequently and present information in different formats. - -```python -import asyncio -import pandas as pd -from pydantic import BaseModel, Field -from everyrow import create_session -from everyrow.ops import agent_map - -class PricingInfo(BaseModel): - lowest_paid_tier_annual_price: float = Field( - description="Annual price in USD for the lowest paid tier. " - "Use monthly price * 12 if only monthly shown. " - "0 if no paid tier exists." - ) - tier_name: str = Field( - description="Name of the lowest paid tier (e.g. 'Pro', 'Starter', 'Basic')" - ) - -async def main(): - df = pd.read_csv("saas_products.csv") # Single column: product - - async with create_session(name="SaaS pricing lookup") as session: - result = await agent_map( - session=session, - task=""" - Find the pricing for this SaaS product's lowest paid tier. - Visit the product's pricing page to find this information. - - Look for the cheapest paid plan (not free tier). Report: - - The annual price in USD (if monthly, multiply by 12) - - The name of that tier - - If the product has no paid tier or pricing isn't public, use 0. - """, - input=df, - response_model=PricingInfo, - ) - print(result.data) - -asyncio.run(main()) -``` - -``` - product tier_name lowest_paid_tier_annual_price -0 Notion Plus 96.00 -1 Slack Pro 87.00 -2 Asana Starter 131.88 -3 Monday.com Basic 108.00 -4 Trello Standard 60.00 -5 Jira Standard 94.92 -6 Linear Basic 120.00 -7 ClickUp Unlimited 84.00 -... -``` - -Each result includes a `research` column showing how the agent found the answer, with citations linking back to sources. 
For example, Slack's entry shows: "The Pro plan costs $7.25 USD per active user per month when billed annually (from slack.com/pricing/pro). Annual price calculation: $7.25 × 12 months = $87 per user per year." - -The key to doing this cheaply is in the orchestration of the web research agents, using the right batching, parallelism, LLMs, search tools, and page reading tools. Web research agents have degrees of freedom on how to solve problems, and EveryRow optimizes them for cost and accuracy, all in a single method on your pandas dataframe. - -By using LLM web agents, this works for any new column, any enrichment, that you need on your table, as long as the information can be found on the web. diff --git a/docs/add-column-web-lookup.mdx b/docs/add-column-web-lookup.mdx new file mode 100644 index 00000000..88e047cb --- /dev/null +++ b/docs/add-column-web-lookup.mdx @@ -0,0 +1,169 @@ +--- +title: Add a Column via Web Research +metadataTitle: Using Claude Code to Enrich Every Row with Web Research +description: Add pricing, specs, funding rounds, or any public information as new columns. Research agents visit relevant pages and return structured data per row. +--- + +# Add a Column via Web Research + + + + +Ask Claude Code to find the pricing for a SaaS product and it will search the web and give you an answer. Doing that for 246 products means visiting 246 separate pricing pages, each with a different layout and pricing model. That volume of web research needs to happen in parallel. + +Here, we get Claude Code to find the annual price of the lowest paid tier for 246 SaaS products, by visiting each product's pricing page. 
+ +| Metric | Value | +| ------------ | ------------ | +| Rows | 246 | +| Cost | $5.28 | +| Time | 5.5 minutes | +| Success rate | 100% | + +First, install the [everyrow](https://github.com/futuresearch/everyrow-sdk) plugin for Claude Code: + +```bash +claude plugin marketplace add futuresearch/everyrow-sdk +claude plugin install everyrow@futuresearch +``` + +Set your API key before launching Claude Code: + +```bash +export EVERYROW_API_KEY=your_key_here # Get one at everyrow.io/api-key +``` + +Download the dataset: [saas_products.csv](https://media.githubusercontent.com/media/futuresearch/everyrow-sdk/refs/heads/main/docs/data/saas_products.csv) (246 SaaS and developer tools like Slack, Notion, Asana). With the CSV in your working directory, tell Claude: + +``` +For each product in saas_products.csv, find the annual price of its lowest paid tier. +Visit the product's pricing page to find this. If only monthly pricing is shown, +multiply by 12. Return the price and the tier name. If no paid tier exists, use 0. +``` + +Claude calls everyrow's `agent` MCP tool to dispatch web research agents for every row: + +``` +Tool: everyrow_agent +├─ task: "Find the pricing for this SaaS product's lowest paid tier..." +├─ input_csv: "/Users/you/saas_products.csv" +└─ response_schema: {"lowest_paid_tier_annual_price": "float", "tier_name": "string"} + +→ Submitted: 246 rows for processing. + Session: https://everyrow.io/sessions/5c19ee04-f1ac-45c6-bf01-a2b77515011b + Task ID: 5c19... + +Tool: everyrow_progress +├─ task_id: "5c19..." +→ Running: 0/246 complete, 246 running (15s elapsed) + +Tool: everyrow_progress +→ Running: 123/246 complete, 123 running (150s elapsed) + +... + +Tool: everyrow_progress +→ Completed: 246/246 (0 failed) in 327s. + +Tool: everyrow_results +├─ task_id: "5c19..." +├─ output_path: "/Users/you/saas_pricing.csv" +→ Saved 246 rows to /Users/you/saas_pricing.csv +``` + +All 246 products researched in 5.5 minutes. 
[View the session](https://everyrow.io/sessions/5c19ee04-f1ac-45c6-bf01-a2b77515011b). + +| Product | Annual Price | Tier | +|---------|-------------|------| +| 1Password | $35.88 | Individual | +| Airtable | $240.00 | Team | +| Amplitude | $588.00 | Plus | +| Notion | $96.00 | Plus | +| Slack | $87.00 | Pro | + +45 products (18.3%) correctly reported $0 for products with usage-based pricing (AWS ECR, Anthropic API) or no public pricing. Each result includes a research trail showing how the agent found the answer, with citations linking back to sources. + + + + +`pandas.apply()` runs a local function on each row. But it can't use LLM judgment or do web research to find new values. The everyrow SDK dispatches web research agents to look up data for every row in parallel. + +This guide shows how to add a pricing column for 246 SaaS products in a single method call. + +| Metric | Value | +| ------------ | ------------------------------------------------------------------------- | +| Rows | 246 | +| Cost | $6.68 | +| Time | 15.7 minutes | +| Success rate | 99.6% (1 failed) | +| Session | [view](https://everyrow.io/sessions/e09de4e8-1e0d-44af-8d1a-a25620565ed4) | + +```bash +pip install everyrow +export EVERYROW_API_KEY=your_key_here # Get one at everyrow.io/api-key +``` + +The dataset is a list of 246 SaaS and developer tools. Download [saas_products.csv](https://media.githubusercontent.com/media/futuresearch/everyrow-sdk/refs/heads/main/docs/data/saas_products.csv) to follow along. We find the annual price of each product's lowest paid tier, which requires visiting pricing pages that change frequently and present information in different formats. + +```python +import asyncio +import pandas as pd +from pydantic import BaseModel, Field +from everyrow import create_session +from everyrow.ops import agent_map + +class PricingInfo(BaseModel): + lowest_paid_tier_annual_price: float = Field( + description="Annual price in USD for the lowest paid tier. 
" + "Use monthly price * 12 if only monthly shown. " + "0 if no paid tier exists." + ) + tier_name: str = Field( + description="Name of the lowest paid tier (e.g. 'Pro', 'Starter', 'Basic')" + ) + +async def main(): + df = pd.read_csv("saas_products.csv") # Single column: product + + async with create_session(name="SaaS pricing lookup") as session: + result = await agent_map( + session=session, + task=""" + Find the pricing for this SaaS product's lowest paid tier. + Visit the product's pricing page to find this information. + + Look for the cheapest paid plan (not free tier). Report: + - The annual price in USD (if monthly, multiply by 12) + - The name of that tier + + If the product has no paid tier or pricing isn't public, use 0. + """, + input=df, + response_model=PricingInfo, + ) + print(result.data) + +asyncio.run(main()) +``` + +``` + product tier_name lowest_paid_tier_annual_price +0 Notion Plus 96.00 +1 Slack Pro 87.00 +2 Asana Starter 131.88 +3 Monday.com Basic 108.00 +4 Trello Standard 60.00 +5 Jira Standard 94.92 +6 Linear Basic 120.00 +7 ClickUp Unlimited 84.00 +... +``` + +Each result includes a `research` column showing how the agent found the answer, with citations linking back to sources. The key to doing this cheaply is in the orchestration of the web research agents, using the right batching, parallelism, LLMs, search tools, and page reading tools. + + + + +--- + +Built with [everyrow](https://github.com/futuresearch/everyrow-sdk). See the [agent_map documentation](reference/AGENT_MAP) for more options including response models and effort levels. diff --git a/docs/case-studies.md b/docs/case-studies.md index bece0685..c09d02c5 100644 --- a/docs/case-studies.md +++ b/docs/case-studies.md @@ -9,32 +9,31 @@ Runnable case studies with real datasets. 
Each case study demonstrates an everyr ## Screen -- [LLM-Powered Screening at Scale](/docs/case-studies/llm-powered-screening-at-scale) +- [Screen 10,000 Rows](/docs/case-studies/llm-powered-screening-at-scale) - [Screen Stocks by Investment Thesis](/docs/case-studies/screen-stocks-by-investment-thesis) -- [Screen Stocks by Margin Sensitivity](/docs/case-studies/screen-stocks-by-margin-sensitivity) -- [Screen Job Postings by Criteria](/docs/case-studies/screen-job-postings-by-criteria) +- [Screen Stocks by Economic Sensitivity](/docs/case-studies/screen-stocks-by-margin-sensitivity) +- [Screen Job Listings](/docs/case-studies/screen-job-postings-by-criteria) ## Rank - [Score Leads from Fragmented Data](/docs/case-studies/score-leads-from-fragmented-data) -- [Score Leads Without CRM History](/docs/case-studies/score-leads-without-crm-history) -- [Research and Rank Permit Times](/docs/case-studies/research-and-rank-permit-times) +- [Score Cold Leads via Web Research](/docs/case-studies/score-leads-without-crm-history) +- [Research and Rank Web Data](/docs/case-studies/research-and-rank-permit-times) ## Dedupe -- [Dedupe CRM Company Records](/docs/case-studies/dedupe-crm-company-records) +- [Deduplicate CRM Records](/docs/case-studies/dedupe-crm-company-records) ## Merge -- [LLM-Powered Merging at Scale](/docs/case-studies/llm-powered-merging-at-scale) -- [Match Software Vendors to Requirements](/docs/case-studies/match-software-vendors-to-requirements) -- [Merge Contacts with Company Data](/docs/case-studies/merge-contacts-with-company-data) -- [Merge Overlapping Contact Lists](/docs/case-studies/merge-overlapping-contact-lists) - -## Research - -- [LLM Web Research Agents at Scale](/docs/case-studies/llm-web-research-agents-at-scale) +- [Merge Thousands of Records](/docs/case-studies/llm-powered-merging-at-scale) +- [Fuzzy Match Across Tables](/docs/case-studies/match-software-vendors-to-requirements) +- [Enrich Contacts with Company 
Data](/docs/case-studies/merge-contacts-with-company-data) +- [Deduplicate Contact Lists](/docs/case-studies/merge-overlapping-contact-lists) +- [Link Records Across Medical Datasets](/docs/case-studies/match-clinical-trials-to-papers) +- [Merge Costs and Speed](/docs/case-studies/understanding-costs-and-speed-for-merge) ## Multi-Method - [Multi-Stage Lead Qualification](/docs/case-studies/multi-stage-lead-qualification) +- [LLM Cost vs. Accuracy](/docs/case-studies/deep-research-bench-pareto-analysis) diff --git a/docs/case_studies/dedupe-crm-company-records/content.mdx b/docs/case_studies/dedupe-crm-company-records/content.mdx new file mode 100644 index 00000000..b29c281b --- /dev/null +++ b/docs/case_studies/dedupe-crm-company-records/content.mdx @@ -0,0 +1,123 @@ +--- +title: Deduplicate CRM Records +metadataTitle: Get Claude Code to Deduplicate Your CRM +description: CRM data cleaning that catches subsidiaries, acquired companies, name variations, and ticker symbol differences. Accurate deduplication for messy company records. +--- + +# Deduplicate CRM Records + + + + +Claude Code can find exact duplicates. But what if "PANW", "Pallow Alto", and "Paloalto Networks" are all the same company? And "W-Mart", "Wall-Mart", and "WMT Corp" are all Walmart? + +Here, we get Claude Code to deduplicate 500 messy CRM records down to unique companies. 
+ +| Metric | Value | +| ------------------- | ------------ | +| Records processed | 500 | +| Unique entities | 146 | +| Duplicates removed | 354 (70.8%) | +| Cost | $1.38 | +| Time | 7.0 minutes | + +First, install the [everyrow](https://github.com/futuresearch/everyrow-sdk) plugin for Claude Code: + +```bash +claude plugin marketplace add futuresearch/everyrow-sdk +claude plugin install everyrow@futuresearch +``` + +Set your API key before launching Claude Code: + +```bash +export EVERYROW_API_KEY=your_key_here # Get one at everyrow.io/api-key +``` + +Download [case_01_crm_data.csv](https://media.githubusercontent.com/media/futuresearch/everyrow-sdk/refs/heads/main/docs/data/case_01_crm_data.csv). Tell Claude: + +``` +Deduplicate this CRM dataset. Two entries are duplicates if they include data +for the same legal entity. +``` + +Claude calls everyrow's `dedupe` MCP tool: + +``` +Tool: everyrow_dedupe +├─ equivalence_relation: "Two entries are duplicates if they include data for the same legal entity." +└─ input_csv: "/Users/you/case_01_crm_data.csv" + +→ Submitted: 500 rows for deduplication. + Session: https://everyrow.io/sessions/0f6aa459-6e83-4df0-b9e8-bdb8ec594d91 + Task ID: 0f6a... + +Tool: everyrow_progress +├─ task_id: "0f6a..." +→ Running: 0/500 complete (30s elapsed) + +... + +Tool: everyrow_progress +→ Completed: 500/500 (0 failed) in 422s. + +Tool: everyrow_results +├─ task_id: "0f6a..." +├─ output_path: "/Users/you/crm_deduplicated.csv" +→ Saved 500 rows to /Users/you/crm_deduplicated.csv +``` + +500 records resolved to 146 unique entities. [View the session](https://everyrow.io/sessions/0f6aa459-6e83-4df0-b9e8-bdb8ec594d91). 
+ +| Cluster | Records | Variants | +|---------|---------|----------| +| Palo Alto Networks | 8 | Pallow Alto, PANW, Paloalto Networks, Palo Alto Net Inc | +| Walmart | 8 | W-Mart, Wall-Mart, WMT Corp, Wallmart, Wal-Mart Stores | +| Uber | 8 | Ubar, Ubr, Uber Tech, Uber Corporation | +| ServiceNow | 6 | Service Now, Service-Now, SerivceNow, Service Now Inc | +| Nike | 4 | Nyke, Nike Corp, Nike Incorporated, Nike Inc. | + +The output includes `equivalence_class_id` and `selected` columns. Filter to `selected == True` to get one record per entity. The system uses embeddings for initial clustering, then LLM pairwise comparison for accuracy. + + + + +The everyrow SDK's `dedupe()` resolves messy CRM records to unique entities using semantic matching. + +| Metric | Value | +| ------------------- | ------------ | +| Records processed | 500 | +| Unique entities | 124 | +| Cost | $3.52 | +| Time | 102 seconds | + +```bash +pip install everyrow +export EVERYROW_API_KEY=your_key_here # Get one at everyrow.io/api-key +``` + +```python +import asyncio +import pandas as pd +from everyrow import create_session +from everyrow.ops import dedupe + +data = pd.read_csv("case_01_crm_data.csv") + +async def main(): + async with create_session(name="CRM Deduplication") as session: + result = await dedupe( + session=session, + input=data, + equivalence_relation="Two entries are duplicates if they include data for the same legal entity.", + ) + deduplicated = result.data[result.data["selected"]] + return deduplicated + +clean_data = asyncio.run(main()) +``` + +500 records reduced to 124 unique companies. The output includes `equivalence_class_id`, `equivalence_class_name`, and `selected` columns. The system handles ticker symbols (PANW to Palo Alto Networks), nicknames (Big Blue to IBM), and typos (Wallmart to Walmart). 
+ + + diff --git a/docs/case_studies/deep-research-bench-pareto-analysis/content.mdx b/docs/case_studies/deep-research-bench-pareto-analysis/content.mdx new file mode 100644 index 00000000..3ede6f0c --- /dev/null +++ b/docs/case_studies/deep-research-bench-pareto-analysis/content.mdx @@ -0,0 +1,100 @@ +--- +title: LLM Cost vs. Accuracy +metadataTitle: "Cost vs. Accuracy: Claude, Gemini, and GPT Models in Web Research" +description: Pareto frontier analysis across 26 LLM configurations on agentic web research tasks. Optimal model selection at each price point for maximum accuracy per dollar. +--- + +# LLM Cost vs. Accuracy + + + + +Claude Code can compare model benchmarks. But what if you need to compute Pareto frontiers across 26 model configurations, mapping cost, speed, and accuracy tradeoffs, to understand which models everyrow selects at each effort level? + +Here, we analyze results from the Deep Research Bench (DRB), which evaluates models on agentic web-research tasks. + +| Metric | Value | +| -------------- | ----------- | +| Models evaluated | 26 | +| everyrow cost | $0.00 | + +This analysis doesn't use everyrow's MCP tools. It fetches benchmark data from the DRB public API and computes Pareto frontiers locally. 
+ +The cost Pareto frontier (7 models that achieve the best accuracy for their price): + +| Model | Cost | DRB Score | +|-------|------|-----------| +| GPT-5.1 (low) | $0.040 | 0.428 | +| Gemini 3 Flash (low) | $0.051 | 0.499 | +| Gemini 3 Flash (minimal) | $0.103 | 0.504 | +| Claude 4.6 Opus (low) | $0.243 | 0.531 | +| Claude 4.5 Opus (low) | $0.312 | 0.549 | +| Claude 4.6 Sonnet (high) | $0.456 | 0.549 | +| Claude 4.6 Opus (high) | $0.553 | 0.550 | + +everyrow's effort levels map directly to models on or near these frontiers: + +| Effort Level | Model | DRB Score | Cost | +|-------------|-------|-----------|------| +| LOW | Gemini 3 Flash (minimal) | 0.504 | $0.103 | +| MEDIUM | Gemini 3 Flash (low) | 0.499 | $0.051 | +| HIGH | Claude 4.6 Opus (low) | 0.531 | $0.243 | + +The bulk of accuracy (0.531 out of 0.550 max) comes at less than half the cost of the best model. Going from HIGH to the absolute best (Claude 4.6 Opus high) doubles the cost for only a 3.6% accuracy improvement. + + + + +This notebook analyzes model performance on the Deep Research Bench to understand everyrow's model selection and effort level mapping. + +| Metric | Value | +| -------------- | ----------- | +| Models evaluated | 26 | + +```bash +pip install everyrow requests pandas +``` + +```python +import requests +import pandas as pd + +url = "https://rguraxphqescakvvzmju.supabase.co/rest/v1/rpc/get_average_scores_by_model" +PUBLIC_API_KEY = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9..." 
+ +headers = { + "apikey": PUBLIC_API_KEY, + "authorization": f"Bearer {PUBLIC_API_KEY}", + "content-type": "application/json", +} + +response = requests.post(url, headers=headers, json={"min_num_of_distinct_instances": 150}) +df = pd.DataFrame(response.json()) +``` + +To override everyrow's default model selection: + +```python +from everyrow.ops import agent_map +from everyrow.task import LLM + +result = await agent_map( + task="Find each company's latest funding round", + input=companies_df, + effort_level=None, + llm=LLM.CLAUDE_4_6_OPUS_HIGH, + iteration_budget=10, + include_research=True, +) +``` + +| Effort Level | Model | DRB Score | Cost | Runtime | +|-------------|-------|-----------|------|---------| +| LOW | Gemini 3 Flash (minimal) | 0.504 | $0.103 | 116s | +| MEDIUM | Gemini 3 Flash (low) | 0.499 | $0.051 | 96s | +| HIGH | Claude 4.6 Opus (low) | 0.531 | $0.243 | 73s | + +Claude 4.6 Opus (high) achieves the top score (0.550) but at 2x the cost and 2.5x the runtime of the HIGH effort level. For most tasks, the HIGH effort level captures the bulk of accuracy at a fraction of the cost. + + + diff --git a/docs/case_studies/llm-powered-merging-at-scale/content.mdx b/docs/case_studies/llm-powered-merging-at-scale/content.mdx new file mode 100644 index 00000000..df880003 --- /dev/null +++ b/docs/case_studies/llm-powered-merging-at-scale/content.mdx @@ -0,0 +1,125 @@ +--- +title: Merge Thousands of Records +metadataTitle: Using Claude Code to Merge Thousands of Records Intelligently +description: Semantic record matching at production scale. Join thousands of records across two datasets with automatic web search fallback for ambiguous cases. +--- + +# Merge Thousands of Records + + + + +Claude Code is great at matching a person to their website using web search. It can cross-reference names, email domains, and institutions. 
Doing that for 2,246 people, where each match requires understanding names, affiliations, and URL patterns, is more web research than a single session can support. + +Here, we get Claude Code to match people to their personal websites at scale. + +| Metric | Value | +| -------------- | ------------- | +| Rows processed | 2,246 | +| Matched | 2,243 (99.9%) | +| Total cost | $35.41 | +| Time | 12.5 minutes | + +First, install the [everyrow](https://github.com/futuresearch/everyrow-sdk) plugin for Claude Code: + +```bash +claude plugin marketplace add futuresearch/everyrow-sdk +claude plugin install everyrow@futuresearch +``` + +Set your API key before launching Claude Code: + +```bash +export EVERYROW_API_KEY=your_key_here # Get one at everyrow.io/api-key +``` + +With both CSVs in your working directory, tell Claude: + +``` +Merge the people CSV with the websites CSV. Match each person to their +personal website(s). +``` + +Claude calls everyrow's `merge` MCP tool: + +``` +Tool: everyrow_merge +├─ task: "Match each person to their website(s)." +├─ left_csv: "/Users/you/people.csv" +└─ right_csv: "/Users/you/websites.csv" + +→ Submitted: 2,246 rows for merging. + Session: https://everyrow.io/sessions/2a929529-2d92-4410-a6a7-ce8713c5d465 + Task ID: 2a92... + +Tool: everyrow_progress +├─ task_id: "2a92..." +→ Running: 0/2246 complete (30s elapsed) + +... + +Tool: everyrow_progress +→ Completed: 2246/2246 (0 failed) in 747s. + +Tool: everyrow_results +├─ task_id: "2a92..." +├─ output_path: "/Users/you/people_with_websites.csv" +→ Saved 2246 rows to /Users/you/people_with_websites.csv +``` + +2,243 of 2,246 matched (99.9%). [View the session](https://everyrow.io/sessions/2a929529-2d92-4410-a6a7-ce8713c5d465). + +Most matches resolved via LLM reasoning on name/email/URL patterns. Harder cases triggered automatic web search to verify person-to-website relationships. At this scale, 54M tokens were consumed across 4,233 LLM requests. 
+ + + + +The everyrow SDK's `merge()` scales to thousands of rows. This notebook demonstrates matching 2,246 people to personal websites, showing how cost grows with scale. + +| Metric | Value | +| -------------- | ------------- | +| Rows processed | 2,246 | +| Cost | $26.80 | + +```bash +pip install everyrow +export EVERYROW_API_KEY=your_key_here # Get one at everyrow.io/api-key +``` + +```python +import asyncio +import pandas as pd +from everyrow import create_session +from everyrow.ops import merge + +left_df = pd.read_csv("merge_websites_input_left_2246.csv") +right_df = pd.read_csv("merge_websites_input_right_2246.csv") + +async def main(): + async with create_session(name="Website Matching") as session: + result = await merge( + session=session, + task="Match each person to their website(s).", + left_table=left_df, + right_table=right_df, + ) + return result.data + +merged = asyncio.run(main()) +``` + +Cost grows super-linearly with row count because each additional row increases the candidate pool for every match: + +| Rows | Cost | +|------|------| +| 100 | $0.00 | +| 200 | $0.14 | +| 400 | $0.29 | +| 800 | $2.32 | +| 1,600 | $16.60 | +| 2,246 | $26.80 | + +Most matches resolved by LLM reasoning on name/email/URL patterns. Harder cases trigger automatic web search fallback. + + + diff --git a/docs/case_studies/llm-powered-screening-at-scale/content.mdx b/docs/case_studies/llm-powered-screening-at-scale/content.mdx new file mode 100644 index 00000000..e265bf3b --- /dev/null +++ b/docs/case_studies/llm-powered-screening-at-scale/content.mdx @@ -0,0 +1,131 @@ +--- +title: Screen 10,000 Rows +metadataTitle: How to Screen 10,000 Rows in Claude Code +description: Intelligent filtering at production scale. A two-pass pipeline with fast pre-filtering and per-row LLM evaluation handles tens of thousands of records. +--- + +# Screen 10,000 Rows + + + + +Claude Code handles filtering a hundred rows natively by reading and evaluating each one. 
Scaling to 10,000 rows requires a different approach: a fast pre-filter narrows candidates first, and LLM agents then evaluate only the plausible matches individually.
+├─ output_path: "/Users/you/child_relevant_recalls.csv" +→ Saved 2271 rows to /Users/you/child_relevant_recalls.csv +``` + +2,271 of 9,949 recalls are relevant. [View the session](https://everyrow.io/sessions/310fc823-0adc-402c-bff1-7dc43fda2636). + +Sample passing recalls (products a child could have been exposed to): + +| Product | Firm | Why Relevant | +|---------|------|--------------| +| White Hot Dog Enriched Buns | Perfection Bakeries | Food item, child eating solids by recall date | +| ExactaMed Oral Dispenser | Baxter Healthcare | Medical device used for infant medication | +| Chickenless Crispy Tenders | Dr. Praeger's | Food item for toddler-age child | + +Sample non-passing recalls (correctly excluded): + +| Product | Why Excluded | +|---------|-------------| +| Lase Discectomy Device Kit | Surgical back surgery device, not for children | +| Heparin/Lidocaine irrigation | Clinical use only | + + + + +The everyrow SDK's `screen()` function filters a dataframe by applying LLMs to every row. This demonstrates screening at scale: 10,000 FDA product recalls screened for personal relevance. 
+ +| Metric | Value | +| -------------- | ------------- | +| Rows processed | ~9,949 | +| Rows passing | 1,046 (12.8%) | +| Total cost | $12.10 | + +```bash +pip install everyrow +export EVERYROW_API_KEY=your_key_here # Get one at everyrow.io/api-key +``` + +```python +import asyncio +import pandas as pd +from everyrow import create_session +from everyrow.ops import screen + +fda_recalls = pd.read_csv("fda_product_recalls.csv") +fda_recalls["center_classification_date"] = pd.to_datetime( + fda_recalls["center_classification_date"], errors="coerce" +) +fda_recalls = fda_recalls[ + fda_recalls["center_classification_date"] > pd.Timestamp("2021-08-01") +] + +async def main(): + async with create_session(name="FDA Recall Screening") as session: + result = await screen( + task="Find recalls of products that I might have used for my child born on 2021-08-01.", + input=fda_recalls, + ) + return result.data + +results = asyncio.run(main()) +``` + +At $0.001 per row, the cost scales linearly. The two-pass pipeline uses a fast model for initial triage and a stronger model for borderline cases, keeping accuracy high while controlling cost. + + + diff --git a/docs/case_studies/match-clinical-trials-to-papers/content.mdx b/docs/case_studies/match-clinical-trials-to-papers/content.mdx new file mode 100644 index 00000000..a1f3a55a --- /dev/null +++ b/docs/case_studies/match-clinical-trials-to-papers/content.mdx @@ -0,0 +1,141 @@ +--- +title: Link Records Across Medical Datasets +metadataTitle: How to Use Claude Code to Link Records Across Medical Databases +description: Record linkage across structured medical and scientific databases. Semantic matching on drug names, conditions, study design, and domain-specific terminology. +--- + +# Link Records Across Medical Datasets + + + + +Claude Code is great at reading a paper abstract and matching it to a clinical trial. 
When you have 700 papers and 200 trials, the matching requires evaluating up to 140,000 potential pairs for drug aliases, rewritten trial titles, and study design terminology.
+ +Scored against 64 gold-labeled pairs: + +| Metric | Value | +|--------|-------| +| True positives | 58 | +| False positives | 15 | +| False negatives | 6 | +| Precision | 79.5% | +| Recall | 90.6% | +| F1 Score | 84.7% | + +627 papers were correctly left unmatched (distractors with no corresponding trial). The many-to-one relationship correctly models that multiple papers can report results from the same trial. + + + + +The everyrow SDK's `merge()` handles semantic matching across medical terminology, drug aliases, and study design descriptions. This notebook demonstrates matching papers to clinical trials with gold-label evaluation. + +| Metric | Value | +| -------------- | -------- | +| Papers | 700 | +| Trials | 200 | +| F1 Score | 87.2% | +| Cost | ~$20 | + +```bash +pip install everyrow +export EVERYROW_API_KEY=your_key_here # Get one at everyrow.io/api-key +``` + +```python +import asyncio +import pandas as pd +from everyrow import create_session +from everyrow.ops import merge + +trials_df = pd.read_csv("trials_200.csv") +papers_df = pd.read_csv("papers_700.csv") + +async def main(): + async with create_session(name="Clinical Trials to Papers") as session: + result = await merge( + session=session, + task=""" + Match publications to the clinical trial they report results for. + Look for matching interventions/drugs, conditions, study design, + outcomes, and sponsor/institution. Drug names may appear as brand + or generic. Not every paper has a matching trial. + """, + left_table=papers_df, + right_table=trials_df, + ) + return result.data + +merged = asyncio.run(main()) +matched = merged.dropna(subset=["nct_id"]) +``` + +| Metric | EveryRow | Claude Code Only | +|--------|----------|-----------------| +| F1 Score | 87.2% | 74.5% | +| Precision | 84.1% | 100% | +| Recall | 90.6% | ~59% | + +EveryRow maintains accuracy as datasets grow by dynamically allocating more agents. 
Its higher recall (90.6% vs ~59%) comes from finding matches that require deeper semantic understanding of medical terminology. + + + diff --git a/docs/case_studies/match-software-vendors-to-requirements/content.mdx b/docs/case_studies/match-software-vendors-to-requirements/content.mdx new file mode 100644 index 00000000..d3bd4f7a --- /dev/null +++ b/docs/case_studies/match-software-vendors-to-requirements/content.mdx @@ -0,0 +1,131 @@ +--- +title: Fuzzy Match Across Tables +metadataTitle: Get Claude Code to Fuzzy Match Records Across Datasets +description: Fuzzy matching across tables with different naming conventions. A cascade from exact matching through LLM reasoning to web search achieves high accuracy at every ambiguity level. +--- + +# Fuzzy Match Across Tables + + + + +Claude Code handles exact-key merges natively by writing pandas code. Scaling to fuzzy matching, then semantic matching, then web search fallback needs an approach where each strategy is tested independently and the cascade is evaluated empirically. + +Here, we run 5 merge experiments on 438 S&P 500 companies, testing the cascade from exact matching to web search. + +| Metric | Value | +| -------------- | ------------ | +| Total merges | 5 | +| Rows per merge | 438 | +| Total cost | $3.67 | +| Total time | 7.1 minutes | + +First, install the [everyrow](https://github.com/futuresearch/everyrow-sdk) plugin for Claude Code: + +```bash +claude plugin marketplace add futuresearch/everyrow-sdk +claude plugin install everyrow@futuresearch +``` + +Set your API key before launching Claude Code: + +```bash +export EVERYROW_API_KEY=your_key_here # Get one at everyrow.io/api-key +``` + +With the company CSVs in your working directory, tell Claude to run each experiment. For the company-to-ticker merge: + +``` +Merge company_info.csv with valuations.csv. The first table has company names, +the second has stock tickers. Match companies to their stock tickers. 
+``` + +Claude calls everyrow's `merge` MCP tool: + +``` +Tool: everyrow_merge +├─ task: "Merge the tables based on company name and ticker" +├─ left_csv: "/Users/you/company_info.csv" +└─ right_csv: "/Users/you/valuations.csv" + +→ Submitted: 438 rows for merging. + Session: https://everyrow.io/sessions/d7819b7e-c48d-49e5-9f6e-55d972b85467 + +... + +Tool: everyrow_results +→ Saved 438 rows to /Users/you/merged.csv +``` + +Results across all 5 experiments: + +| Experiment | Accuracy | Cost | Time | +|-----------|----------|------|------| +| 0% noise (baseline) | 100% | $0.00 | 6s | +| 5% character corruption | 100% | $0.10 | 23s | +| 10% character corruption | 100% | $0.34 | 43s | +| Company name to ticker (LLM) | 100% | $1.01 | 203s | +| CEO name to company (Web) | 96.3% | $2.22 | 151s | + +The cascade escalates automatically: exact matches are free, fuzzy matches handle typos for free, LLM reasoning handles semantic matches at ~$0.002/row, and web search is used only for stale or obscure data at ~$0.01/row. + + + + +The everyrow SDK implements a merge cascade (Exact, Fuzzy, LLM, Web) that automatically uses the simplest method that works for each row. This notebook tests the cascade across 5 experiments with increasing difficulty. 
+ +| Metric | Value | +| -------------- | ------------ | +| Total merges | 5 | +| Rows per merge | 438 | + +```bash +pip install everyrow +export EVERYROW_API_KEY=your_key_here # Get one at everyrow.io/api-key +``` + +```python +import asyncio +import pandas as pd +from everyrow import create_session +from everyrow.ops import merge + +companies = pd.read_csv("company_info.csv") +valuations = pd.read_csv("valuations.csv") + +async def main(): + # Experiment 1: Clean data (exact matches) + async with create_session(name="Exact Match") as session: + result = await merge( + session=session, + task="Merge the tables on company name", + left_table=companies, + right_table=valuations, + merge_on_left="company", + merge_on_right="company", + ) + + # Experiment 2: Company name to ticker (LLM match) + async with create_session(name="LLM Match") as session: + result = await merge( + session=session, + task="Merge the tables based on company name and ticker", + left_table=companies, + right_table=valuations, + ) + +asyncio.run(main()) +``` + +| Experiment | Matched | Accuracy | Cost | +|-----------|---------|----------|------| +| 0% noise | 100% | 100% | $0.13 | +| 5% noise | 100% | 100% | $0.32 | +| 10% noise | 100% | 100% | $0.44 | +| LLM (company to ticker) | 100% | 100% | $1.00 | +| Web (CEO matching) | 95.7% | 96.7% | $3.69 | + +The cascade optimizes cost automatically. For the 10% noise experiment, 26.5% of rows matched exactly, 30.8% via fuzzy matching (both free), and only 42.7% required LLM reasoning. 
+ + + diff --git a/docs/case_studies/merge-contacts-with-company-data/content.mdx b/docs/case_studies/merge-contacts-with-company-data/content.mdx new file mode 100644 index 00000000..fa5336df --- /dev/null +++ b/docs/case_studies/merge-contacts-with-company-data/content.mdx @@ -0,0 +1,121 @@ +--- +title: Enrich Contacts with Company Data +metadataTitle: How to Use Claude Code to Enrich Contact Records with Company Data +description: Join contact-level and organization-level records automatically. Resolves company name variations, abbreviations, and legal suffixes into clean, matched data. +--- + +# Enrich Contacts with Company Data + + + + +Claude Code's pandas merge works when column values match exactly. When "Bridgewater" needs to match "Bridgewater Associates" and "D.E. Shaw" needs to match "D. E. Shaw & Co.", the merge needs fuzzy matching that understands company name conventions. + +Here, we get Claude Code to merge 10 contacts with 10 fund records, handling company name variations. + +| Metric | Value | +| -------------- | ---------- | +| Rows processed | 10 | +| Matched | 10 (100%) | +| Total cost | $0.00 | +| Time | 9 seconds | + +First, install the [everyrow](https://github.com/futuresearch/everyrow-sdk) plugin for Claude Code: + +```bash +claude plugin marketplace add futuresearch/everyrow-sdk +claude plugin install everyrow@futuresearch +``` + +Set your API key before launching Claude Code: + +```bash +export EVERYROW_API_KEY=your_key_here # Get one at everyrow.io/api-key +``` + +With both CSVs in your working directory, tell Claude: + +``` +Merge crm_contacts.csv with crm_funds.csv. Match contacts to their fund based +on company name, ignoring legal suffixes (LLC, Inc, LP), abbreviations +(Mgmt = Management, Tech = Technologies), and extra descriptors. +``` + +Claude calls everyrow's `merge` MCP tool: + +``` +Tool: everyrow_merge +├─ task: "Match contacts to their associated fund/company..." 
+├─ left_csv: "/Users/you/crm_contacts.csv" +├─ right_csv: "/Users/you/crm_funds.csv" +├─ merge_on_left: "company_name" +├─ merge_on_right: "fund_name" +└─ relationship_type: "one_to_one" + +→ Submitted: 10 rows for merging. + Session: https://everyrow.io/sessions/8e2eb233-e2cb-4144-b949-0e4fb4962cb2 + +Tool: everyrow_results +→ Saved 10 rows to /Users/you/merged_contacts.csv +``` + +All 10 matched in 9 seconds for $0.00. [View the session](https://everyrow.io/sessions/8e2eb233-e2cb-4144-b949-0e4fb4962cb2). + +| Contact | Company (left) | Fund (right) | +|---------|---------------|--------------| +| John Smith | Bridgewater | Bridgewater Associates | +| Sarah Johnson | Citadel LLC | Citadel | +| Jessica Wang | D.E. Shaw | D. E. Shaw & Co. | +| Robert Brown | Point72 Asset Mgmt | Point72 Asset Management | +| Amanda Wilson | Renaissance Tech | Renaissance Technologies | + +The merge cascade handled all variations (abbreviations, suffixes, spacing) via fuzzy matching without needing LLM calls. When simpler methods work, everyrow uses them. + + + + +The everyrow SDK's `merge()` handles company name variations for CRM data imports. + +| Metric | Value | +| -------------- | ---------- | +| Rows processed | 10 | +| Matched | 10 (100%) | +| Cost | $0.00 | + +```bash +pip install everyrow +export EVERYROW_API_KEY=your_key_here # Get one at everyrow.io/api-key +``` + +```python +import asyncio +import pandas as pd +from everyrow import create_session +from everyrow.ops import merge + +contacts_df = pd.read_csv("crm_contacts.csv") +funds_df = pd.read_csv("crm_funds.csv") + +async def main(): + async with create_session(name="CRM Merge Workflow") as session: + result = await merge( + session=session, + task=""" + Match contacts to their associated fund/company. + Company names may vary between tables. Match on core company name, + ignoring legal suffixes, abbreviations, and descriptors. 
+ """, + left_table=contacts_df, + right_table=funds_df, + merge_on_left="company_name", + merge_on_right="fund_name", + ) + return result.data + +merged = asyncio.run(main()) +``` + +All 10 contacts matched at $0.00. The fuzzy matching cascade handled abbreviations ("Mgmt" to "Management"), legal suffixes ("LLC"), and spacing variations without needing LLM calls. + + + diff --git a/docs/case_studies/merge-overlapping-contact-lists/content.mdx b/docs/case_studies/merge-overlapping-contact-lists/content.mdx new file mode 100644 index 00000000..d0e8228d --- /dev/null +++ b/docs/case_studies/merge-overlapping-contact-lists/content.mdx @@ -0,0 +1,126 @@ +--- +title: Deduplicate Contact Lists +metadataTitle: Can Claude Code Identify Duplicate People Across Contact Lists? +description: Identity resolution across contact databases. Accurately match people despite nicknames, initials, and inconsistent formatting across sources. +--- + +# Deduplicate Contact Lists + + + + +Claude Code can diff two lists. But what if "Dr. Sarah Chen" on one list is "S. Chen" on another, and "Robert Johnson" appears as "Bob Johnson"? You need semantic matching that understands nicknames and initials. + +Here, we get Claude Code to merge two overlapping contact lists (12 and 10 people) to identify duplicates across name formats. 
+ +| Metric | Value | +| -------------- | ----------- | +| Left list | 12 contacts | +| Right list | 10 contacts | +| Matched pairs | 7 | +| Total cost | $0.00 | +| Time | 128 seconds | + +First, install the [everyrow](https://github.com/futuresearch/everyrow-sdk) plugin for Claude Code: + +```bash +claude plugin marketplace add futuresearch/everyrow-sdk +claude plugin install everyrow@futuresearch +``` + +Set your API key before launching Claude Code: + +```bash +export EVERYROW_API_KEY=your_key_here # Get one at everyrow.io/api-key +``` + +With both contact CSVs in your working directory, tell Claude: + +``` +Merge these two contact lists to find the same person across both. Account for +nicknames (Bob/Robert, Mike/Michael, Tom/Thomas), initials (S. Chen = Sarah Chen), +and institution matching. When in doubt, favor false negatives over false positives. +``` + +Claude calls everyrow's `merge` MCP tool: + +``` +Tool: everyrow_merge +├─ task: "Match contacts between two lists to identify the same person..." +├─ left_csv: "/Users/you/contacts_list_a.csv" +├─ right_csv: "/Users/you/contacts_list_b.csv" +├─ merge_on_left: "name" +├─ merge_on_right: "full_name" +└─ relationship_type: "one_to_one" + +→ Submitted: 12 rows for merging. + Session: https://everyrow.io/sessions/1d39b32d-d71e-48e8-8ac7-de907f86745a + +Tool: everyrow_results +→ Saved 12 rows to /Users/you/merged_contacts.csv +``` + +7 matches found, 5 correctly left unmatched. [View the session](https://everyrow.io/sessions/1d39b32d-d71e-48e8-8ac7-de907f86745a). + +| List A | List B | Match Type | +|--------|--------|------------| +| Dr. Sarah Chen | S. Chen | Initial + institution | +| Michael O'Brien | Mike O'Brien | Nickname | +| James Wilson | James R. Wilson | Middle initial | +| Robert Johnson | Bob Johnson | Nickname | +| Thomas Lee | Tom Lee | Nickname | +| Priya Sharma | Priya S. | Initial | +| Elena Rodriguez | Elena R. 
| Initial | + +David Kim, Anna Kowalski, Maria Santos, Jennifer Park, and Christopher Davis were correctly left unmatched (no counterpart in the other list). The merge handled all variations via fuzzy matching without needing LLM calls. + + + + +The everyrow SDK's `merge()` identifies the same person across lists with different name formats, nicknames, and initials. + +| Metric | Value | +| -------------- | ----------- | +| Left list | 12 contacts | +| Right list | 10 contacts | +| Matched pairs | 7 | +| Cost | $0.00 | + +```bash +pip install everyrow +export EVERYROW_API_KEY=your_key_here # Get one at everyrow.io/api-key +``` + +```python +import asyncio +import pandas as pd +from everyrow import create_session +from everyrow.ops import merge + +list_a = pd.read_csv("contacts_list_a.csv") +list_b = pd.read_csv("contacts_list_b.csv") + +async def main(): + async with create_session(name="Contact List Merge") as session: + result = await merge( + session=session, + task=""" + Match contacts between two lists to identify the same person. + Account for nicknames (Bob/Robert, Mike/Michael, Tom/Thomas), + initials (S. Chen = Sarah Chen), and institution matching. + Favor false negatives over false positives. + """, + left_table=list_a, + right_table=list_b, + merge_on_left="name", + merge_on_right="full_name", + ) + return result.data + +merged = asyncio.run(main()) +``` + +7 matches found at $0.00. The merge correctly handled nickname matching (Bob/Robert, Mike/Michael, Tom/Thomas), initial matching (S. Chen to Sarah Chen), and left 5 contacts unmatched when no counterpart existed. 
+ + + diff --git a/docs/case_studies/multi-stage-lead-qualification/content.mdx b/docs/case_studies/multi-stage-lead-qualification/content.mdx new file mode 100644 index 00000000..8da09d4f --- /dev/null +++ b/docs/case_studies/multi-stage-lead-qualification/content.mdx @@ -0,0 +1,139 @@ +--- +title: Multi-Stage Lead Qualification +metadataTitle: How We Use Claude Code to Build Multi-Step Data Pipelines +description: Chain scoring, filtering, enrichment, and screening into a single automated pipeline. Pass intermediate results between stages with custom logic at each step. +--- + +# Multi-Stage Lead Qualification + + + + +Claude Code handles a single scoring or filtering step natively. Chaining three stages (score by research adoption, filter by threshold, estimate team sizes, then screen by a compound rule) needs an approach where each stage passes its output to the next with custom logic between steps. + +Here, we get Claude Code to run a three-stage qualification pipeline on 20 investment funds. + +| Metric | Value | +| -------------- | ----------- | +| Input funds | 20 | +| After scoring | 15 | +| Final qualified| 14 | +| Total cost | $0.53 | +| Time | 4.2 minutes | + +First, install the [everyrow](https://github.com/futuresearch/everyrow-sdk) plugin for Claude Code: + +```bash +claude plugin marketplace add futuresearch/everyrow-sdk +claude plugin install everyrow@futuresearch +``` + +Set your API key before launching Claude Code: + +```bash +export EVERYROW_API_KEY=your_key_here # Get one at everyrow.io/api-key +``` + +Tell Claude to run the multi-stage pipeline: + +``` +I have a CSV of 20 investment funds. Run this pipeline: +1. Score each fund 0-100 on likelihood to adopt research tools +2. Filter to funds scoring >= 50 +3. For remaining funds, estimate their investment team size +4. 
Final screen: include if score >= 70 OR team size <= 5 +``` + +Claude chains three everyrow operations with a pandas filter step: + +``` +Tool: everyrow_rank (Stage 1: Score by research adoption) +├─ task: "Score funds 0-100 on likelihood to adopt research tools" +├─ field_name: "score" +→ 20 rows scored in 73s. Session: https://everyrow.io/sessions/680fb865-... + +[Claude filters to score >= 50: 15 rows remain] + +Tool: everyrow_rank (Stage 3: Estimate team size) +├─ task: "Estimate investment team size per fund" +├─ field_name: "team_size_estimate" +→ 15 rows scored in 131s. Session: https://everyrow.io/sessions/ab54d4c9-... + +Tool: everyrow_screen (Stage 4: Final inclusion) +├─ task: "Include if score >= 70 OR team <= 5" +→ 14 of 15 pass in 49s. Session: https://everyrow.io/sessions/5f18a461-... +``` + +14 of 20 funds qualified. The one excluded fund (Fixed Income Plus, score 55, team 12) fell below the score threshold and had too large a team. + +| Fund | Score | Team Size | Qualified | +|------|-------|-----------|-----------| +| Tiny Ventures GP | 85 | 1 | Yes | +| Boutique Micro Fund | 92 | 2 | Yes | +| Nano Cap Hunters | 95 | 4 | Yes | +| Deep Dive Capital | 95 | 5 | Yes | +| Activist Value Fund | 95 | 12 | Yes | +| Fixed Income Plus | 55 | 12 | No | + + + + +The everyrow SDK chains multiple operations in a single session. This notebook demonstrates a three-stage lead qualification pipeline using `rank()`, pandas filtering, and `screen()`. 
+ +| Metric | Value | +| -------------- | ----------- | +| Input funds | 20 | +| Final qualified| 14 | + +```bash +pip install everyrow +export EVERYROW_API_KEY=your_key_here # Get one at everyrow.io/api-key +``` + +```python +import asyncio +import pandas as pd +from pydantic import BaseModel, Field +from everyrow import create_session +from everyrow.ops import rank, screen + +class InclusionResult(BaseModel): + passes: bool = Field(description="Include if score >= 70 OR team_size <= 5") + +async def main(): + async with create_session(name="Multi-Stage Lead Screening") as session: + # Stage 1: Score by research tool adoption + scored = await rank( + session=session, + task="Score funds 0-100 on likelihood to adopt research tools", + input=funds_df, + field_name="score", + ) + + # Stage 2: Filter by threshold + filtered = scored.data[scored.data["score"] >= 50].copy() + + # Stage 3: Research team sizes + with_teams = await rank( + session=session, + task="Estimate investment team size per fund", + input=filtered, + field_name="team_size_estimate", + ) + + # Stage 4: Final screening + final = await screen( + session=session, + task="Include if score >= 70 OR team size <= 5", + input=with_teams.data, + response_model=InclusionResult, + ) + return final.data + +results = asyncio.run(main()) +``` + +The pipeline chains three everyrow operations in a single session: score, filter (pandas), team size estimation, and nuanced inclusion screening. 14 of 20 funds qualified. The single exclusion (Fixed Income Plus) had a moderate score (55) and large team (12). + + + diff --git a/docs/case_studies/research-and-rank-permit-times/content.mdx b/docs/case_studies/research-and-rank-permit-times/content.mdx new file mode 100644 index 00000000..0a5176f5 --- /dev/null +++ b/docs/case_studies/research-and-rank-permit-times/content.mdx @@ -0,0 +1,125 @@ +--- +title: Research and Rank Web Data +metadataTitle: Can Claude Code Research Dozens of Websites and Rank the Results? 
+description: Aggregate structured data from dozens of independent websites into a single ranked dataset. Research agents visit each source and extract comparable results. +--- + +# Research and Rank Web Data + + + + +Ask Claude Code to find the permit processing time for San Antonio and it will find the city's website and give you an answer. Doing that for 30 cities, each with a different government website that publishes permit data in a different format, needs 30 independent research agents. + +Here, we get Claude Code to research and rank 30 Texas cities by residential building permit processing time. + +| Metric | Value | +| -------------- | ----------- | +| Rows processed | 30 | +| Cost | $0.88 | +| Time | 2.5 minutes | + +First, install the [everyrow](https://github.com/futuresearch/everyrow-sdk) plugin for Claude Code: + +```bash +claude plugin marketplace add futuresearch/everyrow-sdk +claude plugin install everyrow@futuresearch +``` + +Set your API key before launching Claude Code: + +```bash +export EVERYROW_API_KEY=your_key_here # Get one at everyrow.io/api-key +``` + +With the Texas cities CSV in your working directory, tell Claude: + +``` +Research and rank these 30 Texas cities by residential building permit processing +time in business days. Look up each city's actual permit processing data. +``` + +Claude calls everyrow's `rank` MCP tool. Each agent conducts independent web research: + +``` +Tool: everyrow_rank +├─ task: "Research residential building permit processing time for this Texas city..." +├─ input_csv: "/Users/you/texas_cities.csv" +├─ field_name: "permit_days" +├─ field_type: "int" +└─ ascending_order: true + +→ Submitted: 30 rows for ranking. + Session: https://everyrow.io/sessions/6d0fdc1d-b12e-4b80-9f3a-143923a7e3b9 + Task ID: 6d0f... + +... + +Tool: everyrow_results +→ Saved 30 rows to /Users/you/permit_times.csv +``` + +[View the session](https://everyrow.io/sessions/6d0fdc1d-b12e-4b80-9f3a-143923a7e3b9). 
+ +Top 10 fastest cities: + +| City | Population | Region | Permit Days | +|------|-----------|--------|-------------| +| San Antonio | 1,500,000 | South Texas | 3 | +| Irving | 240,000 | North Texas | 3 | +| McAllen | 145,000 | Rio Grande Valley | 3 | +| Plano | 285,000 | North Texas | 5 | +| Lubbock | 260,000 | West Texas | 5 | +| Brownsville | 185,000 | Rio Grande Valley | 5 | +| Killeen | 155,000 | Central Texas | 5 | +| Waco | 140,000 | Central Texas | 6 | +| Fort Worth | 920,000 | North Texas | 7 | +| Garland | 240,000 | North Texas | 7 | + +Slowest: Denton (31 days), Round Rock (30 days), Houston (30 days). Average across all 30 cities: 10.3 days. Each result includes detailed source citations with URLs from official city permit department pages. + + + + +The everyrow SDK's `rank()` dispatches web research agents to find data that isn't available through any structured API. Each agent visits official city websites to find permit processing timelines. + +| Metric | Value | +| -------------- | ----------- | +| Rows processed | 30 | + +```bash +pip install everyrow +export EVERYROW_API_KEY=your_key_here # Get one at everyrow.io/api-key +``` + +```python +import asyncio +import pandas as pd +from everyrow import create_session +from everyrow.ops import rank + +texas_cities_df = pd.read_csv("texas_cities.csv") + +async def main(): + async with create_session(name="Texas Permit Times Research") as session: + result = await rank( + session=session, + task=""" + Research the residential building permit processing time for + this Texas city. Find official data from the city's permit + department. Return the number of business days. + """, + input=texas_cities_df, + field_name="score", + ) + return result.data + +results = asyncio.run(main()) +``` + +Fastest: Corpus Christi (2 days), San Antonio (3 days), Irving (3 days). Slowest: Round Rock (30 days), Houston (30 days). Average: 10.0 days. Median: 10.0 days. 
+ +By region: Rio Grande Valley cities are fastest (4.0 days avg), Central Texas slowest (15.3 days avg). Each result includes a research column with source citations. + + + diff --git a/docs/case_studies/score-leads-from-fragmented-data/content.mdx b/docs/case_studies/score-leads-from-fragmented-data/content.mdx new file mode 100644 index 00000000..a5738b1f --- /dev/null +++ b/docs/case_studies/score-leads-from-fragmented-data/content.mdx @@ -0,0 +1,120 @@ +--- +title: Score Leads from Fragmented Data +metadataTitle: How to Score Business Leads from Incomplete Data in Claude Code +description: Lead scoring for companies whose data is scattered across disconnected sources. Assess characteristics that predict product fit across fragmented, incomplete records. +--- + +# Score Leads from Fragmented Data + + + + +Claude Code is great at researching a single company's operations. Scoring 20 prospects on data fragmentation risk is harder. Each one needs web research into operational complexity, M&A history, and system diversity, then a consistent 0-100 score calibrated across the full set. + +Here, we get Claude Code to score B2B companies on their likelihood of needing data integration solutions. + +| Metric | Value | +| -------------- | ---------- | +| Rows processed | 20 | +| Cost | $0.08 | +| Time | 40 seconds | + +First, install the [everyrow](https://github.com/futuresearch/everyrow-sdk) plugin for Claude Code: + +```bash +claude plugin marketplace add futuresearch/everyrow-sdk +claude plugin install everyrow@futuresearch +``` + +Set your API key before launching Claude Code: + +```bash +export EVERYROW_API_KEY=your_key_here # Get one at everyrow.io/api-key +``` + +Download [b2b_companies.csv](https://media.githubusercontent.com/media/futuresearch/everyrow-sdk/refs/heads/main/docs/data/b2b_companies.csv). Tell Claude: + +``` +Score each company from 0-100 on their likelihood of suffering from data +fragmentation challenges. 
High scores for multi-location operations, M&A +history, disconnected systems. Low scores for single-location, cloud-native, +integrated stacks. +``` + +Claude calls everyrow's `rank` MCP tool: + +``` +Tool: everyrow_rank +├─ task: "Score each company from 0-100 on data fragmentation risk..." +├─ input_csv: "/Users/you/b2b_companies.csv" +├─ field_name: "score" +├─ field_type: "int" +└─ ascending_order: false + +→ Submitted: 20 rows for ranking. + Session: https://everyrow.io/sessions/0a3de921-5ffd-4031-91be-9a447e51b96a + +Tool: everyrow_results +→ Saved 20 rows to /Users/you/scored_leads.csv +``` + +[View the session](https://everyrow.io/sessions/0a3de921-5ffd-4031-91be-9a447e51b96a). + +| Company | Industry | Score | +|---------|----------|-------| +| QuickServe Restaurants | Food Service | 95 | +| Global Logistics Partners | Logistics | 95 | +| CityMed Physicians Group | Healthcare | 95 | +| Midwest Healthcare Network | Healthcare | 92 | +| First National Bancorp | Banking | 92 | +| ... | ... | ... | +| TechFlow Solutions | Software | 15 | +| CloudFirst Startup | Software | 10 | +| SimpleRetail Co | Retail | 10 | +| Boutique Law LLP | Legal | 10 | + +Multi-location businesses (restaurants, healthcare networks, logistics) score highest. Cloud-native single-location companies score lowest. No web research was needed since the company descriptions contained enough context. + + + + +The everyrow SDK's `rank()` scores every row using LLM reasoning. For B2B lead scoring, it evaluates operational complexity from company descriptions. 
+ +| Metric | Value | +| -------------- | ---------- | +| Rows processed | 20 | + +```bash +pip install everyrow +export EVERYROW_API_KEY=your_key_here # Get one at everyrow.io/api-key +``` + +```python +import asyncio +import pandas as pd +from everyrow import create_session +from everyrow.ops import rank + +companies_df = pd.read_csv("b2b_companies.csv") + +async def main(): + async with create_session(name="Data Fragmentation Lead Scoring") as session: + result = await rank( + session=session, + task=""" + Score each company from 0-100 on data fragmentation risk. + High (70-100): Multiple locations, M&A history, disconnected systems. + Low (0-30): Single location, cloud-native, integrated stack. + """, + input=companies_df, + field_name="score", + ) + return result.data.sort_values("score", ascending=False) + +results = asyncio.run(main()) +``` + +Top scores: QuickServe Restaurants (95), Global Logistics Partners (95), TransGlobal Shipping (92). Bottom scores: CloudFirst Startup (5), SimpleRetail Co (10), Boutique Law LLP (10). Food service, logistics, hospitality, and healthcare companies score highest due to multi-location operations and disconnected systems. + + + diff --git a/docs/case_studies/score-leads-without-crm-history/content.mdx b/docs/case_studies/score-leads-without-crm-history/content.mdx new file mode 100644 index 00000000..54c955e7 --- /dev/null +++ b/docs/case_studies/score-leads-without-crm-history/content.mdx @@ -0,0 +1,124 @@ +--- +title: Score Cold Leads via Web Research +metadataTitle: Using Claude Code to Research and Score Leads from Scratch +description: Cold lead qualification powered by live web research. Score prospects based on publicly available information about their strategy, team, and market position. +--- + +# Score Cold Leads via Web Research + + + + +Ask Claude Code to evaluate whether a hedge fund buys research tools and it will investigate the firm's strategy and team structure. 
Doing that for 15 firms, each with unique investment approaches ranging from pure quant to fundamental research, needs per-firm web research running in parallel. + +Here, we get Claude Code to rank investment firms using web research to assess each firm's research intensity. + +| Metric | Value | +| -------------- | ----------- | +| Rows processed | 15 | +| Cost | $0.30 | +| Time | 149 seconds | + +First, install the [everyrow](https://github.com/futuresearch/everyrow-sdk) plugin for Claude Code: + +```bash +claude plugin marketplace add futuresearch/everyrow-sdk +claude plugin install everyrow@futuresearch +``` + +Set your API key before launching Claude Code: + +```bash +export EVERYROW_API_KEY=your_key_here # Get one at everyrow.io/api-key +``` + +Download [investment_firms.csv](https://media.githubusercontent.com/media/futuresearch/everyrow-sdk/refs/heads/main/docs/data/investment_firms.csv). Tell Claude: + +``` +Score each investment firm from 0-100 on their likelihood to purchase +third-party research tools. High scores for fundamental/activist/short-sellers. +Low scores for passive index funds and pure quant. +``` + +Claude calls everyrow's `rank` MCP tool. Each agent researches the firm's actual strategy: + +``` +Tool: everyrow_rank +├─ task: "Score each investment firm on likelihood to purchase research tools..." +├─ input_csv: "/Users/you/investment_firms.csv" +├─ field_name: "score" +├─ field_type: "int" +└─ ascending_order: false + +→ Submitted: 15 rows for ranking. + Session: https://everyrow.io/sessions/f759d5fb-822d-4bb0-b978-85b36909b919 + +... + +Tool: everyrow_results +→ Saved 15 rows to /Users/you/scored_firms.csv +``` + +[View the session](https://everyrow.io/sessions/f759d5fb-822d-4bb0-b978-85b36909b919). 
+ +| Firm | Score | Strategy | +|------|-------|----------| +| Muddy Waters Research | 95 | Short-seller, research-driven | +| ValueAct Capital | 95 | Activist, deep research | +| Elliott Management | 95 | Activist, multi-strategy | +| Baupost Group | 92 | Value, fundamental research | +| Third Point | 92 | Activist, event-driven | +| Lone Pine Capital | 90 | Long/short equity | +| Pershing Square | 90 | Concentrated activist | +| ... | ... | ... | +| AQR Capital | 20 | Systematic/quant | +| Two Sigma | 15 | Quantitative | +| Renaissance Technologies | 20 | Pure quant | +| Bridgewater Associates | 15 | Systematic macro | +| Vanguard Index Funds | 0 | Passive index | + +Activist and fundamental research firms score highest. Pure quant and passive index funds score lowest. The web research verified each firm's actual strategy and team composition. + + + + +The everyrow SDK's `rank()` performs web research on each row to score firms by a criterion that requires external knowledge. + +| Metric | Value | +| -------------- | ----------- | +| Rows processed | 15 | + +```bash +pip install everyrow +export EVERYROW_API_KEY=your_key_here # Get one at everyrow.io/api-key +``` + +```python +import asyncio +import pandas as pd +from everyrow import create_session +from everyrow.ops import rank + +firms_df = pd.read_csv("investment_firms.csv") + +async def main(): + async with create_session(name="Research Tool Adoption Scoring") as session: + result = await rank( + session=session, + task=""" + Score each investment firm from 0-100 on likelihood to purchase + third-party research tools. High for fundamental/activist/short-sellers. + Low for passive index funds and pure quant. + """, + input=firms_df, + field_name="score", + ) + return result.data + +results = asyncio.run(main()) +``` + +9 firms scored 70+ (high priority), 6 scored below 40 (low priority). Muddy Waters Research (98) and Pershing Square (95) top the list. 
Vanguard Index Funds (10) and Renaissance Technologies (15) are at the bottom. + + + diff --git a/docs/case_studies/screen-job-postings-by-criteria/content.mdx b/docs/case_studies/screen-job-postings-by-criteria/content.mdx new file mode 100644 index 00000000..937f2112 --- /dev/null +++ b/docs/case_studies/screen-job-postings-by-criteria/content.mdx @@ -0,0 +1,132 @@ +--- +title: Screen Job Listings +metadataTitle: Using Claude Code to Intelligently Filter Job Listings +description: Intelligent job filtering that reads and understands full postings. Screen for seniority, remote flexibility, and compensation transparency across large job boards. +--- + +# Screen Job Listings + + + + +Claude Code can read a job posting and tell you if it's remote-friendly. But what if you need to screen many postings against three criteria simultaneously, with structured yes/no output for each? + +Here, we get Claude Code to screen 15 job postings for roles that are remote-friendly, senior-level, and have disclosed salary. + +| Metric | Value | +| -------------- | ---------- | +| Rows processed | 15 | +| Rows passing | 7 (46.7%) | +| Total cost | $0.03 | +| Time | 57 seconds | + +First, install the [everyrow](https://github.com/futuresearch/everyrow-sdk) plugin for Claude Code: + +```bash +claude plugin marketplace add futuresearch/everyrow-sdk +claude plugin install everyrow@futuresearch +``` + +Set your API key before launching Claude Code: + +```bash +export EVERYROW_API_KEY=your_key_here # Get one at everyrow.io/api-key +``` + +Download [job_postings.csv](https://media.githubusercontent.com/media/futuresearch/everyrow-sdk/refs/heads/main/docs/data/job_postings.csv). Tell Claude: + +``` +Screen each job posting to find roles that meet ALL THREE criteria: +1. Remote-friendly: explicitly allows remote, hybrid, or distributed work +2. Senior-level: title includes Senior/Staff/Lead/Principal, or requires 5+ years +3. 
Salary disclosed: specific compensation figures, not "competitive" or "DOE" +``` + +Claude calls everyrow's `screen` MCP tool: + +``` +Tool: everyrow_screen +├─ task: "Screen each job posting to determine if it meets ALL THREE criteria..." +├─ input_csv: "/Users/you/job_postings.csv" +└─ response_schema: null + +→ Submitted: 15 rows for screening. + Session: https://everyrow.io/sessions/17d3075d-1788-4fba-8c46-a20f169976ec + Task ID: ab78... + +Tool: everyrow_progress +→ Completed: 15/15 (0 failed) in 57s. + +Tool: everyrow_results +├─ task_id: "ab78..." +├─ output_path: "/Users/you/qualified_jobs.csv" +→ Saved 7 rows to /Users/you/qualified_jobs.csv +``` + +7 of 15 postings qualified. [View the session](https://everyrow.io/sessions/17d3075d-1788-4fba-8c46-a20f169976ec). + +| Company | Title | Why It Passed | +|---------|-------|---------------| +| TechCorp | Senior Backend Engineer | Remote (US), 7+ years, $180k-220k | +| DataDriven Inc | Staff Data Scientist | Hybrid (NYC), Staff title, $200k-250k | +| RemoteFirst Co | Lead Frontend Engineer | 100% Remote, Lead title, $160k-190k | +| FinTech Pro | Senior Security Engineer | Remote (EU), Senior title, disclosed salary | +| HealthTech | Senior Product Manager | Distributed team, Senior title, salary range | +| MegaCorp | Staff SRE | Hybrid (Seattle), Staff title, $190k-240k | +| EdTech Plus | Senior iOS Developer | Remote first, Senior title, $140k-175k | + +All 15 rows were decisive on the first pass (7 yes, 8 no, 0 borderline). Traditional keyword matching achieves ~68% precision on these criteria; semantic screening exceeds 90%. + + + + +The everyrow SDK screens job postings with LLM-powered evaluation, returning structured results with per-criterion breakdowns. 
+ +| Metric | Value | +| -------------- | ---------- | +| Rows processed | 15 | +| Rows passing | 7 (46.7%) | + +```bash +pip install everyrow +export EVERYROW_API_KEY=your_key_here # Get one at everyrow.io/api-key +``` + +```python +import asyncio +import pandas as pd +from pydantic import BaseModel, Field +from everyrow import create_session +from everyrow.ops import screen + +job_postings = pd.read_csv("job_postings.csv") + +class JobScreeningResult(BaseModel): + passes: bool = Field(description="Whether the job meets ALL three criteria") + is_remote_friendly: bool = Field(description="Allows remote/hybrid/distributed work") + is_senior_level: bool = Field(description="Senior/Staff/Lead/Principal or 5+ years") + has_salary_disclosed: bool = Field(description="Specific salary figures provided") + reasoning: str = Field(description="Brief explanation") + +async def main(): + async with create_session(name="Job Posting Screening") as session: + result = await screen( + session=session, + task=""" + Screen job postings for ALL THREE criteria: + 1. Remote-friendly: explicitly allows remote/hybrid/distributed work + 2. Senior-level: title includes Senior/Staff/Lead/Principal or 5+ years + 3. Salary disclosed: specific compensation figures, not "competitive" + """, + input=job_postings, + response_model=JobScreeningResult, + ) + return result.data + +results = asyncio.run(main()) +``` + +7 of 15 postings passed all three criteria. The structured output includes individual boolean fields for each criterion plus reasoning, making it easy to analyze which criteria are most commonly failed. 
+ + + diff --git a/docs/case_studies/screen-stocks-by-investment-thesis/content.mdx b/docs/case_studies/screen-stocks-by-investment-thesis/content.mdx new file mode 100644 index 00000000..3b049a52 --- /dev/null +++ b/docs/case_studies/screen-stocks-by-investment-thesis/content.mdx @@ -0,0 +1,156 @@ +--- +title: Screen Stocks by Investment Thesis +metadataTitle: How We Use Claude Code as a Qualitative Stock Screener +description: Qualitative stock screening across hundreds of tickers. Apply investment theses that require judgment about business models, revenue mix, and market positioning. +--- + +# Screen Stocks by Investment Thesis + + + + +Ask Claude Code to evaluate whether Apple has >75% recurring revenue and benefits from US-China tensions, and it will research both questions and give you a thorough answer. Applying that same depth of analysis to all 502 S&P 500 companies needs 502 independent research tasks running in parallel. + +Here, we get Claude Code to screen the S&P 500 for companies with >75% recurring revenue that would also benefit from escalating US-China tensions over Taiwan. + +| Metric | Value | +| -------------- | ------------ | +| Rows processed | 502 | +| Rows passing | 63 (12.5%) | +| Total cost | $17.15 | +| Time | 15.5 minutes | + +First, install the [everyrow](https://github.com/futuresearch/everyrow-sdk) plugin for Claude Code: + +```bash +claude plugin marketplace add futuresearch/everyrow-sdk +claude plugin install everyrow@futuresearch +``` + +Set your API key before launching Claude Code: + +```bash +export EVERYROW_API_KEY=your_key_here # Get one at everyrow.io/api-key +``` + +With your S&P 500 CSV in the working directory, tell Claude: + +``` +Screen this S&P 500 dataset to find companies with high-quality recurring revenue +business models that would also benefit from escalating US-China tensions over Taiwan. + +Recurring revenue >75%: Subscription services, long-term contracts, maintenance +agreements, royalty streams. 
Not one-time product sales or project-based work. + +Taiwan tensions beneficiary: CHIPS Act beneficiaries, defense contractors, +cybersecurity, reshoring plays, alternative supply chain providers. Exclude +companies dependent on Taiwan manufacturing or with significant China revenue at risk. +``` + +Claude calls everyrow's `screen` MCP tool, which runs a two-pass pipeline: a fast first pass triages all rows, then a careful second pass re-evaluates borderline cases: + +``` +Tool: everyrow_screen +├─ task: "Find companies with high-quality recurring revenue business models..." +├─ input_csv: "/Users/you/sp500.csv" +└─ response_schema: null + +→ Submitted: 502 rows for screening. + Session: https://everyrow.io/sessions/374a9a36-55e7-4b7c-92e8-6d396b40071b + Task ID: fc3d... + +Tool: everyrow_progress +├─ task_id: "fc3d..." +→ Running: 0/502 complete, 502 running (30s elapsed) + +... + +Tool: everyrow_progress +→ Completed: 502/502 (0 failed) in 930s. + +Tool: everyrow_results +├─ task_id: "fc3d..." +├─ output_path: "/Users/you/thesis_screen.csv" +→ Saved 63 rows to /Users/you/thesis_screen.csv +``` + +63 of 502 companies passed (12.5%). [View the session](https://everyrow.io/sessions/374a9a36-55e7-4b7c-92e8-6d396b40071b). + +| Sector | Passing | +|--------|---------| +| Information Technology | 13 | +| Utilities | 13 | +| Financials | 12 | +| Industrials | 11 | + +Sample passing companies: + +| Ticker | Company | Sector | +|--------|---------|--------| +| LLY | Eli Lilly | Health Care | +| PLTR | Palantir Technologies | Information Technology | +| NOW | ServiceNow | Information Technology | +| PANW | Palo Alto Networks | Information Technology | +| CRWD | CrowdStrike | Information Technology | +| GD | General Dynamics | Industrials | +| NOC | Northrop Grumman | Industrials | + +Each result includes research explaining the decision. For ServiceNow: "97% of revenue from subscriptions. 
Critical enabler of cybersecurity and digital transformation for US federal government infrastructure." + + + + +The everyrow SDK screens every row with LLM-powered web research agents, handling the batching and parallelism in a single function call. + +Here, we screen the S&P 500 for companies with >75% recurring revenue that benefit from US-China tensions over Taiwan. + +| Metric | Value | +| -------------- | ------------ | +| Rows processed | 502 | +| Rows passing | 63 (12.5%) | + +```bash +pip install everyrow +export EVERYROW_API_KEY=your_key_here # Get one at everyrow.io/api-key +``` + +```python +import asyncio +import pandas as pd +from pydantic import BaseModel, Field +from everyrow import create_session +from everyrow.ops import screen + +stocks = pd.read_csv("sp500.csv") + +class ScreenResult(BaseModel): + passes: bool = Field( + description="True if company has >75% recurring revenue AND is a Taiwan tensions beneficiary" + ) + +async def main(): + async with create_session(name="Stock Screening: Investment Thesis") as session: + result = await screen( + session=session, + task=""" + Find companies with high-quality recurring revenue business models + that would also benefit from escalating US-China tensions over Taiwan. + + Recurring revenue >75%: Subscription services, long-term contracts, + maintenance agreements, royalty streams. + + Taiwan tensions beneficiary: CHIPS Act beneficiaries, defense contractors, + cybersecurity, reshoring plays, alternative supply chain providers. + """, + input=stocks, + response_model=ScreenResult, + ) + return result.data + +results = asyncio.run(main()) +``` + +63 companies passed. Sector breakdown: Information Technology (13), Utilities (13), Financials (12), Industrials (11). The screen identified cybersecurity firms (CrowdStrike, Palo Alto Networks), defense contractors (General Dynamics, Northrop Grumman), and infrastructure companies with high recurring revenue. 
+ + + diff --git a/docs/case_studies/screen-stocks-by-margin-sensitivity/content.mdx b/docs/case_studies/screen-stocks-by-margin-sensitivity/content.mdx new file mode 100644 index 00000000..d6f21e6a --- /dev/null +++ b/docs/case_studies/screen-stocks-by-margin-sensitivity/content.mdx @@ -0,0 +1,148 @@ +--- +title: Screen Stocks by Economic Sensitivity +metadataTitle: Can Claude Code Screen a Financial Dataset for Multi-Factor Sensitivity? +description: Multi-factor sensitivity screening across a stock universe. Evaluate economic exposure, hedging strategy, and historical margin behavior per company. +--- + +# Screen Stocks by Economic Sensitivity + + + + +Claude Code is great at researching one company's oil price sensitivity. It can find margin history, check hedging disclosures, and assess energy exposure. Doing that same five-factor analysis for 502 companies requires more parallel web research than a single session can support. + +Here, we get Claude Code to screen the S&P 500 for companies whose margins compress when oil prices rise. + +| Metric | Value | +| -------------- | ------------ | +| Rows processed | 502 | +| Rows passing | 150 (29.9%) | +| Total cost | $17.22 | +| Time | 17.5 minutes | + +First, install the [everyrow](https://github.com/futuresearch/everyrow-sdk) plugin for Claude Code: + +```bash +claude plugin marketplace add futuresearch/everyrow-sdk +claude plugin install everyrow@futuresearch +``` + +Set your API key before launching Claude Code: + +```bash +export EVERYROW_API_KEY=your_key_here # Get one at everyrow.io/api-key +``` + +With your S&P 500 CSV in the working directory, tell Claude: + +``` +Screen this S&P 500 dataset to find companies whose profit margins fall when +oil prices go up. Consider energy-intensive operations, transportation dependence, +consumer discretionary sensitivity, and historical correlation with oil price spikes. +Exclude energy companies and those with strong pricing power. 
+``` + +Claude calls everyrow's `screen` MCP tool: + +``` +Tool: everyrow_screen +├─ task: "Find large cap companies whose profit margins fall when oil prices go up..." +├─ input_csv: "/Users/you/sp500.csv" +└─ response_schema: null + +→ Submitted: 502 rows for screening. + Session: https://everyrow.io/sessions/c460711d-cb8c-44e2-aac7-b5d6bae2e02b + Task ID: a8c9... + +Tool: everyrow_progress +→ Running: 0/502 complete (30s elapsed) + +... + +Tool: everyrow_progress +→ Completed: 502/502 (0 failed) in 1048s. + +Tool: everyrow_results +├─ task_id: "a8c9..." +├─ output_path: "/Users/you/oil_sensitive_stocks.csv" +→ Saved 150 rows to /Users/you/oil_sensitive_stocks.csv +``` + +150 of 502 companies are margin-sensitive to oil prices. [View the session](https://everyrow.io/sessions/c460711d-cb8c-44e2-aac7-b5d6bae2e02b). + +| Sector | Passing | +|--------|---------| +| Consumer Discretionary | 39 | +| Industrials | 39 | +| Consumer Staples | 28 | +| Materials | 18 | +| Health Care | 11 | + +Sample passing companies: + +| Ticker | Company | Sector | +|--------|---------|--------| +| AMZN | Amazon | Consumer Discretionary | +| WMT | Walmart | Consumer Staples | +| UNP | Union Pacific | Industrials | +| DAL | Delta Air Lines | Industrials | +| FDX | FedEx | Industrials | +| NUE | Nucor | Materials | + +For Amazon: "Retail business is highly transportation-dependent, with billions in shipping and fulfillment costs. Analysts note profits can be 'hammered' by higher gas and diesel prices." + + + + +The everyrow SDK screens every row with LLM-powered web research agents to assess margin sensitivity to oil prices. 
+ +| Metric | Value | +| -------------- | ------------ | +| Rows processed | 502 | +| Rows passing | 150 (29.9%) | + +```bash +pip install everyrow +export EVERYROW_API_KEY=your_key_here # Get one at everyrow.io/api-key +``` + +```python +import asyncio +import pandas as pd +from pydantic import BaseModel, Field +from everyrow import create_session +from everyrow.ops import screen + +stocks = pd.read_csv("sp500.csv") + +class ScreenResult(BaseModel): + passes: bool = Field( + description="True if company's margins fall when oil prices go up" + ) + +async def main(): + async with create_session(name="Stock Screening: Oil Margin Sensitivity") as session: + result = await screen( + session=session, + task=""" + Find large cap companies whose profit margins fall when oil prices go up. + + Criteria: High energy costs, transportation dependence, consumer + discretionary sensitivity, energy-intensive manufacturing, historical + correlation with oil price spikes. + + Exclude: Energy companies, strong pricing power, minimal energy exposure, + effective energy hedging. + """, + input=stocks, + response_model=ScreenResult, + ) + return result.data + +results = asyncio.run(main()) +``` + +150 companies identified. The affected sectors are Consumer Discretionary (39), Industrials (39), Consumer Staples (28), and Materials (18). Airlines, retailers with heavy logistics, and energy-intensive manufacturers dominate the results. + + + diff --git a/docs/case_studies/understanding-costs-and-speed-for-merge/content.mdx b/docs/case_studies/understanding-costs-and-speed-for-merge/content.mdx new file mode 100644 index 00000000..53934b24 --- /dev/null +++ b/docs/case_studies/understanding-costs-and-speed-for-merge/content.mdx @@ -0,0 +1,142 @@ +--- +title: Merge Costs and Speed +metadataTitle: How Much Does Intelligent Data Merging Cost with Claude Code? +description: Empirical cost and speed analysis across five merge strategies. 
Transparent pricing from free exact matches through paid LLM and web search tiers. +--- + +# Merge Costs and Speed + + + + +Claude Code is great at merging two tables. But how much does it cost, and what determines the price? The answer depends on how hard each match is: exact and fuzzy matches are free, and only semantic matches that require LLM reasoning incur costs. + +Here, we run 5 merge experiments to empirically measure the cost cascade across increasing match difficulty. + +| Metric | Value | +| -------------- | ----------- | +| Total merges | 5 | +| Total cost | $0.06 | +| Total time | 2.1 minutes | + +First, install the [everyrow](https://github.com/futuresearch/everyrow-sdk) plugin for Claude Code: + +```bash +claude plugin marketplace add futuresearch/everyrow-sdk +claude plugin install everyrow@futuresearch +``` + +Set your API key before launching Claude Code: + +```bash +export EVERYROW_API_KEY=your_key_here # Get one at everyrow.io/api-key +``` + +Tell Claude to run each experiment with inline-generated data: + +``` +Create a test dataset of 10 companies with exact names, then merge them. +Then create a version with typos and merge again. Then test semantic +matching (Instagram to Meta, YouTube to Alphabet). Then test pharma +subsidiaries (Genentech to Roche, MSD to Merck). Show costs for each. 
+``` + +Results across all 5 experiments: + +``` +Tool: everyrow_merge (Experiment 1: Exact matches) +→ 10/10 matched, 6s, $0.00 + +Tool: everyrow_merge (Experiment 2: Fuzzy/typo matches) +→ 10/10 matched, 13s, $0.00 + +Tool: everyrow_merge (Experiment 3: Semantic matches) +→ 10/10 matched, 62s, $0.05 + +Tool: everyrow_merge (Experiment 4: Pharma subsidiaries) +→ 13/13 matched, 38s, $0.01 + +Tool: everyrow_merge (Experiment 5: Email domain matching) +→ 5/5 matched, 9s, $0.00 +``` + +| Experiment | Match Type | Cost | Accuracy | +|-----------|-----------|------|----------| +| Exact strings | Exact only | $0.00 | 100% | +| Typos/case | Exact + Fuzzy | $0.00 | 100% | +| Semantic (Instagram→Meta) | Exact + LLM | $0.05 | 100% | +| Pharma (Genentech→Roche) | Exact + Fuzzy + LLM | $0.01 | 100% | +| Email domains | LLM (domain) | $0.00 | 100% | + +The cascade strategy: + +| Strategy | Cost | Example | +|----------|------|---------| +| Exact match | Free | "Apple Inc" to "Apple Inc" | +| Fuzzy match | Free | "Microsft" to "Microsoft" | +| LLM reasoning | ~$0.002/row | "Instagram" to "Meta Platforms" | +| Web search | ~$0.01/row | Obscure or stale data | + + + + +The everyrow SDK implements a cost-optimized merge cascade. This notebook empirically measures the cost of each matching strategy across 5 experiments. 
+ +| Metric | Value | +| -------------- | ----------- | +| Total merges | 5 | +| Total cost | $0.03 | + +```bash +pip install everyrow +export EVERYROW_API_KEY=your_key_here # Get one at everyrow.io/api-key +``` + +```python +import asyncio +import pandas as pd +from everyrow import create_session, get_billing_balance +from everyrow.ops import merge + +async def measure_merge(name, task, left_table, right_table, **kwargs): + balance_before = await get_billing_balance() + async with create_session(name=name) as session: + result = await merge( + task=task, + session=session, + left_table=left_table, + right_table=right_table, + **kwargs, + ) + balance_after = await get_billing_balance() + cost = balance_before.current_balance_dollars - balance_after.current_balance_dollars + return result.data, cost + +# Exact matches: $0.00 +result, cost = await measure_merge( + "Exact matches only", + "Match companies by name.", + companies_exact, revenue_exact, + merge_on_left="company", merge_on_right="company_name", +) + +# Semantic matches: ~$0.03 +result, cost = await measure_merge( + "Semantic matches", + "Match companies. Instagram and WhatsApp are owned by Meta.", + companies_semantic, revenue_exact, + merge_on_left="company", merge_on_right="company_name", +) +``` + +| Experiment | Cost | Accuracy | +|-----------|------|----------| +| Exact matches | $0.00 | 100% | +| Fuzzy (typos) | $0.00 | 100% | +| Semantic | $0.03 | 100% | +| Pharma | $0.00 | 61.5% | + +Key finding: exact and fuzzy matches are free. Only rows requiring LLM reasoning incur costs (~$0.002/row for semantic, ~$0.01/row for web search). Providing `merge_on` hints reduces costs by helping the cascade skip LLM reasoning for more rows. 
+ + + diff --git a/docs/classify-dataframe-rows-llm.md b/docs/classify-dataframe-rows-llm.md deleted file mode 100644 index 0fa7e0e6..00000000 --- a/docs/classify-dataframe-rows-llm.md +++ /dev/null @@ -1,135 +0,0 @@ ---- -title: How to Classify and Label Data with an LLM in Python -description: Create and add to ML training datasets using web research agents at scale. The everyrow SDK handles batching, parallelism, structured output, and retries automatically. ---- - -# How to Classify DataFrame Rows with an LLM - -Labeling data with an LLM at scale requires orchestration and can get very expensive. EveryRow can classify each row of a dataframe using LLMs or LLM web agents at low cost, by handling the batching, parallelism, task queues, error handling, and consistency, in a single function call. - -We run [evals](https://evals.futuresearch.ai/) to find the pareto frontier for classification tasks, getting you the most accuracy for your dollar. - -Here, we classify 200 job postings into 9 categories in 2 minutes for $1.74. - -| Metric | Value | -| ------------ | ----------- | -| Rows | 200 | -| Time | 2.1 minutes | -| Cost | $1.74 | -| Cost per row | $0.009 | - -[View the session](https://everyrow.io/sessions/f852c537-1724-44bb-8979-84434ecb2dfe) - -If you're categorizing support tickets, labeling training data, or tagging content by topic, string heuristic or embedding techniques are low accuracy, but training a model is a very high lift. LLMs make it possible to solve this efficiently. - -## Walkthrough - -The `agent_map` function processes each row in parallel with structured output via Pydantic models. You define the schema, describe the task, and get back a DataFrame with your new columns. Download [hn_jobs.csv](https://media.githubusercontent.com/media/futuresearch/everyrow-sdk/refs/heads/main/docs/data/hn_jobs.csv) to follow along. 
- -```bash -pip install everyrow -export EVERYROW_API_KEY=your_key_here # Get one at everyrow.io -``` - -```python -import asyncio -from typing import Literal - -import pandas as pd -from pydantic import BaseModel, Field - -from everyrow.ops import agent_map - - -class JobClassification(BaseModel): - category: Literal[ - "backend", "frontend", "fullstack", "data", - "ml_ai", "devops_sre", "mobile", "security", "other" - ] = Field(description="Primary role category") - reasoning: str = Field(description="Why this category was chosen") - - -async def main(): - jobs = pd.read_csv("hn_jobs.csv") - - result = await agent_map( - task="""Classify this job posting by primary role: - - backend: Server-side, API development - - frontend: UI, web development - - fullstack: Both frontend and backend - - data: Data engineering, pipelines, analytics - - ml_ai: Machine learning, AI, deep learning - - devops_sre: Infrastructure, platform engineering - - mobile: iOS, Android development - - security: Security engineering - - other: Product, design, management, etc. - """, - input=jobs, - response_model=JobClassification, - ) - - print(result.data[["id", "category", "reasoning"]]) - - -asyncio.run(main()) -``` - -The output DataFrame includes your original columns plus `category` and `reasoning`: - -``` - id category reasoning -0 46469380 fullstack Role spans React frontend and Django backend... -1 46134153 fullstack Title is "Fullstack Engineer (with DevOps focus)"... -2 46113062 backend Company builds API platform tooling... -3 46467458 ml_ai First role listed is ML Engineer... -4 46466466 other Primary role is Founding Product Manager... -``` - -## Constraining output values - -Use Python's `Literal` type to restrict classifications to specific values: - -```python -category: Literal["positive", "negative", "neutral"] -``` - -The LLM is constrained to only return values from this set. No post-processing or validation needed. 
- -## Adding confidence scores - -Extend the response model to capture uncertainty: - -```python -class Classification(BaseModel): - category: Literal["spam", "ham"] = Field(description="Email classification") - confidence: Literal["high", "medium", "low"] = Field( - description="How confident the classification is" - ) - signals: str = Field(description="Key signals that drove the decision") -``` - -## Multi-label classification - -For cases where multiple labels can apply, use a list: - -```python -class MultiLabel(BaseModel): - tags: list[str] = Field(description="All applicable tags for this item") - primary_tag: str = Field(description="The most relevant tag") -``` - -## Adding in web research agents - -Choosing the right LLM, and handling the batching, parallelism, and retries is not that hard when there is no web search. But when you want to use the web as part of your classification, e.g. looking at the wikipedia page for entities, cost and complexity can spiral. - -EveryRow supports this natively. And we tune our web research to be as efficient as possible, classifying rows for as little as $0.05/row, though it can cost more if the research is more involved. - -And without web research agents, as in the example at the top, we can classify data for ~$0.009 per row, or 10,000 rows for ~$90. The exact cost depends on input length and the complexity of your response model. Short inputs with simple schemas cost less; long documents with detailed reasoning cost more. - -| Rows | Estimated Cost | Estimated Time | -| ------ | -------------- | -------------- | -| 100 | ~$1 | ~1 min | -| 1,000 | ~$9 | ~5 min | -| 10,000 | ~$90 | ~30 min | - -You can visualize the results at the output URL and see latency and cost numbers. The first $20 of processing is free with no credit card required. 
diff --git a/docs/classify-dataframe-rows-llm.mdx b/docs/classify-dataframe-rows-llm.mdx new file mode 100644 index 00000000..9f856dfd --- /dev/null +++ b/docs/classify-dataframe-rows-llm.mdx @@ -0,0 +1,193 @@ +--- +title: Classify and Label Rows +metadataTitle: How to Classify and Label Thousands of Rows in Claude Code +description: Multi-class labeling with LLM-powered classification. Structured output, automatic batching, and consistent labels across thousands of rows. +--- + +# Classify and Label Rows + + + + +Claude Code's text processing works for classifying a few dozen items in a single prompt. When you have 200 rows that each need consistent, structured labels across 9 categories, you need batched parallel evaluation with a fixed schema. + +Here, we get Claude Code to classify 200 job postings into 9 role categories in under a minute for $1.53. + +| Metric | Value | +| ------------ | ----------- | +| Rows | 200 | +| Time | 1.0 minutes | +| Cost | $1.53 | +| Cost per row | $0.008 | + +First, install the [everyrow](https://github.com/futuresearch/everyrow-sdk) plugin for Claude Code: + +```bash +claude plugin marketplace add futuresearch/everyrow-sdk +claude plugin install everyrow@futuresearch +``` + +Set your API key before launching Claude Code: + +```bash +export EVERYROW_API_KEY=your_key_here # Get one at everyrow.io/api-key +``` + +Download the dataset: [hn_jobs.csv](https://media.githubusercontent.com/media/futuresearch/everyrow-sdk/refs/heads/main/docs/data/hn_jobs.csv) (3,616 Hacker News "Who's Hiring" posts). With the CSV in your working directory, tell Claude: + +``` +Classify each row in hn_jobs.csv by primary engineering role. Use these categories: +backend, frontend, fullstack, data, ml_ai, devops_sre, mobile, security, other. + +For each row, return the category and a short reasoning. 
+``` + +Claude calls everyrow's `agent` MCP tool with your schema, then polls for progress: + +``` +Tool: everyrow_agent +├─ task: "Classify this job posting by primary role: backend, frontend, fullstack..." +├─ input_csv: "/Users/you/hn_jobs.csv" +└─ response_schema: {"category": "enum", "reasoning": "string"} + +→ Submitted: 200 rows for processing. + Session: https://everyrow.io/sessions/4d82663b-c681-4b12-a48a-17533f162b51 + Task ID: 4d82... + +Tool: everyrow_progress +├─ task_id: "4d82..." +→ Running: 0/200 complete, 200 running (10s elapsed) + +Tool: everyrow_progress +→ Running: 100/200 complete, 100 running (30s elapsed) + +... + +Tool: everyrow_progress +→ Completed: 200/200 (0 failed) in 59s. + +Tool: everyrow_results +├─ task_id: "4d82..." +├─ output_path: "/Users/you/classified_jobs.csv" +→ Saved 200 rows to /Users/you/classified_jobs.csv +``` + +All 200 rows classified in 59 seconds. [View the session](https://everyrow.io/sessions/4d82663b-c681-4b12-a48a-17533f162b51). + +| Category | Count | % | +|------------|-------|-------| +| fullstack | 88 | 44.0% | +| other | 32 | 16.0% | +| backend | 21 | 10.5% | +| ml_ai | 20 | 10.0% | +| data | 13 | 6.5% | +| mobile | 9 | 4.5% | +| devops_sre | 8 | 4.0% | +| frontend | 6 | 3.0% | +| security | 3 | 1.5% | + +Fullstack dominates (44%) because Hacker News "Who's Hiring" posts are predominantly from startups hiring generalist engineers. + + + + +The everyrow Python SDK orchestrates thousands of parallel LLM evaluations in a single function call, for classification tasks that require judgment on every row. + +Here, we classify 200 job postings into 9 categories in 2 minutes for $1.74. 
+ +| Metric | Value | +| ------------ | ----------- | +| Rows | 200 | +| Time | 2.1 minutes | +| Cost | $1.74 | +| Cost per row | $0.009 | + +[View the session](https://everyrow.io/sessions/f852c537-1724-44bb-8979-84434ecb2dfe) + +If you're categorizing support tickets, labeling training data, or tagging content by topic, string heuristic or embedding techniques are low accuracy, but training a model is a very high lift. LLMs make it possible to solve this efficiently. + +```bash +pip install everyrow +export EVERYROW_API_KEY=your_key_here # Get one at everyrow.io +``` + +The `agent_map` function processes each row in parallel with structured output via Pydantic models. You define the schema, describe the task, and get back a DataFrame with your new columns. Download [hn_jobs.csv](https://media.githubusercontent.com/media/futuresearch/everyrow-sdk/refs/heads/main/docs/data/hn_jobs.csv) to follow along. + +```python +import asyncio +from typing import Literal + +import pandas as pd +from pydantic import BaseModel, Field + +from everyrow.ops import agent_map + + +class JobClassification(BaseModel): + category: Literal[ + "backend", "frontend", "fullstack", "data", + "ml_ai", "devops_sre", "mobile", "security", "other" + ] = Field(description="Primary role category") + reasoning: str = Field(description="Why this category was chosen") + + +async def main(): + jobs = pd.read_csv("hn_jobs.csv") + + result = await agent_map( + task="""Classify this job posting by primary role: + - backend: Server-side, API development + - frontend: UI, web development + - fullstack: Both frontend and backend + - data: Data engineering, pipelines, analytics + - ml_ai: Machine learning, AI, deep learning + - devops_sre: Infrastructure, platform engineering + - mobile: iOS, Android development + - security: Security engineering + - other: Product, design, management, etc. 
+ """, + input=jobs, + response_model=JobClassification, + ) + + print(result.data[["id", "category", "reasoning"]]) + + +asyncio.run(main()) +``` + +The output DataFrame includes your original columns plus `category` and `reasoning`: + +``` + id category reasoning +0 46469380 fullstack Role spans React frontend and Django backend... +1 46134153 fullstack Title is "Fullstack Engineer (with DevOps focus)"... +2 46113062 backend Company builds API platform tooling... +3 46467458 ml_ai First role listed is ML Engineer... +4 46466466 other Primary role is Founding Product Manager... +``` + +Use Python's `Literal` type to restrict classifications to specific values. The LLM is constrained to only return values from this set. No post-processing or validation needed. + +For cases where multiple labels can apply, use a list: + +```python +class MultiLabel(BaseModel): + tags: list[str] = Field(description="All applicable tags for this item") + primary_tag: str = Field(description="The most relevant tag") +``` + +Without web research agents, everyrow can classify data for ~$0.009 per row, or 10,000 rows for ~$90. The exact cost depends on input length and the complexity of your response model. + +| Rows | Estimated Cost | Estimated Time | +| ------ | -------------- | -------------- | +| 100 | ~$1 | ~1 min | +| 1,000 | ~$9 | ~5 min | +| 10,000 | ~$90 | ~30 min | + + + + +--- + +Built with [everyrow](https://github.com/futuresearch/everyrow-sdk). See the [agent_map documentation](reference/AGENT_MAP) for more options including response models and effort levels. diff --git a/docs/deduplicate-training-data-ml.md b/docs/deduplicate-training-data-ml.md deleted file mode 100644 index 977e8b9e..00000000 --- a/docs/deduplicate-training-data-ml.md +++ /dev/null @@ -1,101 +0,0 @@ ---- -title: Remove Duplicates from ML Training Data in Python -description: Find and remove semantic duplicates from ML training datasets to prevent data leakage and overfitting. 
Use LLMs and LLM Research Agents to get maximum accuracy. ---- - -# How to Deduplicate Training Data in Python - -Near-duplicates in ML training data cause data leakage, overfitting, and memorization. This guide shows how to find and remove semantically similar examples that aren't exact matches—paraphrases, reformatted text, or records conveying the same information with different words. - -| Metric | Value | -| ------------------ | ------------------------------------------------------------------------- | -| Input rows | 3,000 | -| Unique after dedupe| 1,928 | -| Duplicates removed | 1,072 (35.7%) | -| Time | 5.3 minutes | -| Cost | $4.21 | -| Session | [view](https://everyrow.io/sessions/ccaa306d-ef68-499b-a684-c0b08f9bfef3) | - -Standard deduplication with `pandas.drop_duplicates()` only catches exact matches. MinHash/LSH (datasketch) works for near-exact text but not semantic similarity. Libraries like dedupe.io require labeled training data. None handle "same meaning, different words" without manual setup. 
- -```bash -pip install everyrow datasets -export EVERYROW_API_KEY=your_key_here # Get one at everyrow.io/api-key -``` - -```python -import asyncio -import pandas as pd -from datasets import load_dataset -from everyrow.ops import dedupe - -# Load a dataset with potential semantic duplicates -# Using PAWS - paraphrase pairs from Wikipedia -dataset = load_dataset( - "google-research-datasets/paws", - "labeled_final", - split="train" -) - -# Extract sentences into a dataframe -sentences = [] -seen = set() -for row in dataset: - for s in [row["sentence1"], row["sentence2"]]: - if s not in seen: - seen.add(s) - sentences.append(s) - if len(sentences) >= 3000: - break - if len(sentences) >= 3000: - break - -df = pd.DataFrame({"text": sentences}) -print(f"Training examples: {len(df)}") - -async def dedupe_training_data(): - result = await dedupe( - input=df, - equivalence_relation=""" - Two sentences are duplicates if they convey the same meaning, - even if phrased differently. This includes: - - Paraphrases (same meaning, different words or word order) - - Minor grammatical variations - - Sentences about the same fact that would be redundant - - NOT duplicates if they describe different facts, even if - they share many words. - """, - ) - - # Get deduplicated dataset - clean_df = result.data[result.data["selected"] == True] - print(f"After deduplication: {len(clean_df)}") - - return clean_df - -clean_data = asyncio.run(dedupe_training_data()) -``` - -The output includes three columns added to your data: `equivalence_class_id` groups duplicates together, `equivalence_class_name` gives each cluster a readable label, and `selected` marks the canonical example to keep. Filter to `selected == True` to get your deduplicated dataset. - -Here are examples of duplicates the system found: - -``` -Cluster: "Glenn Howard's Ontario Championship win" - ✓ Glenn Howard won the Ontario Championship for the 17th time as either third or skip. 
- For the 17th time the Glenn Howard won the Ontario Championship as third or skip. - -Cluster: "Chananian village location" - ✓ Chananian is a village in Azad Kashmir, the Leepa Valley, Hattian Bala District, Pakistan. - Chananian is a village in Leepa Valley, Hattian Bala District of Azad Kashmir, Pakistan. - Chananian is a village in the Leepa Valley, Hattian Bala district of Azad Kashmir, Pakistan. - -Cluster: "Person's birth and death details" - ✓ David Spurlock was born on 18 November 1959 in Dallas, Texas, and moved to Memphis... - J. David Spurlock was born on November 18, 1959 in Dallas, Texas. He moved to Memphis... -``` - -These are semantic duplicates that exact-match deduplication would miss entirely. The sentences have different word order, date formats ("November 18" vs "18 November"), name variations ("David Spurlock" vs "J. David Spurlock"), and grammatical structure—but they describe the same facts and would be redundant in a training set. - -The 35.7% reduction rate is typical for datasets that weren't explicitly deduplicated during creation. For datasets scraped from the web or aggregated from multiple sources, reduction rates can be higher. The cost scales linearly—expect roughly $1.40 per 1,000 rows for text data of this complexity. diff --git a/docs/deduplicate-training-data-ml.mdx b/docs/deduplicate-training-data-ml.mdx new file mode 100644 index 00000000..030660d1 --- /dev/null +++ b/docs/deduplicate-training-data-ml.mdx @@ -0,0 +1,186 @@ +--- +title: Deduplicate Training Data +metadataTitle: How We Use Claude Code to Find Hidden Duplicates in Training Data +description: Identify paraphrases, reformatted text, and near-copies that share the same underlying content. Semantic deduplication for cleaner training sets. +--- + +# Deduplicate Training Data + + + + +Claude Code handles exact deduplication natively by writing Python to hash and compare rows. 
Scaling to 3,000 sentences where the duplicates are paraphrases needs an approach where each pair is evaluated for semantic equivalence. "The cat sat on the mat" and "On the mat, the cat was sitting" share no exact n-grams but mean the same thing. + +Here, we get Claude Code to deduplicate 3,000 sentences from the PAWS paraphrase dataset, finding sentences that mean the same thing even when phrased differently. + +| Metric | Value | +| ------------------ | ------------ | +| Input rows | 3,000 | +| Unique after dedupe| 2,027 | +| Duplicates removed | 973 (32.4%) | +| Time | 28.2 minutes | +| Cost | $13.27 | + +First, install the [everyrow](https://github.com/futuresearch/everyrow-sdk) plugin for Claude Code: + +```bash +claude plugin marketplace add futuresearch/everyrow-sdk +claude plugin install everyrow@futuresearch +``` + +Set your API key before launching Claude Code: + +```bash +export EVERYROW_API_KEY=your_key_here # Get one at everyrow.io/api-key +``` + +The dataset is 3,000 sentences extracted from the [PAWS](https://huggingface.co/datasets/google-research-datasets/paws) paraphrase dataset, where many sentence pairs convey the same fact with different word order. With the CSV in your working directory, tell Claude: + +``` +Deduplicate this dataset of sentences. Two sentences are duplicates if they +convey the same meaning, even if phrased differently. This includes paraphrases, +minor grammatical variations, and sentences about the same fact that would be +redundant in a training set. They are NOT duplicates if they describe different +facts, even if they share many words. +``` + +Claude calls everyrow's `dedupe` MCP tool with your equivalence relation: + +``` +Tool: everyrow_dedupe +├─ equivalence_relation: "Two sentences are duplicates if they convey the same meaning..." +└─ input_csv: "/Users/you/paws_sentences.csv" + +→ Submitted: 3,000 rows for deduplication. 
+ Session: https://everyrow.io/sessions/8eac50da-f318-49cf-9f00-67b35700eb8a + Task ID: 8eac... + +Tool: everyrow_progress +├─ task_id: "8eac..." +→ Running: 0/3000 complete (30s elapsed) + +... + +Tool: everyrow_progress +→ Completed: 3000/3000 (0 failed) in 1691s. + +Tool: everyrow_results +├─ task_id: "8eac..." +├─ output_path: "/Users/you/paws_deduplicated.csv" +→ Saved 3000 rows to /Users/you/paws_deduplicated.csv +``` + +973 duplicates found and removed (32.4% reduction). [View the session](https://everyrow.io/sessions/8eac50da-f318-49cf-9f00-67b35700eb8a). + +Examples of duplicates the system found: + +| Cluster | Variants | +|---------|----------| +| Nick Smith and Duncan become friends | "Chris Egan (Nick Smith) settles down with his family..." / "Nick Smith (Chris Egan) settles in Summer Bay..." | +| WORHP software library description | "WORHP, also referred to as eNLP (European NLP Solver)..." / "WORHP, also referred to by ESA as eNLP..." | +| Baseball series in Havana | "Another series was played in Havana between Cincinnati Reds..." / "In Havana, another series was played between..." | + +These are semantic duplicates that exact-match deduplication would miss entirely. The sentences have different word order, name variations, and grammatical structure, but they describe the same facts and would be redundant in a training set. The output includes `equivalence_class_id`, `equivalence_class_name`, and `selected` columns. Filter to `selected == True` to get the deduplicated dataset. + + + + +Near-duplicates in ML training data cause data leakage, overfitting, and memorization. The everyrow SDK finds and removes semantically similar examples that aren't exact matches: paraphrases, reformatted text, or records conveying the same information with different words. 
+ +| Metric | Value | +| ------------------ | ------------------------------------------------------------------------- | +| Input rows | 3,000 | +| Unique after dedupe| 1,928 | +| Duplicates removed | 1,072 (35.7%) | +| Time | 5.3 minutes | +| Cost | $4.21 | +| Session | [view](https://everyrow.io/sessions/ccaa306d-ef68-499b-a684-c0b08f9bfef3) | + +Standard deduplication with `pandas.drop_duplicates()` only catches exact matches. MinHash/LSH (datasketch) works for near-exact text but not semantic similarity. Libraries like dedupe.io require labeled training data. None handle "same meaning, different words" without manual setup. + +```bash +pip install everyrow datasets +export EVERYROW_API_KEY=your_key_here # Get one at everyrow.io/api-key +``` + +```python +import asyncio +import pandas as pd +from datasets import load_dataset +from everyrow.ops import dedupe + +# Load a dataset with potential semantic duplicates +# Using PAWS - paraphrase pairs from Wikipedia +dataset = load_dataset( + "google-research-datasets/paws", + "labeled_final", + split="train" +) + +# Extract sentences into a dataframe +sentences = [] +seen = set() +for row in dataset: + for s in [row["sentence1"], row["sentence2"]]: + if s not in seen: + seen.add(s) + sentences.append(s) + if len(sentences) >= 3000: + break + if len(sentences) >= 3000: + break + +df = pd.DataFrame({"text": sentences}) +print(f"Training examples: {len(df)}") + +async def dedupe_training_data(): + result = await dedupe( + input=df, + equivalence_relation=""" + Two sentences are duplicates if they convey the same meaning, + even if phrased differently. This includes: + - Paraphrases (same meaning, different words or word order) + - Minor grammatical variations + - Sentences about the same fact that would be redundant + + NOT duplicates if they describe different facts, even if + they share many words. 
+ """, + ) + + # Get deduplicated dataset + clean_df = result.data[result.data["selected"] == True] + print(f"After deduplication: {len(clean_df)}") + + return clean_df + +clean_data = asyncio.run(dedupe_training_data()) +``` + +The output includes three columns added to your data: `equivalence_class_id` groups duplicates together, `equivalence_class_name` gives each cluster a readable label, and `selected` marks the canonical example to keep. Filter to `selected == True` to get your deduplicated dataset. + +Here are examples of duplicates the system found: + +``` +Cluster: "Glenn Howard's Ontario Championship win" + ✓ Glenn Howard won the Ontario Championship for the 17th time as either third or skip. + For the 17th time the Glenn Howard won the Ontario Championship as third or skip. + +Cluster: "Chananian village location" + ✓ Chananian is a village in Azad Kashmir, the Leepa Valley, Hattian Bala District, Pakistan. + Chananian is a village in Leepa Valley, Hattian Bala District of Azad Kashmir, Pakistan. + Chananian is a village in the Leepa Valley, Hattian Bala district of Azad Kashmir, Pakistan. + +Cluster: "Person's birth and death details" + ✓ David Spurlock was born on 18 November 1959 in Dallas, Texas, and moved to Memphis... + J. David Spurlock was born on November 18, 1959 in Dallas, Texas. He moved to Memphis... +``` + +The 35.7% reduction rate is typical for datasets that weren't explicitly deduplicated during creation. The cost scales linearly: roughly $1.40 per 1,000 rows for text data of this complexity. + + + + +--- + +Built with [everyrow](https://github.com/futuresearch/everyrow-sdk). See the [dedupe documentation](reference/DEDUPE) for more options including equivalence relation design. 
diff --git a/docs/filter-dataframe-with-llm.md b/docs/filter-dataframe-with-llm.md deleted file mode 100644 index 54b0a240..00000000 --- a/docs/filter-dataframe-with-llm.md +++ /dev/null @@ -1,107 +0,0 @@ ---- -title: Filter a Pandas DataFrame with LLMs -description: How to screen data by criteria that require research in Python. LLM research agents will cheaply and accurately research every row to screen, filter, or qualify. ---- - -# How to Filter a DataFrame with an LLM - -Here we show how to filter a pandas dataframe by qualitative criteria, when normal filtering like df[df['column'] == value] won't work. - -LLMs, and LLM-web-agents, can evaluate qualitative criteria at high accuracy. But they can be very expensive and difficult to orchestrate at scale. We provide a low cost solution by handling the orchestration, batching, and consistency checking. - -This guide shows how to filter 3,616 job postings for "remote-friendly, senior-level roles with disclosed salary" in 10 minutes for $4.24. - -| Metric | Value | -| ------------------- | ----------- | -| Rows processed | 3,616 | -| Rows passing filter | 216 (6.0%) | -| Total cost | $4.24 | -| Time | 9.9 minutes | -| Cost per row | $0.001 | - -In this example, we want to check job postings for three criteria: - -1. Remote-friendly -2. Senior level -3. Salary is disclosed - -None of these can be done without intelligence, by, e.g. - -```python -# This matches "No remote work available" -df[df['posting'].str.contains('remote', case=False)] -``` - -What you need is a filter that understands: this posting explicitly allows remote work, requires senior experience, and states a specific salary number. - -We use a dataset of 3,616 job postings from Hacker News "Who's Hiring" threads, 10% of all posts every month since March 2020 through January 2026. Download [hn_jobs.csv](https://media.githubusercontent.com/media/futuresearch/everyrow-sdk/refs/heads/main/docs/data/hn_jobs.csv) to follow along. 
- -```bash -pip install everyrow -export EVERYROW_API_KEY=your_key_here # Get one at everyrow.io/api-key -``` - -```python -import asyncio -import pandas as pd -from pydantic import BaseModel, Field -from everyrow.ops import screen - -jobs = pd.read_csv("hn_jobs.csv") # 3,616 job postings - -class JobScreenResult(BaseModel): - qualifies: bool = Field(description="True if meets ALL criteria") - -async def main(): - result = await screen( - task=""" - A job posting qualifies if it meets ALL THREE criteria: - - 1. Remote-friendly: Explicitly allows remote work, hybrid, WFH, - distributed teams, or "work from anywhere". - - 2. Senior-level: Title contains Senior/Staff/Lead/Principal/Architect, - OR requires 5+ years experience, OR mentions "founding engineer". - - 3. Salary disclosed: Specific compensation numbers are mentioned. - "$150K-200K" qualifies. "Competitive" or "DOE" does not. - """, - input=jobs, - response_model=JobScreenResult, - ) - - qualified = result.data - print(f"Qualified: {len(qualified)} of {len(jobs)}") - return qualified - -qualified_jobs = asyncio.run(main()) -``` - -The screen operation evaluates each row against the natural language criteria and returns only the rows that pass. Out of 3,616 postings, 216 qualified (6.0%). [View the session](https://everyrow.io/sessions/6f742040-7a17-46c3-87fd-419062e69bf2). - -Interestingly, the data reveals a clear trend in tech hiring practices over the pandemic years: - -| Year | Qualified | Total | Pass Rate | -| ---- | --------- | ----- | --------- | -| 2020 | 10 | 594 | 1.7% | -| 2021 | 27 | 1,033 | 2.6% | -| 2022 | 36 | 758 | 4.7% | -| 2023 | 39 | 412 | 9.5% | -| 2024 | 39 | 387 | 10.1% | -| 2025 | 59 | 406 | 14.5% | -| 2026 | 6 | 26 | 23.1% | - -In early 2020, only 1.7% of job postings met all three criteria. By 2025, that number reached 14.5%. More companies now offer remote work, disclose salaries upfront, and hire senior engineers. 
- -Some examples: - -``` -Bloomberg | Senior Software Engineer | Hybrid (NYC) | $160k - $240k USD + bonus -KoBold Metals | Senior Infrastructure Engineer | Remote (USA) | $170k - $230k -EnergyHub | Director of Engineering | Remote (US) | Salary $225k -Gladly | Staff Software Engineer | Remote (US, Colombia) | $60k–$215k + Equity -``` - ---- - -Built with [everyrow](https://github.com/futuresearch/everyrow-sdk). See the [screen documentation](reference/SCREEN) for more options including batch size tuning and async execution. diff --git a/docs/filter-dataframe-with-llm.mdx b/docs/filter-dataframe-with-llm.mdx new file mode 100644 index 00000000..394669ad --- /dev/null +++ b/docs/filter-dataframe-with-llm.mdx @@ -0,0 +1,193 @@ +--- +title: Filter a Dataset Intelligently +metadataTitle: How to Use Claude Code to Intelligently Filter a Large Dataset +description: Screen rows by nuanced, subjective rules applied in parallel across an entire dataset. Structured pass/fail results with full audit trails. +--- + +# Filter a Dataset Intelligently + + + + +Ask Claude Code to filter job postings for remote, senior roles and it will write solid Python with keyword matching. But "remote-friendly" is not always a keyword. A posting might say "team distributed across three time zones" or "occasional office visits in SF." Screening 3,616 rows with that level of judgment needs per-row LLM evaluation. + +Here, we get Claude Code to screen 3,616 job postings for "remote-friendly, senior-level roles with disclosed salary": three rules that require reading each posting. 
+ +| Metric | Value | +| ------------------- | ------------ | +| Rows processed | 3,616 | +| Rows passing filter | 232 (6.4%) | +| Total cost | $11.02 | +| Time | 8.0 minutes | + +First, install the [everyrow](https://github.com/futuresearch/everyrow-sdk) plugin for Claude Code: + +```bash +claude plugin marketplace add futuresearch/everyrow-sdk +claude plugin install everyrow@futuresearch +``` + +Set your API key before launching Claude Code: + +```bash +export EVERYROW_API_KEY=your_key_here # Get one at everyrow.io/api-key +``` + +Download the dataset: [hn_jobs.csv](https://media.githubusercontent.com/media/futuresearch/everyrow-sdk/refs/heads/main/docs/data/hn_jobs.csv) (3,616 Hacker News "Who's Hiring" posts, March 2020 through January 2026). With the CSV in your working directory, tell Claude: + +``` +Screen hn_jobs.csv to find job postings that meet ALL THREE criteria: + +1. Remote-friendly: explicitly allows remote work, hybrid, WFH, distributed + teams, or "work from anywhere" +2. Senior-level: title contains Senior/Staff/Lead/Principal/Architect, OR + requires 5+ years experience, OR mentions "founding engineer" +3. Salary disclosed: specific compensation numbers are mentioned. + "$150K-200K" qualifies. "Competitive" or "DOE" does not. +``` + +Claude calls everyrow's `screen` MCP tool with your criteria, then polls for progress until the operation completes: + +``` +Tool: everyrow_screen +├─ task: "Find job postings that meet ALL THREE criteria: 1. Remote-friendly..." +├─ input_csv: "/Users/you/hn_jobs.csv" +└─ response_schema: null + +→ Submitted: 3,616 rows for screening. + Session: https://everyrow.io/sessions/b47f3d3d-... + Task ID: 8a2f... + +Tool: everyrow_progress +├─ task_id: "8a2f..." +→ Running: 0/3616 complete, 3616 running (18s elapsed) + +Tool: everyrow_progress +→ Running: 1204/3616 complete, 2412 running (142s elapsed) + +... + +Tool: everyrow_progress +→ Completed: 3616/3616 (0 failed) in 478s. 
+ +Tool: everyrow_results +├─ task_id: "8a2f..." +├─ output_path: "/Users/you/qualified_jobs.csv" +→ Saved 232 rows to /Users/you/qualified_jobs.csv +``` + +Under the hood, everyrow runs a two-pass pipeline: a fast first pass triages all 3,616 rows (gemini-3-flash-preview, 12.7M tokens, $10.89), then a careful second pass re-evaluates the borderline cases with a stronger model (claude-sonnet-4, 224K tokens, $0.13). + +232 of 3,616 postings passed (6.4%). [View the session](https://everyrow.io/sessions/b47f3d3d-ca3d-4dc7-9994-e14edea9d2ea). + +The data reveals a clear trend in tech hiring over the pandemic years: + +| Year | Qualified | Total | Pass Rate | +| ---- | --------- | ----- | --------- | +| 2020 | 10 | 594 | 1.7% | +| 2021 | 27 | 1,033 | 2.6% | +| 2022 | 36 | 758 | 4.7% | +| 2023 | 39 | 412 | 9.5% | +| 2024 | 39 | 387 | 10.1% | +| 2025 | 59 | 406 | 14.5% | +| 2026 | 6 | 26 | 23.1% | + +In early 2020, only 1.7% of postings met all three criteria. By 2025, 14.5% did. More companies now offer remote work, disclose salaries upfront, and hire senior engineers through Hacker News. + +Sample qualified postings: + +``` +Bloomberg | Senior Software Engineer | Hybrid (NYC) | $160k - $240k USD + bonus +KoBold Metals | Senior Infrastructure Engineer | Remote (USA) | $170k - $230k +EnergyHub | Director of Engineering | Remote (US) | Salary $225k +Gladly | Staff Software Engineer | Remote (US, Colombia) | $60k–$215k + Equity +``` + + + + +The everyrow Python SDK orchestrates thousands of parallel LLM evaluations in a single function call, for filter criteria that require judgment on every row. + +Here, we screen 3,616 job postings for "remote-friendly, senior-level roles with disclosed salary": three criteria that can't be reduced to pattern matching. 
+ +| Metric | Value | +| ------------------- | ------------ | +| Rows processed | 3,616 | +| Rows passing filter | 216 (6.0%) | +| Total cost | $4.24 | +| Time | 9.9 minutes | + +```bash +pip install everyrow +export EVERYROW_API_KEY=your_key_here # Get one at everyrow.io/api-key +``` + +Download the dataset: [hn_jobs.csv](https://media.githubusercontent.com/media/futuresearch/everyrow-sdk/refs/heads/main/docs/data/hn_jobs.csv) (3,616 Hacker News "Who's Hiring" posts, March 2020 through January 2026). + +```python +import asyncio +import pandas as pd +from pydantic import BaseModel, Field +from everyrow.ops import screen + +jobs = pd.read_csv("hn_jobs.csv") # 3,616 job postings + +class JobScreenResult(BaseModel): + qualifies: bool = Field(description="True if meets ALL criteria") + +async def main(): + result = await screen( + task=""" + A job posting qualifies if it meets ALL THREE criteria: + + 1. Remote-friendly: Explicitly allows remote work, hybrid, WFH, + distributed teams, or "work from anywhere". + + 2. Senior-level: Title contains Senior/Staff/Lead/Principal/Architect, + OR requires 5+ years experience, OR mentions "founding engineer". + + 3. Salary disclosed: Specific compensation numbers are mentioned. + "$150K-200K" qualifies. "Competitive" or "DOE" does not. + """, + input=jobs, + response_model=JobScreenResult, + ) + + qualified = result.data + print(f"Qualified: {len(qualified)} of {len(jobs)}") + return qualified + +qualified_jobs = asyncio.run(main()) +``` + +216 of 3,616 postings passed (6.0%). [View the session](https://everyrow.io/sessions/6f742040-7a17-46c3-87fd-419062e69bf2). 
+ +The data reveals a clear trend in tech hiring over the pandemic years: + +| Year | Qualified | Total | Pass Rate | +| ---- | --------- | ----- | --------- | +| 2020 | 10 | 594 | 1.7% | +| 2021 | 27 | 1,033 | 2.6% | +| 2022 | 36 | 758 | 4.7% | +| 2023 | 39 | 412 | 9.5% | +| 2024 | 39 | 387 | 10.1% | +| 2025 | 59 | 406 | 14.5% | +| 2026 | 6 | 26 | 23.1% | + +In early 2020, only 1.7% of postings met all three criteria. By 2025, 14.5% did. More companies now offer remote work, disclose salaries upfront, and hire senior engineers through Hacker News. + +Sample qualified postings: + +``` +Bloomberg | Senior Software Engineer | Hybrid (NYC) | $160k - $240k USD + bonus +KoBold Metals | Senior Infrastructure Engineer | Remote (USA) | $170k - $230k +EnergyHub | Director of Engineering | Remote (US) | Salary $225k +Gladly | Staff Software Engineer | Remote (US, Colombia) | $60k–$215k + Equity +``` + + + + +--- + +Built with [everyrow](https://github.com/futuresearch/everyrow-sdk). See the [screen documentation](reference/SCREEN) for more options including batch size tuning and async execution. diff --git a/docs/fuzzy-join-without-keys.md b/docs/fuzzy-join-without-keys.md deleted file mode 100644 index 3455f1a0..00000000 --- a/docs/fuzzy-join-without-keys.md +++ /dev/null @@ -1,74 +0,0 @@ ---- -title: How to Fuzzy Join DataFrames in Python -description: Merge two pandas DataFrames when there's no shared ID column, using LLM-powered semantic matching for company names, tickers, and more. ---- - -# How to Merge DataFrames Without a Matching Column in Python - -When you need to join two pandas DataFrames but there's no shared ID column, `pd.merge()` won't help. Some techniques exist to do fuzzy matching on single columns, but this will miss the harder cases requiring semantic knowledge, and doesn't take advantage of data in other columns that give clues to which rows match. 
- -This guide shows how to merge tables using LLM-powered understanding, up and to including using agentic websearch to get additional information, to get the highest quality match available. We show how to do it pretty cheaply, since naive LLM-based solutions can be extremely expensive in token costs. - -In this example, we join two tables of 400-500 rows of company data, where one set has company names, and the other has stock tickers, or where names are spelled differently across sources. - -| Metric | Value | -| -------------- | ----------- | -| Rows processed | 438 | -| Accuracy | 100% | -| Cost | $1.00 | -| Time | ~30 seconds | - -```bash -pip install everyrow -export EVERYROW_API_KEY=your_key_here # Get one at everyrow.io -``` - -We'll use two datasets of S&P 500 companies from different sources. Download [company_info.csv](https://media.githubusercontent.com/media/futuresearch/everyrow-sdk/refs/heads/main/docs/data/company_info.csv) and [valuations.csv](https://media.githubusercontent.com/media/futuresearch/everyrow-sdk/refs/heads/main/docs/data/valuations.csv), or run the [full notebook](case-studies/match-software-vendors-to-requirements). - -```python -import asyncio -import pandas as pd -from everyrow.ops import merge - -# Two tables from different sources - no shared column -companies = pd.read_csv("company_info.csv") # has: company, price, mkt_cap, shares -valuations = pd.read_csv("valuations.csv") # has: ticker, fair_value - -async def main(): - result = await merge( - task="Match companies to their stock tickers", - left_table=companies, - right_table=valuations, - ) - - # The result is a DataFrame with all columns joined - print(result.data.head()) - - # company price mkt_cap shares ticker fair_value - # 0 3M 101.74 61.70678828 606514530 MMM 39.18 - # 1 A. O. 
Smith 32.38 4.904416495 151464376 AOS 6.59 - # 2 Abbott Laboratories 34.87 51.22933139 1469152033 ABT 119.19 - -asyncio.run(main()) -``` - -The SDK figures out that "3M" corresponds to ticker "MMM", "Alphabet Inc." to "GOOGL", and so on. No merge columns are specified because there's nothing to match on directly. - -The merge operation uses a cascade of matching strategies, stopping at the simplest one that works for each row: - -| Strategy | When Used | Cost | -| ----------- | --------------------------------------- | ----------- | -| Exact match | Identical strings | Free | -| Fuzzy match | Typos, case differences | Free | -| LLM match | Semantic equivalence (company → ticker) | ~$0.002/row | -| Web search | Stale or obscure data | ~$0.01/row | - -For the company-to-ticker merge above, 99.8% of rows matched via LLM reasoning alone. The remaining 0.2% required a quick web lookup. - -The same approach works when your data has typos or corruption. In testing with 10% character-level noise in company names (e.g., "Alphaeet Iqc." instead of "Alphabet Inc."), the cascade achieved 100% accuracy at $0.44 for 438 rows. The fuzzy matcher catches obvious typos, and the LLM handles cases where corruption makes string similarity unreliable. - -This approach works well when your tables represent the same entities but use different identifiers: company names vs tickers, product names vs SKUs, subsidiary names vs parent companies. For tables that do share a common column, the SDK will use exact matching first and only escalate to more expensive methods when needed. - ---- - -See the full analysis with multiple experiments in the [merge tutorial notebook](case-studies/match-software-vendors-to-requirements). 
diff --git a/docs/fuzzy-join-without-keys.mdx b/docs/fuzzy-join-without-keys.mdx new file mode 100644 index 00000000..33dbd801 --- /dev/null +++ b/docs/fuzzy-join-without-keys.mdx @@ -0,0 +1,150 @@ +--- +title: Join Tables Without Shared Keys +metadataTitle: Can Claude Code Join Two Tables Without a Common Column? +description: Semantic matching for tables with different naming conventions, abbreviations, and formats. Accurate joins across datasets with no shared identifier. +--- + +# Join Tables Without Shared Keys + + + + +Claude Code can merge two CSVs when they share a key column. But what if one table has company names and the other has stock tickers, and there's no shared column to join on? + +Here, we get Claude Code to join two S&P 500 tables (438 rows) where one has company names and the other has stock tickers. No merge key exists. + +| Metric | Value | +| -------------- | ----------- | +| Rows processed | 438 | +| Matched | 437 (99.8%) | +| Cost | $0.82 | +| Time | ~10.9 minutes | + +First, install the [everyrow](https://github.com/futuresearch/everyrow-sdk) plugin for Claude Code: + +```bash +claude plugin marketplace add futuresearch/everyrow-sdk +claude plugin install everyrow@futuresearch +``` + +Set your API key before launching Claude Code: + +```bash +export EVERYROW_API_KEY=your_key_here # Get one at everyrow.io/api-key +``` + +Download [company_info.csv](https://media.githubusercontent.com/media/futuresearch/everyrow-sdk/refs/heads/main/docs/data/company_info.csv) (company names, price, market cap) and [valuations.csv](https://media.githubusercontent.com/media/futuresearch/everyrow-sdk/refs/heads/main/docs/data/valuations.csv) (tickers, fair value). With both files in your working directory, tell Claude: + +``` +Merge company_info.csv with valuations.csv. The first table has company names, +the second has stock tickers. Match companies to their stock tickers. 
+``` + +Claude calls everyrow's `merge` MCP tool to match the tables using semantic understanding: + +``` +Tool: everyrow_merge +├─ task: "Match companies to their stock tickers" +├─ left_csv: "/Users/you/company_info.csv" +└─ right_csv: "/Users/you/valuations.csv" + +→ Submitted: 438 rows for merging. + Session: https://everyrow.io/sessions/5c33bade-f142-44b0-bed5-9e5a846cdc3c + Task ID: 5c33... + +Tool: everyrow_progress +├─ task_id: "5c33..." +→ Running: 0/438 complete (18s elapsed) + +... + +Tool: everyrow_progress +→ Completed: 438/438 (0 failed) in 653s. + +Tool: everyrow_results +├─ task_id: "5c33..." +├─ output_path: "/Users/you/merged_companies.csv" +→ Saved 438 rows to /Users/you/merged_companies.csv +``` + +437 of 438 companies matched. [View the session](https://everyrow.io/sessions/5c33bade-f142-44b0-bed5-9e5a846cdc3c). + +| Company | Ticker | Fair Value | +|---------|--------|------------| +| 3M | MMM | 39.18 | +| Abbott Laboratories | ABT | 119.19 | +| AbbVie | ABBV | 180.95 | +| Accenture | ACN | 107.79 | +| A. O. Smith | AOS | 6.59 | + +The system figures out that "3M" corresponds to ticker "MMM", "Alphabet Inc." to "GOOGL", and so on. The single unmatched row was Block, Inc. (formerly Square, Inc.), where the 2021 rebrand made the name-to-ticker match difficult. + + + + +When you need to join two pandas DataFrames but there's no shared ID column, `pd.merge()` won't help. The everyrow SDK uses LLM-powered semantic matching to join tables even when names are spelled differently across sources. + +In this example, we join two S&P 500 tables (438 rows) where one has company names and the other has stock tickers. 
+ +| Metric | Value | +| -------------- | ----------- | +| Rows processed | 438 | +| Accuracy | 100% | +| Cost | $1.00 | +| Time | ~30 seconds | + +```bash +pip install everyrow +export EVERYROW_API_KEY=your_key_here # Get one at everyrow.io +``` + +Download [company_info.csv](https://media.githubusercontent.com/media/futuresearch/everyrow-sdk/refs/heads/main/docs/data/company_info.csv) and [valuations.csv](https://media.githubusercontent.com/media/futuresearch/everyrow-sdk/refs/heads/main/docs/data/valuations.csv), or run the [full notebook](case-studies/match-software-vendors-to-requirements). + +```python +import asyncio +import pandas as pd +from everyrow.ops import merge + +# Two tables from different sources - no shared column +companies = pd.read_csv("company_info.csv") # has: company, price, mkt_cap, shares +valuations = pd.read_csv("valuations.csv") # has: ticker, fair_value + +async def main(): + result = await merge( + task="Match companies to their stock tickers", + left_table=companies, + right_table=valuations, + ) + + # The result is a DataFrame with all columns joined + print(result.data.head()) + + # company price mkt_cap shares ticker fair_value + # 0 3M 101.74 61.70678828 606514530 MMM 39.18 + # 1 A. O. Smith 32.38 4.904416495 151464376 AOS 6.59 + # 2 Abbott Laboratories 34.87 51.22933139 1469152033 ABT 119.19 + +asyncio.run(main()) +``` + +The SDK figures out that "3M" corresponds to ticker "MMM", "Alphabet Inc." to "GOOGL", and so on. No merge columns are specified because there's nothing to match on directly. 
+ +The merge operation uses a cascade of matching strategies, stopping at the simplest one that works for each row: + +| Strategy | When Used | Cost | +| ----------- | --------------------------------------- | ----------- | +| Exact match | Identical strings | Free | +| Fuzzy match | Typos, case differences | Free | +| LLM match | Semantic equivalence (company → ticker) | ~$0.002/row | +| Web search | Stale or obscure data | ~$0.01/row | + +For the company-to-ticker merge above, 99.8% of rows matched via LLM reasoning alone. The remaining 0.2% required a quick web lookup. + +This approach works well when your tables represent the same entities but use different identifiers: company names vs tickers, product names vs SKUs, subsidiary names vs parent companies. For tables that do share a common column, the SDK will use exact matching first and only escalate to more expensive methods when needed. + + + + +--- + +Built with [everyrow](https://github.com/futuresearch/everyrow-sdk). See the full analysis with multiple experiments in the [merge tutorial notebook](case-studies/match-software-vendors-to-requirements). 
diff --git a/docs/guides.md b/docs/guides.md index a0a6c012..9b513caf 100644 --- a/docs/guides.md +++ b/docs/guides.md @@ -9,22 +9,24 @@ Practical walkthroughs that show you how to use everyrow for common data process ## Screen -- [Filter a DataFrame with LLMs](/docs/filter-dataframe-with-llm) +- [Filter a Dataset Intelligently](/docs/filter-dataframe-with-llm) ## Rank -- [Sort a Dataset Using Web Data](/docs/rank-by-external-metric) +- [Rank Data by External Metrics](/docs/rank-by-external-metric) ## Dedupe -- [Remove Duplicates from ML Training Data](/docs/deduplicate-training-data-ml) +- [Deduplicate Training Data](/docs/deduplicate-training-data-ml) - [Resolve Duplicate Entities](/docs/resolve-entities-python) +- [Scale Deduplication to 20K Rows](/docs/scale-deduplication-20k-rows) ## Merge -- [Fuzzy Join Without Matching Keys](/docs/fuzzy-join-without-keys) +- [Join Tables Without Shared Keys](/docs/fuzzy-join-without-keys) ## Research -- [Add a Column with Web Lookup](/docs/add-column-web-lookup) -- [Classify and Label Data with an LLM](/docs/classify-dataframe-rows-llm) +- [Add a Column via Web Research](/docs/add-column-web-lookup) +- [Classify and Label Rows](/docs/classify-dataframe-rows-llm) +- [LLM-Powered Data Labeling](/docs/active-learning-llm-oracle) diff --git a/docs/rank-by-external-metric.md b/docs/rank-by-external-metric.md deleted file mode 100644 index 0ef77806..00000000 --- a/docs/rank-by-external-metric.md +++ /dev/null @@ -1,103 +0,0 @@ ---- -title: How to sort a dataset using web data in Python -description: Rank or sort a Pandas DataFrame by criteria that don't exist in your data, using LLM research agents to look up data. ---- - -# How to Rank a DataFrame by a Metric That Requires The Web - -`pandas.sort_values()` requires the column to already exist. EveryRow can rank or sort data on criteria you don't have in your dataset, if it can find it on the web. It's designed to do this as cost efficiently as possible. 
- -This guide shows how to rank 300 PyPI packages by two different metrics that require external lookup: days since last release (from PyPI) and number of contributors (from GitHub). - -| Metric | Rows | Cost | Time | Session | -| ---------------------- | ---- | ----- | ----------- | ------------------------------------------------------------------------- | -| Days since release | 300 | $3.90 | 4.3 minutes | [view](https://everyrow.io/sessions/24190033-4656-4366-86e9-79295c6f4510) | -| Number of contributors | 300 | $4.13 | 6.0 minutes | [view](https://everyrow.io/sessions/8b63da61-8597-45ae-ab8b-3b4d28dd1a33) | - -```bash -pip install everyrow -export EVERYROW_API_KEY=your_key_here # Get one at everyrow.io/api-key -``` - -The dataset is the top 300 PyPI packages by monthly downloads, fetched from the [top-pypi-packages](https://hugovk.github.io/top-pypi-packages/) API. The only columns are `package` and `monthly_downloads`—no release dates. - -```python -import asyncio -import requests -import pandas as pd -from everyrow.ops import rank - -# Fetch top PyPI packages -response = requests.get( - "https://hugovk.github.io/top-pypi-packages/top-pypi-packages-30-days.min.json" -) -packages = response.json()["rows"][50:350] # Skip AWS libs at top -df = pd.DataFrame(packages).rename( - columns={"project": "package", "download_count": "monthly_downloads"} -) - -async def main(): - result = await rank( - task=""" - Find the number of days since this package's last release on PyPI. - Look up the package on pypi.org to find the release date. - Return the number of days as an integer. - """, - input=df, - field_name="days_since_release", - field_type="int", - ascending_order=True, # Most recent first - ) - print(result.data[["package", "days_since_release"]]) - -asyncio.run(main()) -``` - -``` - package days_since_release -0 pyparsing 0 -1 httplib2 1 -2 yandexcloud 2 -3 multiprocess 2 -4 pyarrow 3 -... 
-295 ptyprocess 1850 -296 toml 1907 -297 ply 2897 -298 webencodings 3213 -``` - -The SDK dispatched LLM-powered web research agents to review each row. They are flexible agents, so while in this case we have instructions to guide them where to look, they can be given open ended tasks, though they might use more tokens doing that, leading to higher costs. In this case, it found that `pyparsing` was released today (Jan 20 2026), and `webencodings` hasn't been updated in 8.8 years. - -The same approach works for any metric you can describe. Here's the same dataset ranked by number of GitHub contributors: - -```python -result = await rank( - task=""" - Find the number of contributors to this package's GitHub repository. - Look up the package's source repo from PyPI, then find the contributor - count on GitHub. Return the number as an integer. - """, - input=df, - field_name="num_contributors", - field_type="int", - ascending_order=False, # Most contributors first -) -``` - -``` - package num_contributors -0 torch 4191 -1 langchain 3858 -2 langchain-core 3858 -3 transformers 3608 -4 scikit-learn 3157 -... -295 jsonpath-ng 2 -296 et-xmlfile 1 -297 beautifulsoup4 1 -298 ruamel-yaml 1 -299 pkginfo 1 -``` - -`torch` has 4,191 contributors; `pkginfo` has 1. The task prompt tells the agent what to look up and where—citation counts, benchmark scores, API response times, or anything else you can describe. diff --git a/docs/rank-by-external-metric.mdx b/docs/rank-by-external-metric.mdx new file mode 100644 index 00000000..bd1d83b3 --- /dev/null +++ b/docs/rank-by-external-metric.mdx @@ -0,0 +1,223 @@ +--- +title: Rank Data by External Metrics +metadataTitle: Get Claude Code to Score and Rank Data Using Live Web Research +description: Sort any dataset by information you don't have yet. Research agents look up live data per row and return a fully ranked result. +--- + +# Rank Data by External Metrics + + + + +Claude Code's web search works well for looking up a few packages. 
When you need two separate metrics researched for each of 300 packages, that is hundreds of individual lookups that need to happen in parallel. + +Here, we get Claude Code to rank 300 PyPI packages by two metrics that require external lookup: days since last release (from PyPI) and number of contributors (from GitHub). + +| Metric | Value | +| ---------------------- | ------------ | +| Rows processed | 300 | +| Total cost | $13.26 | +| Time | ~6.5 minutes | + +First, install the [everyrow](https://github.com/futuresearch/everyrow-sdk) plugin for Claude Code: + +```bash +claude plugin marketplace add futuresearch/everyrow-sdk +claude plugin install everyrow@futuresearch +``` + +Set your API key before launching Claude Code: + +```bash +export EVERYROW_API_KEY=your_key_here # Get one at everyrow.io/api-key +``` + +The dataset is the top 300 PyPI packages by monthly downloads, fetched from the [top-pypi-packages](https://hugovk.github.io/top-pypi-packages/) API. The only columns are `package` and `monthly_downloads`. No release dates, no contributor counts. Tell Claude: + +``` +Rank these 300 PyPI packages by days since their last release. +Look up each package on pypi.org to find the release date. +Sort by most recently released first. +``` + +Claude calls everyrow's `rank` MCP tool, then polls for progress until the operation completes: + +``` +Tool: everyrow_rank +├─ task: "Find the number of days since this package's last release on PyPI..." +├─ input_csv: "/Users/you/top_pypi_packages.csv" +├─ field_name: "days_since_release" +├─ field_type: "int" +└─ ascending_order: true + +→ Submitted: 300 rows for ranking. + Session: https://everyrow.io/sessions/7a461cd9-056b-42b2-b335-8d52fe3f685c + Task ID: 7a46... + +Tool: everyrow_progress +├─ task_id: "7a46..." +→ Running: 0/300 complete, 300 running (15s elapsed) + +Tool: everyrow_progress +→ Running: 150/300 complete, 150 running (120s elapsed) + +... + +Tool: everyrow_progress +→ Completed: 300/300 (0 failed) in 236s. 
+ +Tool: everyrow_results +├─ task_id: "7a46..." +├─ output_path: "/Users/you/pypi_ranked_by_release.csv" +→ Saved 300 rows to /Users/you/pypi_ranked_by_release.csv +``` + +Under the hood, everyrow dispatched LLM-powered web research agents to look up each package on PyPI. The agents found that `fastapi` was released today, while `webencodings` hasn't been updated in 8.9 years. + +| Package | Days Since Release | +|---------|-------------------| +| fastapi | 0 | +| typer | 0 | +| langsmith | 0 | +| grpcio | 1 | +| greenlet | 1 | +| ... | ... | +| toml | 1,938 | +| pysocks | 2,346 | +| ply | 2,928 | +| webencodings | 3,244 | + +The same approach works for any metric you can describe. A second rank call on the same data, asking for number of GitHub contributors, ran in parallel: + +``` +Tool: everyrow_rank +├─ task: "Find the number of contributors to this package's GitHub repository..." +├─ input_csv: "/Users/you/top_pypi_packages.csv" +├─ field_name: "num_contributors" +├─ field_type: "int" +└─ ascending_order: false + +→ Completed: 300/300 in 391s. +``` + +| Package | Contributors | +|---------|-------------| +| torch | 4,257 | +| langchain | 3,897 | +| langchain-core | 3,897 | +| transformers | 3,655 | +| scikit-learn | 3,170 | +| ... | ... | +| scramp | 1 | +| et-xmlfile | 0 | +| beautifulsoup4 | 0 | +| docutils | 0 | + +Both operations completed in ~6.5 minutes of wall clock time. [View the sessions](https://everyrow.io/sessions/7a461cd9-056b-42b2-b335-8d52fe3f685c). + + + + +The everyrow Python SDK can rank or sort data on criteria you don't have in your dataset, if it can find them on the web. It dispatches LLM-powered web research agents to look up each row in parallel. + +This guide shows how to rank 300 PyPI packages by two different metrics that require external lookup: days since last release (from PyPI) and number of contributors (from GitHub). 
+ +| Metric | Rows | Cost | Time | Session | +| ---------------------- | ---- | ----- | ----------- | ------------------------------------------------------------------------- | +| Days since release | 300 | $3.90 | 4.3 minutes | [view](https://everyrow.io/sessions/24190033-4656-4366-86e9-79295c6f4510) | +| Number of contributors | 300 | $4.13 | 6.0 minutes | [view](https://everyrow.io/sessions/8b63da61-8597-45ae-ab8b-3b4d28dd1a33) | + +```bash +pip install everyrow +export EVERYROW_API_KEY=your_key_here # Get one at everyrow.io/api-key +``` + +The dataset is the top 300 PyPI packages by monthly downloads, fetched from the [top-pypi-packages](https://hugovk.github.io/top-pypi-packages/) API. The only columns are `package` and `monthly_downloads`—no release dates. + +```python +import asyncio +import requests +import pandas as pd +from everyrow.ops import rank + +# Fetch top PyPI packages +response = requests.get( + "https://hugovk.github.io/top-pypi-packages/top-pypi-packages-30-days.min.json" +) +packages = response.json()["rows"][50:350] # Skip AWS libs at top +df = pd.DataFrame(packages).rename( + columns={"project": "package", "download_count": "monthly_downloads"} +) + +async def main(): + result = await rank( + task=""" + Find the number of days since this package's last release on PyPI. + Look up the package on pypi.org to find the release date. + Return the number of days as an integer. + """, + input=df, + field_name="days_since_release", + field_type="int", + ascending_order=True, # Most recent first + ) + print(result.data[["package", "days_since_release"]]) + +asyncio.run(main()) +``` + +``` + package days_since_release +0 pyparsing 0 +1 httplib2 1 +2 yandexcloud 2 +3 multiprocess 2 +4 pyarrow 3 +... +295 ptyprocess 1850 +296 toml 1907 +297 ply 2897 +298 webencodings 3213 +``` + +The SDK dispatched LLM-powered web research agents to review each row. 
The agents found that `pyparsing` was released today (Jan 20 2026), and `webencodings` hasn't been updated in 8.8 years.
But they can be expensive to run at scale, and require a lot of orchestration. EveryRow is designed to do this as cheaply as possible while still having high accuracy, in a single method with almost no setup. - -| Metric | Value | -| ------------------- | ------------------------------------------------------------------------- | -| Records processed | 500 | -| Unique entities | 131 | -| Duplicates resolved | 369 | -| Cost | $0.74 | -| Time | ~100 seconds | -| Session | [view](https://everyrow.io/sessions/d073ee5a-b25b-4129-8b43-b97347b50459) | - -```bash -pip install everyrow -export EVERYROW_API_KEY=your_key_here # Get one at everyrow.io -``` - -We'll use a messy CRM dataset with 500 company records. The same companies appear multiple times with different spellings, abbreviations, and missing fields. Download [case_01_crm_data.csv](https://media.githubusercontent.com/media/futuresearch/everyrow-sdk/refs/heads/main/docs/data/case_01_crm_data.csv) to follow along. - -```python -import asyncio -import pandas as pd -from everyrow.ops import dedupe - -data = pd.read_csv("case_01_crm_data.csv").fillna("") - -async def main(): - result = await dedupe( - input=data, - equivalence_relation="Two entries are duplicates if they represent the same company.", - ) - - # Filter to keep only the best record per entity - unique = result.data[result.data["selected"] == True] - print(f"Reduced {len(data)} records to {len(unique)} unique entities") - -asyncio.run(main()) -``` - -The input data contains variations like these, all representing the same company: - -| company_name | contact_name | email_address | -| --------------------- | ---------------- | ------------------- | -| AbbVie Inc. | Richard Gonzales | info@abbvie-bio.com | -| AbbVie Pharmaceutical | Richard Gonzales | | -| Abbvie | | info@abbvie-bio.com | -| Abvie Inc | Richard Gonzales | | - -The SDK clusters these into a single entity and selects the most complete record (the one with both contact name and email). 
The output DataFrame includes `equivalence_class_id` and `equivalence_class_name` columns showing which records were grouped together, plus a `selected` boolean indicating which record to keep. - -This approach handles cases that string similarity misses entirely. "AAPL" matches to "Apple Inc." because the model knows the ticker symbol. "Big Blue" matches to "IBM Corporation" because that's IBM's nickname. "W-Mart" and "Wallmart" match to "Walmart Inc." despite having different typos. - -The equivalence relation is flexible. For matching people, you might write "Two entries are duplicates if they refer to the same person, accounting for name variations and nicknames." For products: "Two entries represent the same product if they're the same item sold under different names or SKUs." - -See the [full notebook](case-studies/dedupe-crm-company-records) for additional examples including how to merge the clustered records into consolidated entries. diff --git a/docs/resolve-entities-python.mdx b/docs/resolve-entities-python.mdx new file mode 100644 index 00000000..3141608b --- /dev/null +++ b/docs/resolve-entities-python.mdx @@ -0,0 +1,144 @@ +--- +title: Resolve Duplicate Entities +metadataTitle: How to Do Entity Resolution at Scale in Claude Code +description: Consolidate records where the same entity appears under different spellings, abbreviations, and nicknames. Accurate entity resolution across messy, real-world datasets. +--- + +# Resolve Duplicate Entities + + + + +Claude Code is great at writing normalization code to standardize company names. Stripping "Inc." and lowercasing gets you some matches. But "AbbVie Inc.", "AbbVie Pharmaceutical", "Abbvie", and "Abvie Inc" need more than normalization. Two of those are typos, one is a brand variant, and one is a division name. + +Here, we get Claude Code to resolve 500 messy CRM records down to their unique entities. 
+ +| Metric | Value | +| ------------------- | --------------- | +| Records processed | 500 | +| Unique entities | 156 | +| Duplicates resolved | 344 (68.8%) | +| Cost | $2.02 | +| Time | 15.3 minutes | + +First, install the [everyrow](https://github.com/futuresearch/everyrow-sdk) plugin for Claude Code: + +```bash +claude plugin marketplace add futuresearch/everyrow-sdk +claude plugin install everyrow@futuresearch +``` + +Set your API key before launching Claude Code: + +```bash +export EVERYROW_API_KEY=your_key_here # Get one at everyrow.io/api-key +``` + +Download the dataset: [case_01_crm_data.csv](https://media.githubusercontent.com/media/futuresearch/everyrow-sdk/refs/heads/main/docs/data/case_01_crm_data.csv) (500 messy company records with typos, abbreviations, and missing fields). With the CSV in your working directory, tell Claude: + +``` +Deduplicate this CRM dataset. Two entries are duplicates if they represent +the same company. +``` + +Claude calls everyrow's `dedupe` MCP tool: + +``` +Tool: everyrow_dedupe +├─ equivalence_relation: "Two entries are duplicates if they represent the same company." +└─ input_csv: "/Users/you/case_01_crm_data.csv" + +→ Submitted: 500 rows for deduplication. + Session: https://everyrow.io/sessions/d5d1ef67-653e-4dd9-90e7-9653cda2af85 + Task ID: d5d1... + +Tool: everyrow_progress +├─ task_id: "d5d1..." +→ Running: 0/500 complete (30s elapsed) + +... + +Tool: everyrow_progress +→ Completed: 500/500 (0 failed) in 920s. + +Tool: everyrow_results +├─ task_id: "d5d1..." +├─ output_path: "/Users/you/crm_deduplicated.csv" +→ Saved 500 rows to /Users/you/crm_deduplicated.csv +``` + +500 records resolved to 156 unique entities. [View the session](https://everyrow.io/sessions/d5d1ef67-653e-4dd9-90e7-9653cda2af85). + +| Cluster | Records | Variants | +|---------|---------|----------| +| Palo Alto Networks | 8 | Pallow Alto, PANW, Palo Alto Net Inc, Paloalto Networks | +| Walmart Inc. 
| 8 | W-Mart, Wall-Mart, WMT Corp, Walmart Corporation | +| Uber Technologies | 8 | Ubar, Ubr, Uber Tech, Uber Corporation | +| ServiceNow | 6 | Service Now, Service-Now, SerivceNow, Service Now Inc | + +The system handles cases that string similarity misses entirely. "AAPL" matches to "Apple Inc." because the model knows the ticker symbol. "Big Blue" matches to "IBM Corporation" because that's IBM's nickname. The output includes `equivalence_class_id` and `selected` columns. Filter to `selected == True` to get one record per entity. + + + + +Identifying matching records that represent the same entity across messy data typically requires labeled training data, manual blocking rules, or extensive threshold tuning. The everyrow SDK uses LLMs to solve this at high accuracy in a single method call. + +| Metric | Value | +| ------------------- | ------------------------------------------------------------------------- | +| Records processed | 500 | +| Unique entities | 131 | +| Duplicates resolved | 369 | +| Cost | $0.74 | +| Time | ~100 seconds | +| Session | [view](https://everyrow.io/sessions/d073ee5a-b25b-4129-8b43-b97347b50459) | + +```bash +pip install everyrow +export EVERYROW_API_KEY=your_key_here # Get one at everyrow.io +``` + +We'll use a messy CRM dataset with 500 company records. Download [case_01_crm_data.csv](https://media.githubusercontent.com/media/futuresearch/everyrow-sdk/refs/heads/main/docs/data/case_01_crm_data.csv) to follow along. 
+ +```python +import asyncio +import pandas as pd +from everyrow.ops import dedupe + +data = pd.read_csv("case_01_crm_data.csv").fillna("") + +async def main(): + result = await dedupe( + input=data, + equivalence_relation="Two entries are duplicates if they represent the same company.", + ) + + # Filter to keep only the best record per entity + unique = result.data[result.data["selected"] == True] + print(f"Reduced {len(data)} records to {len(unique)} unique entities") + +asyncio.run(main()) +``` + +The input data contains variations like these, all representing the same company: + +| company_name | contact_name | email_address | +| --------------------- | ---------------- | ------------------- | +| AbbVie Inc. | Richard Gonzales | info@abbvie-bio.com | +| AbbVie Pharmaceutical | Richard Gonzales | | +| Abbvie | | info@abbvie-bio.com | +| Abvie Inc | Richard Gonzales | | + +The SDK clusters these into a single entity and selects the most complete record. The output DataFrame includes `equivalence_class_id` and `equivalence_class_name` columns showing which records were grouped together, plus a `selected` boolean indicating which record to keep. + +This approach handles cases that string similarity misses entirely. "AAPL" matches to "Apple Inc." because the model knows the ticker symbol. "Big Blue" matches to "IBM Corporation" because that's IBM's nickname. "W-Mart" and "Wallmart" match to "Walmart Inc." despite having different typos. + +The equivalence relation is flexible. For matching people: "Two entries are duplicates if they refer to the same person, accounting for name variations and nicknames." For products: "Two entries represent the same product if they're the same item sold under different names or SKUs." + +See the [full notebook](case-studies/dedupe-crm-company-records) for additional examples including how to merge the clustered records into consolidated entries. + + + + +--- + +Built with [everyrow](https://github.com/futuresearch/everyrow-sdk). 
See the [dedupe documentation](reference/DEDUPE) for more options. diff --git a/docs/scale-deduplication-20k-rows.md b/docs/scale-deduplication-20k-rows.md deleted file mode 100644 index 3b3759b5..00000000 --- a/docs/scale-deduplication-20k-rows.md +++ /dev/null @@ -1,115 +0,0 @@ ---- -title: How to scale LLM deduplication to 20,000 rows -description: Scale LLM-powered deduplication to 20,000 rows with linear cost, achieving F1=0.996 using embeddings, clustering, and targeted LLM calls. ---- - -# How to Scale LLM Deduplication to 20,000 Rows - -LLM-powered deduplication gives you semantic understanding that string matching can't, but naive pairwise comparison is quadratic. At 20,000 rows that's 200 million pairs. Everyrow's dedupe pipeline uses a funnel of embeddings, clustering, and targeted LLM calls to keep cost linear and accuracy high. - -![FDA Drug Products — Deduplication at Scale](https://media.githubusercontent.com/media/futuresearch/everyrow-sdk/refs/heads/main/docs-site/public/images/fda_10pct_scaling.png) - -Error rates stay near zero as scale increases. Cost and LLM calls scale linearly. Runtime is under 5 minutes up to 10,000 rows and 25 minutes at 20,000. 
- -## Install - -```bash -pip install everyrow -export EVERYROW_API_KEY=your_key_here # Get one at everyrow.io/api-key -``` - -## Running a large dedupe - -```python -import asyncio -import pandas as pd -from everyrow.ops import dedupe - -data = pd.read_csv("fda_products.csv") - -async def main(): - result = await dedupe( - input=data, - equivalence_relation=( - "Same ingredient + same strength + same applicant " - "+ same dosage form = duplicate" - ), - ) - - clean = result.data[result.data["selected"] == True] - print(f"Reduced {len(data)} to {len(clean)} unique records") - clean.to_csv("deduplicated.csv", index=False) - -asyncio.run(main()) -``` - -## Cost at different scales - -Cost stays between $0.90 and $1.50 per 1,000 rows across all datasets we tested: - -| Dataset | Entity | Rows | Dup% | F1 | Cost | $/1k rows | -| ---------------------- | ----------- | ------ | ---- | ----- | ------ | --------- | -| Small Companies | company | 200 | 8% | 1.000 | $0.18 | $0.90 | -| Medium People | person | 1,000 | 20% | 0.994 | $1.18 | $1.18 | -| Medium Transactions | transaction | 1,000 | 20% | 0.945 | $1.41 | $1.41 | -| Large Companies (Messy)| company | 3,000 | 10% | 0.974 | $3.21 | $1.07 | -| Large Products (FDA) | product | 5,000 | 5% | 0.997 | $6.37 | $1.27 | -| Company Names | company | 8,628 | 10% | 0.976 | $12.58 | $1.46 | -| FDA Products | product | 20,000 | 10% | 0.996 | $22.40 | $1.12 | - -The transaction dataset costs the most per row ($1.41/1k) because property records have ambiguous overlap, producing larger clusters. Structured data like FDA products is cheaper ($1.12/1k). - -Rough formula: **$1-1.50 per 1,000 rows** depending on data complexity. - -## The two error modes - -Every deduplication system makes two kinds of mistakes: - -**Over-merging (low Precision):** Distinct entities incorrectly grouped together. This is data loss — you destroy real records. The dangerous failure mode. - -**Under-merging (low Recall):** True duplicates missed. 
Your data stays messy, but nothing is lost. The safe failure mode. - -At 20,000 rows, Precision is 1.000 (zero false merges) while Recall is 0.992 (8 of ~2,000 duplicates were missed). The system only merges when it's confident. - -## Writing good equivalence rules - -The `equivalence_relation` parameter is the single most important input. It's a natural language description of what makes two rows "the same thing." - -**Be specific.** Enumerate the fields that must match: - -```python -# Good: mentions all matching fields -equivalence_relation="Same ingredient + same strength + same applicant + same dosage form = duplicate" - -# Less good: vague -equivalence_relation="Same drug" -``` - -**Be explicit about edge cases.** If "VIRTU FINANCIAL INC CLASS A" and "VIRTU FINANCIAL INC" should or shouldn't match, say so. If career changes mean the same person can appear at different organizations, state that: - -```python -equivalence_relation=( - "Two rows are duplicates if they represent the same person " - "despite different email/organization (career changes). " - "Consider name variations like typos, nicknames, and format differences." -) -``` - -**Keep it short.** One or two sentences. The rule goes into every LLM call, so verbosity costs tokens. - -## Testing on different entity types - -We validated across companies, people, products, and transactions to make sure the pipeline generalizes: - -- **FDA drug products (20k rows):** F1 = 0.996. Structured data with multi-field matching. Easiest for the pipeline. -- **Company names (8.6k rows):** F1 = 0.976. Single-column matching with only a name to work with. Requires semantic judgment. -- **People (1k rows):** F1 = 0.994. Name variations, career changes, multiple identifiers. -- **Transactions (1k rows):** F1 = 0.945. Property records with ambiguous addresses and shared parcel IDs. Hardest dataset. 
- -The transaction dataset is the hardest because the same address can appear in different formats and adjacent properties share identifiers. Even here, F1 is above 0.94. - -## Related - -- [How to Resolve Duplicate Entities in Python](/resolve-entities-python) — 500-row CRM walkthrough -- [How to Deduplicate Training Data in Python](/deduplicate-training-data-ml) — semantic deduplication for ML datasets -- [API reference](/reference/DEDUPE) diff --git a/docs/scale-deduplication-20k-rows.mdx b/docs/scale-deduplication-20k-rows.mdx new file mode 100644 index 00000000..8802d064 --- /dev/null +++ b/docs/scale-deduplication-20k-rows.mdx @@ -0,0 +1,149 @@ +--- +title: Scale Deduplication to 20K Rows +metadataTitle: Get Claude Code to Deduplicate 20,000 Rows +description: Deduplication that scales linearly with dataset size. Embeddings and clustering narrow the search space so LLM calls grow with cluster count, not quadratically. +--- + +# Scale Deduplication to 20K Rows + + + + +Claude Code handles deduplication of a few hundred rows natively. Scaling to 20,000 rows needs an approach where embeddings and clustering narrow the search space first, so LLM calls target the ambiguous pairs instead of all 200 million possible combinations. + +Here, we get Claude Code to deduplicate 20,000 FDA drug product records, using a funnel of embeddings, clustering, and targeted LLM calls. 
+ +| Metric | Value | +| ------------------ | ------------ | +| Input rows | 20,000 | +| Unique after dedupe| 18,078 | +| Duplicates removed | 1,922 (9.6%) | +| Time | 22.5 minutes | +| Cost | $26.11 | + +First, install the [everyrow](https://github.com/futuresearch/everyrow-sdk) plugin for Claude Code: + +```bash +claude plugin marketplace add futuresearch/everyrow-sdk +claude plugin install everyrow@futuresearch +``` + +Set your API key before launching Claude Code: + +```bash +export EVERYROW_API_KEY=your_key_here # Get one at everyrow.io/api-key +``` + +Download [fda_products.csv](https://media.githubusercontent.com/media/futuresearch/everyrow-sdk/refs/heads/main/docs/data/fda_products.csv) (20,000 rows from the FDA Drugs@FDA database with ingredient, strength, applicant, and dosage form columns). Tell Claude: + +``` +Deduplicate fda_products.csv. Two rows are duplicates if they have the same +ingredient + same strength + same applicant + same dosage form. +``` + +Claude calls everyrow's `dedupe` MCP tool: + +``` +Tool: everyrow_dedupe +├─ equivalence_relation: "Same ingredient + same strength + same applicant + same dosage form = duplicate" +└─ input_csv: "/Users/you/fda_products.csv" + +→ Submitted: 20,000 rows for deduplication. + Session: https://everyrow.io/sessions/71e68a7f-a856-43ba-8080-89e4093afb1c + Task ID: 71e6... + +Tool: everyrow_progress +├─ task_id: "71e6..." +→ Running: 0/20000 complete (60s elapsed) + +... + +Tool: everyrow_progress +→ Completed: 20000/20000 (0 failed) in 1350s. + +Tool: everyrow_results +├─ task_id: "71e6..." +├─ output_path: "/Users/you/fda_deduplicated.csv" +→ Saved 20000 rows to /Users/you/fda_deduplicated.csv +``` + +20,000 rows deduplicated in 22.5 minutes for $26.11 ($1.31 per 1,000 rows). [View the session](https://everyrow.io/sessions/71e68a7f-a856-43ba-8080-89e4093afb1c). 
+ +| Cluster | Members | Pattern | +|---------|---------|---------| +| Oxytocin / Fresenius Kabi | 3 | Different package sizes: 10/100/300 USP units, same concentration | +| Gadodiamide / GE Healthcare | 3 | Different volumes: 287mg/mL in bulk vs 50mL vs 100mL | +| Diazepam / Hikma | 3 | Strength formatting: "50MG/10ML (5MG/ML)" vs "5MG/ML" | +| Acyclovir 800MG / Teva | 3 | Company variants: TEVA, IVAX SUB TEVA PHARMS, TEVA PHARMS | + +The pipeline catches semantic duplicates across strength formatting variants, company name variations, and minor formatting differences. At 20,000 rows, Precision is 1.000 (zero false merges) while Recall is 0.992. The system only merges when it's confident. + + + + +LLM-powered deduplication gives you semantic understanding that string matching can't, but naive pairwise comparison is quadratic. At 20,000 rows that's 200 million pairs. Everyrow's dedupe pipeline uses a funnel of embeddings, clustering, and targeted LLM calls to keep cost linear and accuracy high. + +![FDA Drug Products — Deduplication at Scale](https://media.githubusercontent.com/media/futuresearch/everyrow-sdk/refs/heads/main/docs-site/public/images/fda_10pct_scaling.png) + +Error rates stay near zero as scale increases. Cost and LLM calls scale linearly. Runtime is under 5 minutes up to 10,000 rows and 25 minutes at 20,000. 
+ +```bash +pip install everyrow +export EVERYROW_API_KEY=your_key_here # Get one at everyrow.io/api-key +``` + +```python +import asyncio +import pandas as pd +from everyrow.ops import dedupe + +data = pd.read_csv("fda_products.csv") + +async def main(): + result = await dedupe( + input=data, + equivalence_relation=( + "Same ingredient + same strength + same applicant " + "+ same dosage form = duplicate" + ), + ) + + clean = result.data[result.data["selected"] == True] + print(f"Reduced {len(data)} to {len(clean)} unique records") + clean.to_csv("deduplicated.csv", index=False) + +asyncio.run(main()) +``` + +Cost stays between $0.90 and $1.50 per 1,000 rows across all datasets tested: + +| Dataset | Entity | Rows | Dup% | F1 | Cost | $/1k rows | +| ---------------------- | ----------- | ------ | ---- | ----- | ------ | --------- | +| Small Companies | company | 200 | 8% | 1.000 | $0.18 | $0.90 | +| Medium People | person | 1,000 | 20% | 0.994 | $1.18 | $1.18 | +| Medium Transactions | transaction | 1,000 | 20% | 0.945 | $1.41 | $1.41 | +| Large Companies (Messy)| company | 3,000 | 10% | 0.974 | $3.21 | $1.07 | +| Large Products (FDA) | product | 5,000 | 5% | 0.997 | $6.37 | $1.27 | +| Company Names | company | 8,628 | 10% | 0.976 | $12.58 | $1.46 | +| FDA Products | product | 20,000 | 10% | 0.996 | $22.40 | $1.12 | + +Rough formula: **$1-1.50 per 1,000 rows** depending on data complexity. + +Every deduplication system makes two kinds of mistakes. **Over-merging** (low Precision) is data loss: distinct entities incorrectly grouped together. **Under-merging** (low Recall) means your data stays messy, but nothing is lost. At 20,000 rows, Precision is 1.000 (zero false merges) while Recall is 0.992 (8 of ~2,000 duplicates were missed). The system only merges when it's confident. + +The `equivalence_relation` parameter is the single most important input. 
Be specific and enumerate the fields that must match: + +```python +# Good: mentions all matching fields +equivalence_relation="Same ingredient + same strength + same applicant + same dosage form = duplicate" + +# Less good: vague +equivalence_relation="Same drug" +``` + + + + +--- + +Built with [everyrow](https://github.com/futuresearch/everyrow-sdk). See the [dedupe documentation](reference/DEDUPE) for more options. Related guides: [Resolve Duplicate Entities](/resolve-entities-python) (500-row CRM walkthrough), [Deduplicate Training Data](/deduplicate-training-data-ml) (semantic dedup for ML datasets). From 617fe461e08c5b7f9471d3e23cdf2d6f7789dc0e Mon Sep 17 00:00:00 2001 From: Dan Schwarz Date: Wed, 25 Feb 2026 15:42:08 -0800 Subject: [PATCH 2/3] Narrative rebrand: personify utilities as researcher roles Co-Authored-By: Claude Opus 4.6 --- .claude-plugin/marketplace.json | 4 +- .claude-plugin/plugin.json | 2 +- CITATION.cff | 2 +- README.md | 38 ++++++---- docs-site/src/app/layout.tsx | 2 +- docs-site/src/app/page.tsx | 12 ++-- docs/getting-started.md | 7 ++ docs/installation.mdx | 122 ++++++++++++++++---------------- docs/reference/DEDUPE.md | 9 +++ docs/reference/FORECAST.md | 9 +++ docs/reference/MERGE.md | 10 +++ docs/reference/RANK.md | 10 +++ docs/reference/RESEARCH.md | 17 +++++ docs/reference/SCREEN.md | 9 +++ everyrow-mcp/manifest.json | 22 +++--- everyrow-mcp/pyproject.toml | 2 +- everyrow-mcp/server.json | 2 +- pyproject.toml | 2 +- skills/everyrow-sdk/SKILL.md | 4 +- 19 files changed, 184 insertions(+), 101 deletions(-) diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 7d84368d..2b3c495d 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -4,13 +4,13 @@ "name": "FutureSearch" }, "metadata": { - "description": "AI-powered data processing plugins from FutureSearch" + "description": "everyrow plugins from FutureSearch" }, "plugins": [ { "name": "everyrow", "source": "./", - 
"description": "Claude Code plugin for the everyrow SDK - AI-powered data processing utilities for transforming, deduping, merging, ranking, and screening dataframes", + "description": "Give Claude Code a research team. Forecast, score, classify, or research every row of a dataset.", "version": "0.4.0" } ] diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index 516370f6..f5c2e2a2 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "everyrow", - "description": "Claude Code plugin for the everyrow SDK - AI-powered data processing utilities for transforming, deduping, merging, ranking, and screening dataframes", + "description": "Give Claude Code a research team. Forecast, score, classify, or research every row of a dataset.", "version": "0.4.0", "author": { "name": "FutureSearch" diff --git a/CITATION.cff b/CITATION.cff index 130819ee..0434d850 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -2,7 +2,7 @@ cff-version: 1.2.0 message: "If you use this software, please cite it as below." type: software title: "everyrow" -abstract: "Screen, rank, dedupe, and merge dataframes using natural language. Run web agents to research every row." +abstract: "A researcher for every row. Run web research agents at scale to forecast, score, classify, deduplicate, merge, or enrich entire datasets." license: MIT version: 0.4.0 date-released: 2026-02-24 diff --git a/README.md b/README.md index 3a7c2adb..761275e9 100644 --- a/README.md +++ b/README.md @@ -7,33 +7,35 @@ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) [![Python 3.12+](https://img.shields.io/badge/python-3.12+-blue.svg)](https://www.python.org/downloads/) -An add-on for Claude Code, Claude Desktop/Cowork, and Claude web to enable Claude to run LLM web research agents at scale. 
Claude uses everyrow to research entire datasets, and to intelligently sort, filter, merge, dedupe, or add columns to large datasets, via a single Python or MCP call. See the [docs site](https://everyrow.io/docs) for how to install into your Claude interface of choice. +Give yourself, or your AI, a team of researchers to gather data, forecast, score, or classify every row in a dataset. Available [standalone](https://everyrow.io/app), as a Claude Code plugin, MCP server, or Python SDK. See the [docs site](https://everyrow.io/docs) for how to install into your interface of choice. The best experience is inside Claude Code. + ```bash claude plugin marketplace add futuresearch/everyrow-sdk claude plugin install everyrow@futuresearch ``` -See [here](https://everyrow.io/docs#tab-claude-desktop-mcp) for Claude Desktop/Cowork. Claude web (claude.ai) connector coming soon. Or try it directly in our hosted app that uses the Claude Agent SDK at [everyrow.io/app](https://everyrow.io/app)]. +See [here](https://everyrow.io/docs#tab-claude-desktop-mcp) for Claude Desktop/Cowork. Claude web (claude.ai) connector coming soon. Or try it directly in our hosted app that uses the Claude Agent SDK at [everyrow.io/app](https://everyrow.io/app). Get an API key at [everyrow.io/api-key](https://everyrow.io/api-key) ($20 free credit), then: ## Operations -Enable Claude to perform tens of thousands of LLM calls, or thousands of LLM web research agents, in each single operation. 
+Spin up a team of: -| Operation | Intelligence | Scales To | -|---|---|---| -| [**Screen**](https://everyrow.io/docs/reference/SCREEN) | Filter by criteria that need judgment | 10k rows | -| [**Rank**](https://everyrow.io/docs/reference/RANK) | Score rows from research | 10k rows | -| [**Dedupe**](https://everyrow.io/docs/reference/DEDUPE) | Deduplicate when fuzzy matching fails | 20k rows | -| [**Merge**](https://everyrow.io/docs/reference/MERGE) | Join tables when keys don't match | 5k rows | -| [**Research**](https://everyrow.io/docs/reference/RESEARCH) | Web research on every row | 10k rows | +| Role | What it does | Cost | Scales To | +| ---- | ------------ | ---- | --------- | +| [**Agents**](https://everyrow.io/docs/reference/RESEARCH) | Research, then analyze | 1–3¢/researcher | 10k rows | +| [**Forecasters**](https://everyrow.io/docs/reference/FORECAST) | Predict outcomes | 20-50¢/researcher | 10k rows | +| [**Scorers**](https://everyrow.io/docs/reference/RANK) | Research, then score | 1-5¢/researcher | 10k rows | +| [**Classifiers**](https://everyrow.io/docs/reference/SCREEN) | Research, then categorize | 0.1-0.7¢/researcher | 10k rows | +| [**Matchers**](https://everyrow.io/docs/reference/MERGE) | Find matching rows | 0.2-0.5¢/researcher | 20k rows | See the full [API reference](https://everyrow.io/docs/api), [guides](https://everyrow.io/docs/guides), and [case studies](https://everyrow.io/docs/case-studies), (for example, see our [case study](https://everyrow.io/docs/case-studies/llm-web-research-agents-at-scale) running a `Research` task on 10k rows, running agents that used 120k LLM calls.) Or just ask Claude in your interface of choice: + ``` Label this 5,000 row CSV with the right categories. ``` @@ -50,7 +52,7 @@ Rank these 2,000 people from Wikipedia on who is the most bullish on AI. ## Web Agents -The most basic utility to build from is `agent_map`, to have LLM web research agents work on every row of the dataframe. 
Agents are tuned on [Deep Research Bench](https://arxiv.org/abs/2506.06287), our benchmark for questions that need extensive searching and cross-referencing, and tuned to get correct answers at minimal cost. +The base operation is `agent_map`: one web research agent per row. The other operations (rank, classify, forecast, merge, dedupe) use the agents under the hood as necessary. Agents are tuned on [Deep Research Bench](https://arxiv.org/abs/2506.06287), our benchmark for questions that need extensive searching and cross-referencing, and tuned to get correct answers at minimal cost. Under the hood, Claude will: @@ -83,7 +85,6 @@ print(result.data.head()) See the API [docs](https://everyrow.io/docs/reference/RESEARCH.md), a case study of [labeling data](https://everyrow.io/docs/classify-dataframe-rows-llm) or a case study for [researching government data](https://everyrow.io/docs/case-studies/research-and-rank-permit-times) at scale. - ## Sessions You can also use a session to output a URL to see the research and data processing in the [everyrow.io/app](https://everyrow.io/app) application, which streams the research and makes charts. Or you can use it purely as an intelligent data utility, and [chain intelligent pandas operations](https://everyrow.io/docs/chaining-operations) with normal pandas operations where LLMs are used to process every row. @@ -127,14 +128,18 @@ df = await fetch_task_data("12345678-1234-1234-1234-123456789abc") ### Other AI agent plugins #### Gemini CLI + [Official Docs](https://geminicli.com/docs/extensions/#installing-an-extension). 
Ensure that you're using version >= 0.25.0 + ```sh gemini --version gemini extensions install https://github.com/futuresearch/everyrow-sdk gemini extensions enable everyrow [--scope ] ``` + Then within the CLI + ```sh /settings > Preview Features > Enable /settings > Agent Skills > Enable @@ -144,21 +149,28 @@ Then within the CLI ``` #### Codex CLI + [Official docs](https://developers.openai.com/codex/skills#install-new-skills). Install from GitHub using the built-in skill installer, requested via natural language: + ```sh codex $skill-installer from the futuresearch/everyrow-sdk github repo, install the everyrow-sdk skill at --path skills/everyrow-sdk ``` + Or install directly: + ```sh python ~/.codex/skills/.system/skill-installer/scripts/install-skill-from-github.py \ --repo futuresearch/everyrow-sdk --path skills/everyrow-sdk ``` + Restart Codex to pick up the new skill. #### Cursor + [Official docs](https://cursor.com/docs/context/skills#installing-skills-from-github). + ```sh 1. Open Cursor Settings → Rules 2. In the Project Rules section, click Add Rule @@ -232,7 +244,7 @@ uv run basedpyright # type check ## About -Built by [FutureSearch](https://futuresearch.ai). We kept running into the same data problems: ranking leads, deduping messy CRM exports, merging tables without clean keys. Tedious for humans, but needs judgment that automation can't handle. So we built this. +Built by [FutureSearch](https://futuresearch.ai). 
[everyrow.io](https://everyrow.io) (app/dashboard) · [case studies](https://futuresearch.ai/solutions/) · [research](https://futuresearch.ai/research/) diff --git a/docs-site/src/app/layout.tsx b/docs-site/src/app/layout.tsx index 02a0ab24..684e88e6 100644 --- a/docs-site/src/app/layout.tsx +++ b/docs-site/src/app/layout.tsx @@ -22,7 +22,7 @@ const jetbrainsMono = JetBrains_Mono({ export const metadata: Metadata = { metadataBase: new URL("https://everyrow.io"), title: "Everyrow Documentation", - description: "Documentation for the Everyrow SDK - AI-powered data operations for pandas DataFrames", + description: "Everyrow documentation. A researcher for every row. Forecast, score, classify, or research entire datasets.", openGraph: { siteName: "Everyrow", type: "website", diff --git a/docs-site/src/app/page.tsx b/docs-site/src/app/page.tsx index 7bbb097a..fdf4095b 100644 --- a/docs-site/src/app/page.tsx +++ b/docs-site/src/app/page.tsx @@ -7,14 +7,14 @@ import { MDXContent } from "@/components/MDXContent"; export const metadata: Metadata = { title: "Everyrow Documentation", description: - "Run LLM Research Agents at Scale", + "A researcher for every row. 
Forecast, score, classify, or research entire datasets.", url: "https://everyrow.io/docs", images: [{ url: "https://everyrow.io/everyrow-og.png" }], }, @@ -27,9 +27,9 @@ const SECTION_ICONS: Record = { }; const SECTION_DESCRIPTIONS: Record = { - Guides: "Step-by-step tutorials for common data processing tasks", - "API Reference": "Detailed documentation for all everyrow functions", - "Case Studies": "Real-world examples with Jupyter notebooks", + Guides: "Step-by-step tutorials for web research at scale", + "API Reference": "API reference for all everyrow operations", + "Case Studies": "Real-world examples with verified results", }; const SECTION_LINKS: Record = { @@ -102,7 +102,7 @@ export default async function DocsHome() {

everyrow documentation

- Run LLM Research Agents at Scale + A researcher for every row

diff --git a/docs/getting-started.md b/docs/getting-started.md index 5d1b0bce..1361ea62 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -7,6 +7,13 @@ description: Install everyrow and run your first operation. Everyrow lets you perform qualitative data transformations on noisy real-world data, at quantitative scale. Define your fuzzy logic concisely in natural language, and everyrow handles the complexity of orchestrating the execution. +**Using Claude Code?** Install the plugin and ask Claude in natural language: + +```bash +claude plugin marketplace add futuresearch/everyrow-sdk +claude plugin install everyrow@futuresearch +``` + ## Prerequisites - Python 3.12+ diff --git a/docs/installation.mdx b/docs/installation.mdx index f097c9cb..6154ca1d 100644 --- a/docs/installation.mdx +++ b/docs/installation.mdx @@ -11,6 +11,67 @@ Select your platform and integration method below. + + +Install the everyrow plugin from the marketplace: + +```bash +claude plugin marketplace add futuresearch/everyrow-sdk +claude plugin install everyrow@futuresearch +``` + +This installs both the skill and MCP server together. You can toggle each on/off in Claude Code settings. + +**Important:** be sure to supply your API key when launching Claude Code: + +```bash +export EVERYROW_API_KEY=sk-cho... +claude +``` + +You can optionally configure Claude Code to show a [progress bar](/docs/progress-monitoring#progress-bar) for long-running tasks. + +[Official Claude Code Plugin Docs](https://code.claude.com/docs/en/discover-plugins#add-from-github) + + + + + +Add everyrow to your MCP config (requires [uv](https://docs.astral.sh/uv/)): + +```json +{ + "mcpServers": { + "everyrow": { + "command": "uvx", + "args": ["everyrow-mcp"], + "env": { + "EVERYROW_API_KEY": "${EVERYROW_API_KEY}" + } + } + } +} +``` + +Or install with pip and use `"command": "everyrow-mcp"` instead of uvx. 
+ +Config file location: +- **User scope:** `~/.claude.json` (in the `mcpServers` field) +- **Project scope:** `.mcp.json` in your project root + +[Choosing the right scope](https://code.claude.com/docs/en/mcp#choosing-the-right-scope) + +**Important:** either insert your API key when creating the JSON file, or supply the key when launching Claude Code: + +```bash +export EVERYROW_API_KEY=sk-cho... +claude +``` + +You can optionally configure Claude Code to show a [progress bar](/docs/progress-monitoring#progress-bar) for long-running tasks. + + + ```bash @@ -92,67 +153,6 @@ See the [API Reference](/docs/api) for full documentation. - - -Add everyrow to your MCP config (requires [uv](https://docs.astral.sh/uv/)): - -```json -{ - "mcpServers": { - "everyrow": { - "command": "uvx", - "args": ["everyrow-mcp"], - "env": { - "EVERYROW_API_KEY": "${EVERYROW_API_KEY}" - } - } - } -} -``` - -Or install with pip and use `"command": "everyrow-mcp"` instead of uvx. - -Config file location: -- **User scope:** `~/.claude.json` (in the `mcpServers` field) -- **Project scope:** `.mcp.json` in your project root - -[Choosing the right scope](https://code.claude.com/docs/en/mcp#choosing-the-right-scope) - -**Important:** either insert your API key when creating the JSON file, or supply the key when launching Claude Code: - -```bash -export EVERYROW_API_KEY=sk-cho... -claude -``` - -You can optionally configure Claude Code to show a [progress bar](/docs/progress-monitoring#progress-bar) for long-running tasks. - - - - - -Install the everyrow plugin from the marketplace: - -```bash -claude plugin marketplace add futuresearch/everyrow-sdk -claude plugin install everyrow@futuresearch -``` - -This installs both the skill and MCP server together. You can toggle each on/off in Claude Code settings. - -**Important:** be sure to supply your API key when launching Claude Code: - -```bash -export EVERYROW_API_KEY=sk-cho... 
-claude -``` - -You can optionally configure Claude Code to show a [progress bar](/docs/progress-monitoring#progress-bar) for long-running tasks. - -[Official Claude Code Plugin Docs](https://code.claude.com/docs/en/discover-plugins#add-from-github) - - - First, make sure you have [uv installed](https://docs.astral.sh/uv/). diff --git a/docs/reference/DEDUPE.md b/docs/reference/DEDUPE.md index 74499406..3a426317 100644 --- a/docs/reference/DEDUPE.md +++ b/docs/reference/DEDUPE.md @@ -136,6 +136,15 @@ Output (selected rows only): | 500 | ~2 min | ~$1.67 | | 2,000 | ~8 min | ~$7 | +## Via MCP + +MCP tool: `everyrow_dedupe` + +| Parameter | Type | Description | +|-----------|------|-------------| +| `csv_path` | string | Path to input CSV file | +| `equivalence_relation` | string | What makes two rows duplicates | + ## Related docs ### Guides diff --git a/docs/reference/FORECAST.md b/docs/reference/FORECAST.md index 4d1e6aed..f5be0b3d 100644 --- a/docs/reference/FORECAST.md +++ b/docs/reference/FORECAST.md @@ -79,6 +79,15 @@ Probabilities are clamped to [3, 97]—even near-certain outcomes retain residua | 5 | ~6 min | ~$3 | | 20 | ~10 min | ~$12 | +## Via MCP + +MCP tool: `everyrow_forecast` + +| Parameter | Type | Description | +|-----------|------|-------------| +| `csv_path` | string | Path to CSV with questions (one per row) | +| `context` | string | Optional batch-level context for all questions | + ## Related docs ### Blog posts diff --git a/docs/reference/MERGE.md b/docs/reference/MERGE.md index 1db2a38b..79eddede 100644 --- a/docs/reference/MERGE.md +++ b/docs/reference/MERGE.md @@ -69,6 +69,16 @@ A DataFrame with all left table columns plus matched right table columns. 
Rows t | 2,000 × 50 | ~8 min | ~$9 | | 1,000 × 1,000 | ~12 min | ~$15 | +## Via MCP + +MCP tool: `everyrow_merge` + +| Parameter | Type | Description | +|-----------|------|-------------| +| `left_csv_path` | string | Path to the table being enriched (left join) | +| `right_csv_path` | string | Path to the lookup/reference table | +| `task` | string | How to match rows across tables | + ## Related docs ### Guides diff --git a/docs/reference/RANK.md b/docs/reference/RANK.md index 52eb4c22..93112eba 100644 --- a/docs/reference/RANK.md +++ b/docs/reference/RANK.md @@ -81,6 +81,16 @@ When specifying a response model, make sure that it contains `field_name`. Other | `ascending_order` | bool | True = lowest first (default) | | `preview` | bool | True = process only a few rows | +## Via MCP + +MCP tool: `everyrow_rank` + +| Parameter | Type | Description | +|-----------|------|-------------| +| `csv_path` | string | Path to input CSV file | +| `task` | string | How to score each row | +| `field_name` | string | Column name for the score | + ## Related docs ### Guides diff --git a/docs/reference/RESEARCH.md b/docs/reference/RESEARCH.md index 71af7058..5008fd54 100644 --- a/docs/reference/RESEARCH.md +++ b/docs/reference/RESEARCH.md @@ -131,6 +131,23 @@ companies = await single_agent( ) ``` +## Via MCP + +MCP tools: `everyrow_agent` (DataFrame), `everyrow_single_agent` (single question) + +**everyrow_agent:** + +| Parameter | Type | Description | +|-----------|------|-------------| +| `csv_path` | string | Path to input CSV file | +| `task` | string | What to research for each row | + +**everyrow_single_agent:** + +| Parameter | Type | Description | +|-----------|------|-------------| +| `task` | string | The question to research | + ## Related docs ### Guides diff --git a/docs/reference/SCREEN.md b/docs/reference/SCREEN.md index 358980bc..68f1f4ce 100644 --- a/docs/reference/SCREEN.md +++ b/docs/reference/SCREEN.md @@ -90,6 +90,15 @@ class Detailed(BaseModel): Compare: 
regex on "remote-friendly" job postings gets 68% precision. +## Via MCP + +MCP tool: `everyrow_screen` + +| Parameter | Type | Description | +|-----------|------|-------------| +| `csv_path` | string | Path to input CSV file | +| `task` | string | What should pass | + ## Related docs ### Guides diff --git a/everyrow-mcp/manifest.json b/everyrow-mcp/manifest.json index 155c064d..86e5b4d6 100644 --- a/everyrow-mcp/manifest.json +++ b/everyrow-mcp/manifest.json @@ -3,8 +3,8 @@ "name": "everyrow-mcp", "display_name": "Everyrow MCP Server", "version": "0.4.0", - "description": "AI-powered dataframe ops: transform, dedupe, merge, rank, and screen with natural language", - "long_description": "MCP server for everyrow: agent ops at spreadsheet scale. This server exposes everyrow's 5 core operations as MCP tools, allowing LLM applications to screen, rank, dedupe, merge, and run agents on CSV files. All tools operate on local CSV files.", + "description": "Give your AI a research team. Forecast, score, classify, or research every row of a dataset.", + "long_description": "MCP server for everyrow: give your AI a research team. Each operation dispatches web research agents across a dataset to forecast, score, classify, deduplicate, merge, or research at scale.", "author": { "name": "FutureSearch", "url": "https://everyrow.io" @@ -31,39 +31,39 @@ "tools": [ { "name": "everyrow_screen", - "description": "Filter rows in a CSV file based on any criteria." + "description": "Filter rows — researches each row to evaluate pass/fail." }, { "name": "everyrow_rank", - "description": "Score and sort rows in a CSV file based on any criteria." + "description": "Score and rank rows — researches each row to compute the score." }, { "name": "everyrow_dedupe", - "description": "Remove duplicate rows from a CSV file using semantic equivalence." + "description": "Deduplicate — researches rows to resolve entity matches." 
}, { "name": "everyrow_merge", - "description": "Join two CSV files using intelligent entity matching." + "description": "Join two CSVs — researches to match rows across tables." }, { "name": "everyrow_agent", - "description": "Run web research agents on each row of a CSV file." + "description": "Research every row and add new columns." }, { "name": "everyrow_forecast", - "description": "Forecast the probability of binary questions from a CSV file." + "description": "Forecast — researches each question to predict outcomes." }, { "name": "everyrow_single_agent", - "description": "Run a single web research agent on a task, optionally with context data." + "description": "Research a single question." }, { "name": "everyrow_progress", - "description": "Check progress of a running task. Blocks briefly to limit the polling rate." + "description": "Check progress of a running task." }, { "name": "everyrow_results", - "description": "Retrieve results from a completed everyrow task and save them to a CSV." + "description": "Retrieve results from a completed task." 
}, { "name": "everyrow_list_sessions", diff --git a/everyrow-mcp/pyproject.toml b/everyrow-mcp/pyproject.toml index cae06735..60a87dd6 100644 --- a/everyrow-mcp/pyproject.toml +++ b/everyrow-mcp/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "everyrow-mcp" version = "0.4.0" -description = "MCP server for everyrow: agent ops at spreadsheet scale" +description = "MCP server for everyrow: a researcher for every row" readme = "README.md" requires-python = ">=3.12" dependencies = [ diff --git a/everyrow-mcp/server.json b/everyrow-mcp/server.json index dabf3e63..7605a8ac 100644 --- a/everyrow-mcp/server.json +++ b/everyrow-mcp/server.json @@ -2,7 +2,7 @@ "$schema": "https://static.modelcontextprotocol.io/schemas/2025-12-11/server.schema.json", "name": "io.github.futuresearch/everyrow-mcp", "title": "Everyrow MCP Server", - "description": "AI-powered dataframe ops: transform, dedupe, merge, rank, and screen with natural language", + "description": "Give your AI a research team. Forecast, score, classify, or research every row of a dataset.", "repository": { "url": "https://github.com/futuresearch/everyrow-sdk", "source": "github", diff --git a/pyproject.toml b/pyproject.toml index 42407b96..3e9efb51 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ everyrow = { workspace = true } [project] name = "everyrow" version = "0.4.0" -description = "An SDK for everyrow.io: agent ops at spreadsheet scale" +description = "A researcher for every row. Forecast, score, classify, or research entire datasets." 
readme = "README.md" requires-python = ">=3.12" dependencies = [ diff --git a/skills/everyrow-sdk/SKILL.md b/skills/everyrow-sdk/SKILL.md index 61163688..f5fd7b49 100644 --- a/skills/everyrow-sdk/SKILL.md +++ b/skills/everyrow-sdk/SKILL.md @@ -1,11 +1,11 @@ --- name: everyrow-sdk -description: Helps write Python code using the everyrow SDK for AI-powered data processing - transforming, deduping, merging, ranking, and screening dataframes with natural language instructions +description: Use when the user wants Claude to dispatch researchers to forecast, score, classify, or add to a dataset at scale. --- # everyrow SDK -The everyrow SDK provides intelligent data processing utilities powered by AI agents. Use this skill when writing Python code that needs to: +everyrow gives Claude a research team for your data. Use this skill when writing Python code that needs to: > **Documentation**: For detailed guides, case studies, and API reference, see: > - Docs site: [everyrow.io/docs](https://everyrow.io/docs) From 5288443880e992c9c0966e0a9f2d929f2bfc3e1a Mon Sep 17 00:00:00 2001 From: Dan Schwarz Date: Wed, 25 Feb 2026 15:46:46 -0800 Subject: [PATCH 3/3] Fix manifest sync test and broken doc links Co-Authored-By: Claude Opus 4.6 --- docs/add-column-web-lookup.mdx | 2 +- docs/classify-dataframe-rows-llm.mdx | 2 +- everyrow-mcp/manifest.json | 18 +++++++++--------- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/docs/add-column-web-lookup.mdx b/docs/add-column-web-lookup.mdx index 060c3357..11b37b48 100644 --- a/docs/add-column-web-lookup.mdx +++ b/docs/add-column-web-lookup.mdx @@ -166,4 +166,4 @@ Each result includes a `research` column showing how the agent found the answer, --- -Built with [everyrow](https://github.com/futuresearch/everyrow-sdk). See the [agent_map documentation](reference/AGENT_MAP) for more options including response models and effort levels. +Built with [everyrow](https://github.com/futuresearch/everyrow-sdk). 
See the [agent_map documentation](reference/RESEARCH) for more options including response models and effort levels. diff --git a/docs/classify-dataframe-rows-llm.mdx b/docs/classify-dataframe-rows-llm.mdx index 1ea94349..f7d15ea7 100644 --- a/docs/classify-dataframe-rows-llm.mdx +++ b/docs/classify-dataframe-rows-llm.mdx @@ -190,4 +190,4 @@ Without web research agents, everyrow can classify data for ~$0.009 per row, or --- -Built with [everyrow](https://github.com/futuresearch/everyrow-sdk). See the [agent_map documentation](reference/AGENT_MAP) for more options including response models and effort levels. +Built with [everyrow](https://github.com/futuresearch/everyrow-sdk). See the [agent_map documentation](reference/RESEARCH) for more options including response models and effort levels. diff --git a/everyrow-mcp/manifest.json b/everyrow-mcp/manifest.json index 86e5b4d6..56d82107 100644 --- a/everyrow-mcp/manifest.json +++ b/everyrow-mcp/manifest.json @@ -31,39 +31,39 @@ "tools": [ { "name": "everyrow_screen", - "description": "Filter rows — researches each row to evaluate pass/fail." + "description": "Filter rows in a CSV file based on any criteria." }, { "name": "everyrow_rank", - "description": "Score and rank rows — researches each row to compute the score." + "description": "Score and sort rows in a CSV file based on any criteria." }, { "name": "everyrow_dedupe", - "description": "Deduplicate — researches rows to resolve entity matches." + "description": "Remove duplicate rows from a CSV file using semantic equivalence." }, { "name": "everyrow_merge", - "description": "Join two CSVs — researches to match rows across tables." + "description": "Join two CSV files using intelligent entity matching." }, { "name": "everyrow_agent", - "description": "Research every row and add new columns." + "description": "Run web research agents on each row of a CSV file." 
}, { "name": "everyrow_forecast", - "description": "Forecast — researches each question to predict outcomes." + "description": "Forecast the probability of binary questions from a CSV file." }, { "name": "everyrow_single_agent", - "description": "Research a single question." + "description": "Run a single web research agent on a task, optionally with context data." }, { "name": "everyrow_progress", - "description": "Check progress of a running task." + "description": "Check progress of a running task. Blocks briefly to limit the polling rate." }, { "name": "everyrow_results", - "description": "Retrieve results from a completed task." + "description": "Retrieve results from a completed everyrow task and save them to a CSV." }, { "name": "everyrow_list_sessions",