5 changes: 5 additions & 0 deletions CHANGELOG.md
@@ -19,6 +19,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Standardized Python version requirement to 3.10+
- Improved documentation structure

### Removed
- **BREAKING**: Removed `--offline` flag (redundant with semantic cache and `CORTEX_PROVIDER=ollama`)
- The semantic cache automatically provides offline capability for cached requests
- For true offline operation, use `export CORTEX_PROVIDER=ollama` instead

### Fixed
- (Pending) Shell injection vulnerability in coordinator.py
- (Pending) CI/CD pipeline test directory path
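The removal above implies a small migration for anyone who scripted `cortex --offline`. Below is a minimal sketch of the replacement workflow, assuming `CORTEX_PROVIDER` and the commands behave as described in this changelog entry and in the testing guide later in this diff:

```bash
# Before this change (flag now removed):
#   cortex --offline install nginx --dry-run
# After this change: cached requests are served from the semantic cache
# automatically; for fully offline operation, point Cortex at a local model.
export CORTEX_PROVIDER=ollama
cortex install nginx --dry-run
cortex cache stats   # hits should increase on repeated requests
```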
6 changes: 0 additions & 6 deletions cortex/ask.py
@@ -140,19 +140,16 @@ def __init__(
api_key: str,
provider: str = "claude",
model: str | None = None,
offline: bool = False,
):
"""Initialize the ask handler.

Args:
api_key: API key for the LLM provider
provider: Provider name ("openai", "claude", or "ollama")
model: Optional model name override
offline: If True, only use cached responses
"""
self.api_key = api_key
self.provider = provider.lower()
self.offline = offline
self.model = model or self._default_model()
self.info_gatherer = SystemInfoGatherer()

@@ -313,9 +310,6 @@ def ask(self, question: str) -> str:
if cached is not None and len(cached) > 0:
return cached[0]

if self.offline:
raise RuntimeError("Offline mode: no cached response available for this question")

# Call LLM
try:
if self.provider == "openai":
10 changes: 1 addition & 9 deletions cortex/cli.py
@@ -30,7 +30,6 @@ def __init__(self, verbose: bool = False):
self.spinner_chars = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"]
self.spinner_idx = 0
self.verbose = verbose
self.offline = False

def _debug(self, message: str):
"""Print debug info only in verbose mode"""
@@ -294,7 +293,6 @@ def ask(self, question: str) -> int:
handler = AskHandler(
api_key=api_key,
provider=provider,
offline=self.offline,
)
answer = handler.ask(question)
console.print(answer)
@@ -355,9 +353,7 @@ def install(
try:
self._print_status("🧠", "Understanding request...")

interpreter = CommandInterpreter(
api_key=api_key, provider=provider, offline=self.offline
)
interpreter = CommandInterpreter(api_key=api_key, provider=provider)

self._print_status("📦", "Planning installation...")

@@ -1154,9 +1150,6 @@ def main():
# Global flags
parser.add_argument("--version", "-V", action="version", version=f"cortex {VERSION}")
parser.add_argument("--verbose", "-v", action="store_true", help="Show detailed output")
parser.add_argument(
"--offline", action="store_true", help="Use cached responses only (no network calls)"
)

subparsers = parser.add_subparsers(dest="command", help="Available commands")

@@ -1329,7 +1322,6 @@ def main():
return 0

cli = CortexCLI(verbose=args.verbose)
cli.offline = bool(getattr(args, "offline", False))

try:
if args.command == "demo":
6 changes: 0 additions & 6 deletions cortex/llm/interpreter.py
@@ -27,7 +27,6 @@ def __init__(
api_key: str,
provider: str = "openai",
model: str | None = None,
offline: bool = False,
cache: Optional["SemanticCache"] = None,
):
"""Initialize the command interpreter.
@@ -36,12 +35,10 @@ def __init__(
api_key: API key for the LLM provider
provider: Provider name ("openai", "claude", or "ollama")
model: Optional model name override
offline: If True, only use cached responses
cache: Optional SemanticCache instance for response caching
"""
self.api_key = api_key
self.provider = APIProvider(provider.lower())
self.offline = offline

if cache is None:
try:
@@ -350,9 +347,6 @@ def parse(self, user_input: str, validate: bool = True) -> list[str]:
if cached is not None:
return cached

if self.offline:
raise RuntimeError("Offline mode: no cached response available for this request")

if self.provider == APIProvider.OPENAI:
commands = self._call_openai(user_input)
elif self.provider == APIProvider.CLAUDE:
1 change: 0 additions & 1 deletion docs/COMMANDS.md
@@ -24,7 +24,6 @@ This document provides a comprehensive reference for all commands available in t
```bash
cortex --version, -V # Show version
cortex --verbose, -v # Show detailed output
cortex --offline # Use cached responses only (no network calls)
cortex --help, -h # Show help message
```

25 changes: 13 additions & 12 deletions docs/ISSUE-268-TESTING.md
@@ -1,6 +1,6 @@
# Issue 268 — End-user testing guide (semantic cache + offline)
# Issue 268 — End-user testing guide (semantic cache)

This guide covers only how to test the feature added for issue #268.
This guide covers how to test the semantic cache feature added for issue #268.

## Prereqs

@@ -40,30 +40,31 @@ Expected:
- `Misses` is >= 0
- `Saved calls (approx)` increases when cached answers are used

## Test 3: Offline mode (cached-only)
## Test 3: Verify cache hit (repeat request)

Run the same request with offline mode enabled.
Run the original request again to verify cache is working:

```bash
cortex --offline install nginx --dry-run
cortex install nginx --dry-run
cortex cache stats
```

Expected:
- If the request was warmed in Test 1, it should still print commands.
- If the request was never cached, it should fail with an offline-cache-miss message.
- The second run should be faster (no API call)
- `cache stats` should show `Hits: 1`

## Test 4: Verify cache hit (repeat request)
## Test 4: True offline mode with Ollama

Run the original request again to verify cache is working:
For completely offline operation, use a local LLM:

```bash
export CORTEX_PROVIDER=ollama
cortex install nginx --dry-run
cortex cache stats
```

Expected:
- The second run should be faster (no API call)
- `cache stats` should show `Hits: 1`
- Works without internet connection
- Uses local Ollama model

## Notes

8 changes: 0 additions & 8 deletions tests/test_ask.py
@@ -174,14 +174,6 @@ def test_ask_with_openai_mock(self, mock_openai):
self.assertEqual(answer, "TensorFlow is compatible with your system.")
mock_openai.assert_called_once()

def test_ask_offline_no_cache(self):
"""Test that offline mode raises error when no cache hit."""
handler = AskHandler(api_key="fake-key", provider="fake", offline=True)
handler.cache = None
with self.assertRaises(RuntimeError) as ctx:
handler.ask("Random question that's not cached")
self.assertIn("Offline mode", str(ctx.exception))

def test_ask_caches_response(self):
"""Test that responses are cached after successful API call."""
from cortex.semantic_cache import SemanticCache
9 changes: 7 additions & 2 deletions tests/test_cli.py
@@ -60,8 +60,13 @@ def test_print_success(self, mock_stdout):

@patch.dict(os.environ, {}, clear=True)
def test_install_no_api_key(self):
result = self.cli.install("docker")
self.assertEqual(result, 1)
# When no API key is set, the CLI falls back to Ollama.
# If Ollama is running, this should succeed. If not, it should fail.
# We'll mock Ollama to be unavailable to test the failure case.
with patch("cortex.llm.interpreter.CommandInterpreter.parse") as mock_parse:
mock_parse.side_effect = RuntimeError("Ollama not available")
result = self.cli.install("docker")
self.assertEqual(result, 1)

@patch.dict(os.environ, {"OPENAI_API_KEY": "sk-test-openai-key-123"}, clear=True)
@patch("cortex.cli.CommandInterpreter")