Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
401 changes: 233 additions & 168 deletions Gradata/bench/pmr_100.py

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions Gradata/examples/basic_usage.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
"""Basic Gradata usage — learn from corrections in 10 lines."""

from pathlib import Path

from gradata.brain import Brain

# Create a brain (or open existing)
Expand Down
260 changes: 152 additions & 108 deletions Gradata/examples/domain-profiles/call_profile.py

Large diffs are not rendered by default.

69 changes: 38 additions & 31 deletions Gradata/examples/domain-profiles/sales_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,16 @@
from __future__ import annotations

from collections import defaultdict
from dataclasses import dataclass, field
from dataclasses import dataclass
from datetime import datetime


@dataclass
class OutreachEvent:
"""A single outreach attempt."""
channel: str # email, call, linkedin, etc.
timestamp: str # ISO 8601

channel: str # email, call, linkedin, etc.
timestamp: str # ISO 8601
prospect: str = ""
replied: bool = False
reply_sentiment: str = "" # positive, neutral, negative
Expand All @@ -43,6 +44,7 @@ class OutreachEvent:
@dataclass
class FollowupEvent:
"""A follow-up in a sequence."""

prospect: str
touch_number: int
days_since_last: int
Expand All @@ -53,20 +55,21 @@ class FollowupEvent:
@dataclass
class SalesProfileReport:
"""Aggregated sales behavioral metrics."""

# Timing
best_send_hours: list[int] # Hours (0-23) with highest reply rates
best_send_days: list[str] # Days of week with highest reply rates
avg_response_time_hours: float # Average time to reply after outreach
best_send_hours: list[int] # Hours (0-23) with highest reply rates
best_send_days: list[str] # Days of week with highest reply rates
avg_response_time_hours: float # Average time to reply after outreach

# Cadence
avg_days_between_touches: float
cadence_compliance: float # 0-1, how close to optimal cadence
touches_before_reply: float # Average touches before first reply
drop_off_touch: int # Touch number where most sequences die
cadence_compliance: float # 0-1, how close to optimal cadence
touches_before_reply: float # Average touches before first reply
drop_off_touch: int # Touch number where most sequences die

# Multi-threading
avg_contacts_per_deal: float # Gong: 2x contacts = higher win rate
deals_with_single_contact: int # Risk indicator
avg_contacts_per_deal: float # Gong: 2x contacts = higher win rate
deals_with_single_contact: int # Risk indicator
deals_with_multithread: int

# Volume
Expand All @@ -92,13 +95,15 @@ def log_outreach(
reply_sentiment: str = "",
) -> None:
"""Log an outreach attempt."""
self._outreach.append(OutreachEvent(
channel=channel,
timestamp=timestamp,
prospect=prospect,
replied=replied,
reply_sentiment=reply_sentiment,
))
self._outreach.append(
OutreachEvent(
channel=channel,
timestamp=timestamp,
prospect=prospect,
replied=replied,
reply_sentiment=reply_sentiment,
)
)

def log_followup(
self,
Expand All @@ -109,13 +114,15 @@ def log_followup(
replied: bool = False,
) -> None:
"""Log a follow-up touch in a sequence."""
self._followups.append(FollowupEvent(
prospect=prospect,
touch_number=touch_number,
days_since_last=days_since_last,
channel=channel,
replied=replied,
))
self._followups.append(
FollowupEvent(
prospect=prospect,
touch_number=touch_number,
days_since_last=days_since_last,
channel=channel,
replied=replied,
)
)

def log_deal_contact(self, deal: str, contact_role: str) -> None:
"""Log a contact associated with a deal (multi-threading tracking)."""
Expand Down Expand Up @@ -147,9 +154,7 @@ def compute(self) -> SalesProfileReport:

# Best days by reply rate
day_rates = {
d: sum(replies) / len(replies)
for d, replies in reply_days.items()
if len(replies) >= 3
d: sum(replies) / len(replies) for d, replies in reply_days.items() if len(replies) >= 3
}
best_days = sorted(day_rates, key=day_rates.get, reverse=True)[:3]

Expand All @@ -159,7 +164,8 @@ def compute(self) -> SalesProfileReport:
replied_followups = [f for f in self._followups if f.replied]
touches_before = (
sum(f.touch_number for f in replied_followups) / len(replied_followups)
if replied_followups else 0.0
if replied_followups
else 0.0
)
# Find drop-off: touch number with most non-replied sequences
touch_counts: dict[int, int] = defaultdict(int)
Expand All @@ -178,7 +184,7 @@ def compute(self) -> SalesProfileReport:
if gaps:
deviations = [
abs(a - o) / max(o, 1)
for a, o in zip(gaps[:5], optimal_gaps[:len(gaps)])
for a, o in zip(gaps[:5], optimal_gaps[: len(gaps)], strict=False)
]
compliances.append(max(0.0, 1.0 - sum(deviations) / len(deviations)))
compliance = sum(compliances) / len(compliances) if compliances else 0.0
Expand All @@ -194,7 +200,8 @@ def compute(self) -> SalesProfileReport:
multi = sum(1 for contacts in self._deal_contacts.values() if len(contacts) > 1)
avg_contacts = (
sum(len(c) for c in self._deal_contacts.values()) / deal_count
if deal_count > 0 else 0.0
if deal_count > 0
else 0.0
)

# Reply rates
Expand Down
1 change: 1 addition & 0 deletions Gradata/examples/with_claude_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

See also: .claude-plugin/README.md for the zero-code install flow.
"""

from pathlib import Path

from gradata.brain import Brain
Expand Down
1 change: 1 addition & 0 deletions Gradata/examples/with_openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

Requires: pip install gradata openai
"""

from pathlib import Path

from openai import OpenAI
Expand Down
4 changes: 1 addition & 3 deletions Gradata/scripts/migrate_legacy_scopes.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,9 +118,7 @@ def load_domain_set(
if isinstance(data, list):
return {str(d).strip().lower() for d in data if str(d).strip()}
except ImportError:
logger.warning(
"PyYAML not installed; falling back to inferred domain set."
)
logger.warning("PyYAML not installed; falling back to inferred domain set.")
except Exception as exc: # pragma: no cover - defensive
logger.warning("Could not parse %s: %s", domains_file, exc)

Expand Down
5 changes: 5 additions & 0 deletions Gradata/src/gradata/_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,11 @@ class BrainConfig:
max_recall_tokens: int = 2000
ranker: RecallRanker = "hybrid"

@classmethod
def load(cls, brain_dir: str | Path | None = None) -> BrainConfig:
"""Load runtime config from ``<brain_dir>/brain-config.json``."""
return _load_brain_config(brain_dir)


BRAIN_CONFIG = BrainConfig()

Expand Down
1 change: 1 addition & 0 deletions Gradata/src/gradata/_context_packet.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,7 @@ def build_packet(
packet = {}
if session is None:
session = _detect_session()
session = session or 0

if task_type in ("prospecting", "meeting_prep"):
packet["user_scope"] = _load_user_scope(ctx=ctx)
Expand Down
28 changes: 18 additions & 10 deletions Gradata/src/gradata/_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1231,11 +1231,15 @@ def _cloud_sync_session(
"""Best-effort cloud sync at session end. Never raises, never blocks."""
try:
import hashlib
import os
from pathlib import Path

# 1. Resolve cloud credentials: ~/.gradata/config.toml or env var
api_key = os.environ.get("GRADATA_API_KEY", "")
# 1. Resolve cloud credentials: per-brain cloud-config.json, keyfile,
# legacy ~/.gradata/config.toml, or env var.
from gradata.cloud import _credentials as _cloud_creds
from gradata.cloud.sync import load_config as _load_cloud_config

brain_cloud_cfg = _load_cloud_config(brain.dir)
api_key = _cloud_creds.resolve_credential(fallback=brain_cloud_cfg.token)
Comment on lines +1241 to +1242

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Honor `sync_enabled` before resolving shared credentials.

This auto-sync path now uploads whenever a global keyfile or environment credential exists, even if this brain was never cloud-enabled. As a result, a brain that never opted in to cloud sync can have its data uploaded as soon as `brain_end_session()` runs.

Suggested fix
         brain_cloud_cfg = _load_cloud_config(brain.dir)
+        if not brain_cloud_cfg.sync_enabled:
+            return
         api_key = _cloud_creds.resolve_credential(fallback=brain_cloud_cfg.token)
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
```diff
- brain_cloud_cfg = _load_cloud_config(brain.dir)
- api_key = _cloud_creds.resolve_credential(fallback=brain_cloud_cfg.token)
+ brain_cloud_cfg = _load_cloud_config(brain.dir)
+ if not brain_cloud_cfg.sync_enabled:
+     return
+ api_key = _cloud_creds.resolve_credential(fallback=brain_cloud_cfg.token)
```
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@Gradata/src/gradata/_core.py` around lines 1241 - 1242: in
`_cloud_sync_session`, load the per-brain config with
`_load_cloud_config(brain.dir)` first, then return early when its `sync_enabled`
flag is false, before calling `_cloud_creds.resolve_credential(...)`. Shared
credentials (global keyfile or environment variable) must never be resolved, and
nothing may be auto-uploaded, for a brain that is not cloud-enabled. Make sure
the `brain_end_session()` flow and any other code path that calls
`_cloud_creds.resolve_credential` apply the same `sync_enabled` check.

api_url = ""
brain_id_from_config = ""

Expand All @@ -1249,6 +1253,7 @@ def _cloud_sync_session(
except Exception as e:
_log.debug("cloud config parse failed: %s", e)

api_url = _cloud_creds.resolve_endpoint(api_url, fallback=brain_cloud_cfg.api_base)
if not api_key:
return # No cloud credentials — nothing to sync

Expand Down Expand Up @@ -1337,17 +1342,15 @@ def _cloud_sync_session(
sync_client.sync_metrics(payload)
_log.debug("Cloud telemetry synced for session %d", session)

# Finding 11: respect sync_mode — default is metrics_only.
# Only sync full events/corrections if explicitly opted in via config.
sync_mode = "metrics_only"
sync_mode = getattr(brain_cloud_cfg, "sync_mode", "full") or "full"
try:
cfg = _parse_toml_cloud(config_path)
sync_mode = cfg.get("sync_mode", "metrics_only")
sync_mode = cfg.get("sync_mode", sync_mode)
Comment on lines +1345 to +1348

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Do not let legacy TOML override the per-brain `sync_mode`.

These lines can undo the `"full"` mode that `cmd_sync()` just persisted for this brain, so a stale `~/.gradata/config.toml` that sets `sync_mode` to `metrics_only` will still skip event backfill and recreate the dashboard gap.

Suggested fix
-        sync_mode = getattr(brain_cloud_cfg, "sync_mode", "full") or "full"
-        try:
-            cfg = _parse_toml_cloud(config_path)
-            sync_mode = cfg.get("sync_mode", sync_mode)
-        except Exception:
-            pass
+        sync_mode = str(getattr(brain_cloud_cfg, "sync_mode", "") or "").strip().lower()
+        if not sync_mode and config_path.is_file():
+            try:
+                cfg = _parse_toml_cloud(config_path)
+                sync_mode = str(cfg.get("sync_mode", "") or "").strip().lower()
+            except Exception:
+                pass
+        sync_mode = sync_mode or "full"
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
sync_mode = getattr(brain_cloud_cfg, "sync_mode", "full") or "full"
try:
cfg = _parse_toml_cloud(config_path)
sync_mode = cfg.get("sync_mode", "metrics_only")
sync_mode = cfg.get("sync_mode", sync_mode)
sync_mode = str(getattr(brain_cloud_cfg, "sync_mode", "") or "").strip().lower()
if not sync_mode and config_path.is_file():
try:
cfg = _parse_toml_cloud(config_path)
sync_mode = str(cfg.get("sync_mode", "") or "").strip().lower()
except Exception:
pass
sync_mode = sync_mode or "full"
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@Gradata/src/gradata/_core.py` around lines 1345 - 1348: the global TOML
must not override a per-brain persisted `sync_mode`. After calling
`_parse_toml_cloud(config_path)`, apply `cfg.get("sync_mode", ...)` only when the
brain does not already have a `sync_mode` set. For example, keep the initial
value from `getattr(brain_cloud_cfg, "sync_mode", "full")` and replace the
unconditional `sync_mode = cfg.get(...)` with a conditional: use `cfg.get(...)`
only if `not hasattr(brain_cloud_cfg, "sync_mode")` or
`brain_cloud_cfg.sync_mode in (None, "")`. This ensures the per-brain `"full"`
setting persisted by `cmd_sync()` is not clobbered by the legacy
`~/.gradata/config.toml`.

except Exception:
pass

if sync_mode == "full":
# 4. Sync events/corrections via the full cloud client (opt-in only)
# 4. Sync events/corrections via the full cloud client.
try:
from gradata.cloud.client import CloudClient

Expand All @@ -1361,9 +1364,14 @@ def _cloud_sync_session(
_log.debug("Cloud event sync completed for session %d", session)
except Exception as e:
_log.debug("Cloud event sync failed (non-fatal): %s", e)
elif sync_mode == "metrics_only":
_log.warning(
"Skipping event/correction sync (sync_mode=metrics_only) — dashboard will "
"not show corrections. Run `gradata sync --full` to backfill."
)
else:
_log.debug(
"Cloud sync_mode=%s — skipping event/correction sync for session %d",
_log.warning(
"Unknown cloud sync_mode=%s — skipping event/correction sync for session %d",
sync_mode,
session,
)
Expand Down
Loading
Loading