Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from fastapi.middleware.cors import CORSMiddleware # noqa: E402
from starlette.middleware.gzip import GZipMiddleware # noqa: E402

from api.cache import cache_key, set_cache # noqa: E402
from api.exceptions import ( # noqa: E402
AnyplotException,
anyplot_exception_handler,
Expand All @@ -38,6 +39,10 @@
specs_router,
stats_router,
)
from api.routers.languages import _refresh_languages # noqa: E402
from api.routers.libraries import _refresh_libraries # noqa: E402
from api.routers.specs import _refresh_specs_list # noqa: E402
from api.routers.stats import _refresh_stats # noqa: E402
from core.database import close_db, init_db, is_db_configured # noqa: E402


Expand All @@ -50,6 +55,36 @@
mcp_http_app = mcp_server.http_app(path="/")


async def _prewarm_cache() -> None:
    """Warm the in-memory cache for the frontend's metadata endpoints.

    The frontend's AppDataProvider requests /stats, /libraries, /languages
    and /specs on every page load. Because the cache is held per Cloud Run
    instance, an instance that has just come up (autoscale or cold start)
    would otherwise make its first visitor wait for the full database
    round trip; users reported exactly that as "sometimes really slow" on
    the NumbersStrip and the /specs page. Running each refresh factory once
    at process startup lets that first request hit a warm cache.

    Prewarming is best-effort. A factory that raises is logged with its
    traceback and skipped; the remaining entries are still attempted, and
    the skipped endpoint simply takes the cold-cache path on first request,
    as it would have without this hook.
    """
    prewarm_targets = {
        "stats": _refresh_stats,
        "libraries": _refresh_libraries,
        "languages": _refresh_languages,
        "specs_list": _refresh_specs_list,
    }
    for name, refresh in prewarm_targets.items():
        try:
            payload = await refresh()
            set_cache(cache_key(name), payload)
            logger.info("Cache prewarm: %s OK", name)
        except Exception:
            # Deliberately broad: one failing entry must neither abort the
            # remaining prewarms nor propagate to the caller.
            logger.warning("Cache prewarm failed for %s — falling back to lazy load", name, exc_info=True)


@asynccontextmanager
async def lifespan(app: FastAPI):
"""Manage application lifecycle."""
Expand All @@ -60,6 +95,7 @@ async def lifespan(app: FastAPI):
try:
await init_db()
logger.info("Database connection initialized")
await _prewarm_cache()
except Exception as e:
logger.error(f"Failed to initialize database: {e}")

Expand Down
22 changes: 19 additions & 3 deletions api/routers/plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -444,16 +444,32 @@ async def _fetch_filtered() -> FilteredPlotsResponse:
spec_lookup = _build_spec_lookup(all_specs)
impl_lookup = _build_impl_lookup(all_specs)
all_images = _collect_all_images(all_specs)
spec_titles = {spec_id: data["spec"].title for spec_id, data in spec_lookup.items() if data["spec"].title}

global_counts = _calculate_global_counts(all_specs)

# Fast path for the unfiltered request (e.g. the /specs page list and
# the initial /plots load). With no filter groups, `filtered_images`
# equals `all_images`, `counts` equals `global_counts`, and `or_counts`
# is empty — so skip the two O(images × categories) recomputations
# that would otherwise dominate the cold-cache `filter:all` response.
if not filter_groups:
return FilteredPlotsResponse(
total=len(all_images),
images=all_images,
counts=global_counts,
globalCounts=global_counts,
orCounts=[],
specTitles=spec_titles,
)

spec_id_to_tags = {spec_id: spec_data["tags"] for spec_id, spec_data in spec_lookup.items()}

filtered_images = _filter_images(all_images, filter_groups, spec_lookup, impl_lookup)

global_counts = _calculate_global_counts(all_specs)
counts = _calculate_contextual_counts(filtered_images, spec_id_to_tags, impl_lookup)
or_counts = _calculate_or_counts(filter_groups, all_images, spec_id_to_tags, spec_lookup, impl_lookup)

spec_titles = {spec_id: data["spec"].title for spec_id, data in spec_lookup.items() if data["spec"].title}

return FilteredPlotsResponse(
total=len(filtered_images),
images=filtered_images,
Expand Down
28 changes: 18 additions & 10 deletions api/routers/specs.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,19 +144,31 @@ async def _build_spec_images(db: AsyncSession, spec_id: str) -> dict:
return {"spec_id": spec_id, "images": images}


async def _refresh_specs_list() -> list[SpecListItem]:
    """Build the specs list on a database session opened for this call.

    Unlike the request handler, this factory does not depend on a
    request-scoped session, so it can serve both as the cache's background
    refresh factory and as a startup prewarm step (e.g. the lifespan hook
    in api/main.py).

    Returns:
        The result of ``_build_specs_list`` for the fresh session.
    """
    async with get_db_context() as session:
        spec_items = await _build_specs_list(session)
        return spec_items


async def _refresh_specs_map() -> list[SpecMapItem]:
    """Build the /specs/map payload on a database session opened for this call.

    Usable outside the request cycle, for background cache refresh and
    startup prewarm.

    Returns:
        The result of ``_build_specs_map`` for the fresh session.
    """
    async with get_db_context() as session:
        map_items = await _build_specs_map(session)
        return map_items


@router.get("/specs", response_model=list[SpecListItem])
async def get_specs(db: AsyncSession = Depends(require_db)):
"""Get list of all specs with metadata (specs with at least one implementation)."""

async def _fetch() -> list[SpecListItem]:
return await _build_specs_list(db)

async def _refresh() -> list[SpecListItem]:
async with get_db_context() as fresh_db:
return await _build_specs_list(fresh_db)

return await get_or_set_cache(
cache_key("specs_list"), _fetch, refresh_after=settings.cache_refresh_after, refresh_factory=_refresh
cache_key("specs_list"), _fetch, refresh_after=settings.cache_refresh_after, refresh_factory=_refresh_specs_list
)


Expand All @@ -170,12 +182,8 @@ async def get_specs_map(db: AsyncSession = Depends(require_db)):
async def _fetch() -> list[SpecMapItem]:
return await _build_specs_map(db)

async def _refresh() -> list[SpecMapItem]:
async with get_db_context() as fresh_db:
return await _build_specs_map(fresh_db)

return await get_or_set_cache(
cache_key("specs_map"), _fetch, refresh_after=settings.cache_refresh_after, refresh_factory=_refresh
cache_key("specs_map"), _fetch, refresh_after=settings.cache_refresh_after, refresh_factory=_refresh_specs_map
)


Expand Down
61 changes: 27 additions & 34 deletions api/routers/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,26 +15,35 @@
router = APIRouter(tags=["stats"])


async def _compute_stats(db: AsyncSession) -> StatsResponse:
    """Assemble the /stats payload from aggregate repository queries.

    An earlier implementation loaded every Spec (with
    ``selectinload(Impl)`` and ``selectinload(Impl.library)``) and every
    Library row only to call ``len()`` on them. That made a cold-cache
    /stats one of the slowest reads on the site, and the NumbersStrip under
    the hero ("languages / libraries / specs") waited on it. Asking the
    repositories for COUNT/DISTINCT aggregates avoids transferring those
    rows.

    Args:
        db: Session handed to the spec, library and impl repositories.

    Returns:
        A ``StatsResponse`` with spec, plot, library and language counts
        plus the total lines of code. If the distinct-language count is
        falsy, the size of ``LANGUAGES_METADATA`` is reported instead.
    """
    specs = SpecRepository(db)
    libraries = LibraryRepository(db)
    impls = ImplRepository(db)

    # The four aggregates are awaited one after another on the same session.
    spec_count = await specs.count_with_impls()
    plot_count = await impls.count_all()
    library_count, language_count = await libraries.count_with_languages()
    code_lines = await impls.get_total_code_lines()

    # Fall back to the static metadata when the query reports no languages.
    if not language_count:
        language_count = len(LANGUAGES_METADATA)

    return StatsResponse(
        specs=spec_count,
        plots=plot_count,
        libraries=library_count,
        languages=language_count,
        lines_of_code=code_lines,
    )


async def _refresh_stats() -> StatsResponse:
"""Standalone factory for background refresh (creates own DB session)."""
async with get_db_context() as db:
spec_repo = SpecRepository(db)
lib_repo = LibraryRepository(db)
impl_repo = ImplRepository(db)
specs = await spec_repo.get_all()
libraries = await lib_repo.get_all()
total_loc = await impl_repo.get_total_code_lines()

specs_with_impls = [s for s in specs if s.impls]
total_impls = sum(len(s.impls) for s in specs)
languages = len({lib.language_id for lib in libraries}) or len(LANGUAGES_METADATA)
return StatsResponse(
specs=len(specs_with_impls),
plots=total_impls,
libraries=len(libraries),
languages=languages,
lines_of_code=total_loc,
)
return await _compute_stats(db)


@router.get("/stats", response_model=StatsResponse)
Expand All @@ -48,23 +57,7 @@ async def get_stats(db: AsyncSession | None = Depends(optional_db)):
return StatsResponse(specs=0, plots=0, libraries=len(LIBRARIES_METADATA), languages=len(LANGUAGES_METADATA))

async def _fetch() -> StatsResponse:
spec_repo = SpecRepository(db)
lib_repo = LibraryRepository(db)
impl_repo = ImplRepository(db)
specs = await spec_repo.get_all()
libraries = await lib_repo.get_all()
total_loc = await impl_repo.get_total_code_lines()

specs_with_impls = [s for s in specs if s.impls]
total_impls = sum(len(s.impls) for s in specs)
languages = len({lib.language_id for lib in libraries}) or len(LANGUAGES_METADATA)
return StatsResponse(
specs=len(specs_with_impls),
plots=total_impls,
libraries=len(libraries),
languages=languages,
lines_of_code=total_loc,
)
return await _compute_stats(db)

return await get_or_set_cache(
cache_key("stats"), _fetch, refresh_after=settings.cache_refresh_after, refresh_factory=_refresh_stats
Expand Down
155 changes: 155 additions & 0 deletions app/src/components/Layout.test.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
import { render, screen, waitFor } from '../test-utils';
import { AppDataProvider } from './Layout';
import { useAppData } from '../hooks/useLayoutContext';

// Test-only probe: reads the AppDataProvider context through useAppData()
// and renders the lengths of the three lists plus stats.libraries into
// data-testid nodes, so tests can observe the provider's data loading
// from the outside. Renders 'pending' until stats is set.
function DataPeek() {
  const appData = useAppData();
  const statsLibraries = appData.stats ? appData.stats.libraries : 'pending';

  return (
    <div>
      <div data-testid="specs-count">{appData.specsData.length}</div>
      <div data-testid="libraries-count">{appData.librariesData.length}</div>
      <div data-testid="languages-count">{appData.languagesData.length}</div>
      <div data-testid="stats-libraries">{statsLibraries}</div>
    </div>
  );
}

// Minimal stand-in for a successful fetch Response: only `ok` and `json()`
// are provided, and `json()` resolves to the given body.
function jsonResponse(body: unknown): Response {
  const stub = {
    ok: true,
    async json() {
      return body;
    },
  };
  return stub as unknown as Response;
}

beforeEach(() => {
vi.restoreAllMocks();
});

afterEach(() => {
vi.restoreAllMocks();
});

describe('AppDataProvider', () => {
it('fetches /specs, /libraries, /languages, /stats and exposes them via useAppData', async () => {
const specsBody = [{ id: 'bar-grouped', title: 'Grouped Bar Chart' }];
const libsBody = { libraries: [{ id: 'matplotlib', name: 'Matplotlib', language: 'python' }] };
const langsBody = { languages: [{ id: 'python', name: 'Python', file_extension: '.py' }] };
const statsBody = { specs: 7, plots: 42, libraries: 11, languages: 3 };

const fetchMock = vi.fn().mockImplementation((url: string) => {
if (url.endsWith('/specs')) return Promise.resolve(jsonResponse(specsBody));
if (url.endsWith('/libraries')) return Promise.resolve(jsonResponse(libsBody));
if (url.endsWith('/languages')) return Promise.resolve(jsonResponse(langsBody));
if (url.endsWith('/stats')) return Promise.resolve(jsonResponse(statsBody));
throw new Error(`unexpected fetch: ${url}`);
});
global.fetch = fetchMock;

Comment on lines +25 to +48
render(
<AppDataProvider>
<DataPeek />
</AppDataProvider>,
);

// All four endpoints should be hit (in parallel) — this is the regression
// guard for the requestIdleCallback fix: previously the calls fired on
// browser idle (could be up to 2 s late on Chrome). Now they fire on
// mount, so the test resolves without any extra time advancement.
await waitFor(() => {
expect(fetchMock).toHaveBeenCalledTimes(4);
});
const urls = fetchMock.mock.calls.map((c) => c[0] as string);
expect(urls.some((u) => u.endsWith('/specs'))).toBe(true);
expect(urls.some((u) => u.endsWith('/libraries'))).toBe(true);
expect(urls.some((u) => u.endsWith('/languages'))).toBe(true);
expect(urls.some((u) => u.endsWith('/stats'))).toBe(true);

await waitFor(() => {
expect(screen.getByTestId('stats-libraries')).toHaveTextContent('11');
});
expect(screen.getByTestId('specs-count')).toHaveTextContent('1');
expect(screen.getByTestId('libraries-count')).toHaveTextContent('1');
expect(screen.getByTestId('languages-count')).toHaveTextContent('1');
});

it('handles the /specs envelope ({specs: [...]}) as well as a bare array', async () => {
  // Here /specs answers with the wrapped shape { specs: [...] }; the
  // bare-array shape is what the previous test feeds in. The other three
  // endpoints return empty/minimal bodies because only the specs count
  // is asserted.
  const fetchMock = vi.fn().mockImplementation((url: string) => {
    if (url.endsWith('/specs')) return Promise.resolve(jsonResponse({ specs: [{ id: 'a' }, { id: 'b' }] }));
    if (url.endsWith('/libraries')) return Promise.resolve(jsonResponse({ libraries: [] }));
    if (url.endsWith('/languages')) return Promise.resolve(jsonResponse({ languages: [] }));
    if (url.endsWith('/stats')) return Promise.resolve(jsonResponse({ specs: 2, plots: 0, libraries: 0 }));
    // Any other URL is a test bug — fail loudly instead of hanging.
    throw new Error(`unexpected fetch: ${url}`);
  });
  global.fetch = fetchMock;

  render(
    <AppDataProvider>
      <DataPeek />
    </AppDataProvider>,
  );

  // Two entries inside the envelope must surface as two specs in context.
  await waitFor(() => {
    expect(screen.getByTestId('specs-count')).toHaveTextContent('2');
  });
});

it('swallows fetch errors without crashing — context falls back to empty defaults', async () => {
  // Silence console.warn and record its calls; the warn is the signal
  // that the provider's error path has run.
  const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {});
  // Every fetch call rejects.
  global.fetch = vi.fn().mockRejectedValue(new Error('boom'));

  render(
    <AppDataProvider>
      <DataPeek />
    </AppDataProvider>,
  );

  // Wait for the failure to be reported before inspecting the rendered state.
  await waitFor(() => {
    expect(warnSpy).toHaveBeenCalled();
  });
  // Lists stay empty and stats stays unset, so DataPeek shows 'pending'.
  expect(screen.getByTestId('specs-count')).toHaveTextContent('0');
  expect(screen.getByTestId('libraries-count')).toHaveTextContent('0');
  expect(screen.getByTestId('stats-libraries')).toHaveTextContent('pending');

  warnSpy.mockRestore();
});

it('aborts in-flight fetches when the provider unmounts', async () => {
  // Collect every AbortSignal handed to fetch. Without this the test would
  // pass vacuously: if the provider never passed (or never aborted) a
  // signal, the fetches would just stay pending and console.warn would
  // never fire, even though nothing was aborted.
  const signals: AbortSignal[] = [];

  // Hold all fetches open so the abort path is exercised in the cleanup.
  const fetchMock = vi.fn().mockImplementation(
    (_url: string, init?: RequestInit) =>
      new Promise<Response>((_, reject) => {
        const signal = init?.signal;
        if (!signal) return;
        signals.push(signal);
        signal.addEventListener('abort', () => {
          // Real fetch rejects with an AbortError DOMException on signal
          // abort; mimic the `name` so the catch in Layout.tsx treats it
          // the same way. Use a tagged Error to avoid ESLint's no-undef
          // for the DOMException browser global.
          const err = new Error('aborted');
          err.name = 'AbortError';
          reject(err);
        });
      }),
  );
  global.fetch = fetchMock;

  const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {});

  const { unmount } = render(
    <AppDataProvider>
      <DataPeek />
    </AppDataProvider>,
  );

  // Pending — no data resolved yet
  expect(screen.getByTestId('stats-libraries')).toHaveTextContent('pending');

  unmount();

  // The behavior named in the test title: the provider passed at least one
  // signal to fetch, and unmounting aborted every one of them.
  expect(signals.length).toBeGreaterThan(0);
  expect(signals.every((signal) => signal.aborted)).toBe(true);

  // The aborted rejection must not surface as an unhandled warn — the
  // catch branch's `if (signal.aborted) return` guards it.
  await new Promise((resolve) => setTimeout(resolve, 0));
  expect(warnSpy).not.toHaveBeenCalled();

  warnSpy.mockRestore();
});
});
Loading