From 2ca10e34379f1510f12fb7f54d3ca136b4ec3832 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Sun, 15 Feb 2026 20:41:21 +0000 Subject: [PATCH] refactor: optimize JSON-LD parsing in movie scraper - Parse JSON-LD once in `MovieScraper.movie` service - Pass parsed object to `getMovieYear` and `getMovieDuration` helpers - Update helpers to accept object instead of string - Improve error handling for missing/invalid JSON-LD - Update tests to reflect changes Co-authored-by: bartholomej <5861310+bartholomej@users.noreply.github.com> --- src/helpers/movie.helper.ts | 26 ++++++++++++++------------ src/services/movie.service.ts | 10 ++++++++-- tests/movie.test.ts | 11 ++++++++--- 3 files changed, 30 insertions(+), 17 deletions(-) diff --git a/src/helpers/movie.helper.ts b/src/helpers/movie.helper.ts index 230b4004..4cc7b3fc 100644 --- a/src/helpers/movie.helper.ts +++ b/src/helpers/movie.helper.ts @@ -123,23 +123,23 @@ export const getMovieRatingCount = (el: HTMLElement): number => { } }; -export const getMovieYear = (el: string): number => { - try { - const jsonLd = JSON.parse(el); +export const getMovieYear = (jsonLd: any): number => { + if (jsonLd && jsonLd.dateCreated) { return +jsonLd.dateCreated; - } catch (error) { - console.error('node-csfd-api: Error parsing JSON-LD', error); - return null; } + return null; }; -export const getMovieDuration = (jsonLdRaw: string, el: HTMLElement): number => { - let duration = null; +export const getMovieDuration = (jsonLd: any, el: HTMLElement): number => { try { - const jsonLd = JSON.parse(jsonLdRaw); - duration = jsonLd.duration; - return parseISO8601Duration(duration); + if (jsonLd && jsonLd.duration) { + return parseISO8601Duration(jsonLd.duration); + } } catch (error) { + // ignore + } + + try { const origin = el.querySelector('.origin').innerText; const timeString = origin.split(','); if (timeString.length > 2) { @@ -151,11 +151,13 @@ export const getMovieDuration = (jsonLdRaw: string, el: HTMLElement): number => const hoursMinsRaw = timeRaw.split('min')[0]; const hoursMins = hoursMinsRaw.split('h'); // Resolve hours + minutes format - duration = hoursMins.length > 1 ? +hoursMins[0] * 60 + +hoursMins[1] : +hoursMins[0]; + const duration = hoursMins.length > 1 ? +hoursMins[0] * 60 + +hoursMins[1] : +hoursMins[0]; return duration; } else { return null; } + } catch (e) { + return null; } }; diff --git a/src/services/movie.service.ts b/src/services/movie.service.ts index e30470ac..476c52ac 100644 --- a/src/services/movie.service.ts +++ b/src/services/movie.service.ts @@ -41,7 +41,13 @@ export class MovieScraper { const pageClasses = movieHtml.querySelector('.page-content').classNames.split(' '); const asideNode = movieHtml.querySelector('.aside-movie-profile'); const movieNode = movieHtml.querySelector('.main-movie-profile'); - const jsonLd = movieHtml.querySelector('script[type="application/ld+json"]').innerText; + const jsonLdString = movieHtml.querySelector('script[type="application/ld+json"]').innerText; + let jsonLd = null; + try { + jsonLd = JSON.parse(jsonLdString); + } catch (e) { + console.error('node-csfd-api: Error parsing JSON-LD', e); + } return this.buildMovie(+movieId, movieNode, asideNode, pageClasses, jsonLd, options); } @@ -50,7 +56,7 @@ export class MovieScraper { el: HTMLElement, asideEl: HTMLElement, pageClasses: string[], - jsonLd: string, + jsonLd: any, options: CSFDOptions ): CSFDMovie { return { diff --git a/tests/movie.test.ts b/tests/movie.test.ts index 1cf9f189..bbbc4a6f 100644 --- a/tests/movie.test.ts +++ b/tests/movie.test.ts @@ -48,13 +48,18 @@ const getNode = (node: HTMLElement): HTMLElement => { return node.querySelector('.main-movie-profile') as HTMLElement; }; -const getJsonLd = (node: HTMLElement): string => { - return node.querySelector('script[type="application/ld+json"]')?.innerText ?? '{}'; +const getJsonLd = (node: HTMLElement): any => { + const json = node.querySelector('script[type="application/ld+json"]')?.innerText || '{}'; + try { + return JSON.parse(json); + } catch (e) { + return null; + } }; const getMovie = ( node: HTMLElement -): { pClasses: string[]; aside: HTMLElement; pNode: HTMLElement; jsonLd: string } => { +): { pClasses: string[]; aside: HTMLElement; pNode: HTMLElement; jsonLd: any } => { return { pClasses: getPageClasses(node), aside: getAsideNode(node),