twitter · jbaxter · Mar 5, 2026 · Mar 5, 2026 · Mar 5, 2026
diff --git a/scoring/src/scoring/constants.py b/scoring/src/scoring/constants.py
@@ -361,6 +361,19 @@ def rater_factor_key(i):
 gaussianNoteInterceptNoHighVolKey = "gaussianNoteInterceptNoHighVol"
 gaussianNoteInterceptNoCorrelatedKey = "gaussianNoteInterceptNoCorrelated"
 gaussianNoteInterceptPopulationSampledKey = "gaussianNoteInterceptPopulationSampled"
+# Gaussian Core With Topics Model
+gaussianCoreWithTopicsNoteInterceptKey = "gaussianCoreWithTopicsNoteIntercept"
+gaussianCoreWithTopicsNoteFactor1Key = "gaussianCoreWithTopicsNoteFactor1"
+gaussianCoreWithTopicsRatingStatusKey = "gaussianCoreWithTopicsRatingStatus"
+gaussianCoreWithTopicsActiveRulesKey = "gaussianCoreWithTopicsActiveRules"
+gaussianCoreWithTopicsNumFinalRoundRatingsKey = "gaussianCoreWithTopicsNumFinalRoundRatings"
+gaussianCoreWithTopicsNoteInterceptNoHighVolKey = "gaussianCoreWithTopicsNoteInterceptNoHighVol"
+gaussianCoreWithTopicsNoteInterceptNoCorrelatedKey = (
+  "gaussianCoreWithTopicsNoteInterceptNoCorrelated"
+)
+gaussianCoreWithTopicsNoteInterceptPopulationSampledKey = (
+  "gaussianCoreWithTopicsNoteInterceptPopulationSampled"
+)
 # Harassment/Abuse Tag
 harassmentNoteInterceptKey = "harassmentNoteIntercept"
 harassmentNoteFactor1Key = "harassmentNoteFactor1"
@@ -394,6 +407,9 @@ def rater_factor_key(i):
 aboveHelpfulnessThresholdKey = "aboveHelpfulnessThreshold"
 totalHelpfulHarassmentRatingsPenaltyKey = "totalHelpfulHarassmentPenalty"
 raterAgreeRatioWithHarassmentAbusePenaltyKey = "raterAgreeRatioKeyWithHarassmentAbusePenalty"
+crhTotal14dKey = "crhTotal14d"
+crnhTotal14dKey = "crnhTotal14d"
+nmrTotal14dKey = "nmrTotal14d"
 
 # Note Status Labels
 currentlyRatedHelpful = "CURRENTLY_RATED_HELPFUL"
@@ -960,6 +976,14 @@ def rater_factor_key(i):
   (gaussianNoteInterceptNoHighVolKey, np.double),
   (gaussianNoteInterceptPopulationSampledKey, np.double),
   (gaussianNumFinalRoundRatingsKey, np.double),  # double because nullable.
+  (gaussianCoreWithTopicsNoteInterceptKey, np.double),
+  (gaussianCoreWithTopicsNoteFactor1Key, np.double),
+  (gaussianCoreWithTopicsRatingStatusKey, "category"),
+  (gaussianCoreWithTopicsActiveRulesKey, "category"),
+  (gaussianCoreWithTopicsNoteInterceptNoHighVolKey, np.double),
+  (gaussianCoreWithTopicsNoteInterceptNoCorrelatedKey, np.double),
+  (gaussianCoreWithTopicsNoteInterceptPopulationSampledKey, np.double),
+  (gaussianCoreWithTopicsNumFinalRoundRatingsKey, np.double),  # double because nullable.
 ]
 noteModelOutputTSVColumns = [col for (col, dtype) in noteModelOutputTSVColumnsAndTypes]
 noteModelOutputTSVTypeMapping = {col: dtype for (col, dtype) in noteModelOutputTSVColumnsAndTypes}
@@ -1049,6 +1073,9 @@ def rater_factor_key(i):
   (coreWithTopicsRaterFactor1Key, np.double),
   (coreFirstRoundRaterInterceptKey, np.double),
   (coreFirstRoundRaterFactor1Key, np.double),
+  (crhTotal14dKey, pd.Int64Dtype()),
+  (crnhTotal14dKey, pd.Int64Dtype()),
+  (nmrTotal14dKey, pd.Int64Dtype()),
 ]
 raterModelOutputTSVColumns = [col for (col, dtype) in raterModelOutputTSVColumnsAndTypes]
 raterModelOutputTSVTypeMapping = {col: dtype for (col, dtype) in raterModelOutputTSVColumnsAndTypes}

diff --git a/scoring/src/scoring/enums.py b/scoring/src/scoring/enums.py
@@ -16,6 +16,7 @@ class Scorers(Enum):
   MFTopicScorer = auto()
   MFMultiGroupScorer = auto()
   GaussianScorer = auto()
+  GaussianCoreWithTopicsScorer = auto()
 
 
 class Topics(Enum):
@@ -26,6 +27,7 @@ class Topics(Enum):
   GazaConflict = 2
   MessiRonaldo = 3
   Scams = 4
+  InDimensionTwo = 5
 
 
 def scorers_from_csv(csv: str) -> Set[Scorers]:

diff --git a/scoring/src/scoring/gaussian_core_with_topics_scorer.py b/scoring/src/scoring/gaussian_core_with_topics_scorer.py
@@ -0,0 +1,83 @@
+from typing import Dict, List, Optional
+
+from . import constants as c
+from .gaussian_scorer import GaussianScorer
+
+
+class GaussianCoreWithTopicsScorer(GaussianScorer):
+  """GaussianScorer variant that keeps topic notes and emits gaussianCoreWithTopics* columns.
+
+  Configures the base scorer with includedGroups=c.coverageGroups, includeUnassigned=True and
+  excludeTopics=False. NOTE(review): this docstring previously said "coreGroups"; confirm
+  whether c.coverageGroups or c.coreGroups is intended to match MFCoreWithTopicsScorer.
+  """
+
+  def __init__(
+    self,
+    seed: Optional[int] = None,
+    threads: int = c.defaultNumThreads,
+    saveIntermediateState: bool = False,
+  ) -> None:
+    """Configure GaussianCoreWithTopicsScorer; group/topic settings passed to super are fixed.
+
+    Args:
+      seed: if not None, seed value to ensure deterministic execution
+      threads: number of threads to use for intra-op parallelism in pytorch
+      saveIntermediateState: if True, save intermediate state for debugging
+    """
+    super().__init__(
+      includedGroups=c.coverageGroups,
+      excludeTopics=False,
+      includeUnassigned=True,
+      captureThreshold=0.5,
+      seed=seed,
+      threads=threads,
+      saveIntermediateState=saveIntermediateState,
+    )
+
+  def get_name(self):
+    return "GaussianCoreWithTopicsScorer"
+
+  def _get_note_col_mapping(self) -> Dict[str, str]:
+    """Returns a dict mapping internal note column names to this scorer's output column names."""
+    return {
+      c.internalNoteInterceptKey: c.gaussianCoreWithTopicsNoteInterceptKey,
+      c.internalNoteFactor1Key: c.gaussianCoreWithTopicsNoteFactor1Key,
+      c.internalActiveRulesKey: c.gaussianCoreWithTopicsActiveRulesKey,
+      c.numFinalRoundRatingsKey: c.gaussianCoreWithTopicsNumFinalRoundRatingsKey,
+      c.internalNoteInterceptNoHighVolKey: c.gaussianCoreWithTopicsNoteInterceptNoHighVolKey,
+      c.internalNoteInterceptNoCorrelatedKey: c.gaussianCoreWithTopicsNoteInterceptNoCorrelatedKey,
+      c.internalNoteInterceptPopulationSampledKey: c.gaussianCoreWithTopicsNoteInterceptPopulationSampledKey,
+      c.lowDiligenceNoteInterceptKey: c.lowDiligenceLegacyNoteInterceptKey,
+      c.internalRatingStatusKey: c.gaussianCoreWithTopicsRatingStatusKey,
+    }
+
+  def _get_user_col_mapping(self) -> Dict[str, str]:
+    """Returns an empty user-column mapping: this scorer defines no custom user column names."""
+    return {}
+
+  def get_scored_notes_cols(self) -> List[str]:
+    """Returns the scoredNotes output columns: the note id plus the gaussianCoreWithTopics* keys."""
+    return [
+      c.noteIdKey,
+      c.gaussianCoreWithTopicsNoteInterceptKey,
+      c.gaussianCoreWithTopicsNoteFactor1Key,
+      c.gaussianCoreWithTopicsRatingStatusKey,
+      c.gaussianCoreWithTopicsActiveRulesKey,
+      c.gaussianCoreWithTopicsNumFinalRoundRatingsKey,
+      c.gaussianCoreWithTopicsNoteInterceptNoHighVolKey,
+      c.gaussianCoreWithTopicsNoteInterceptNoCorrelatedKey,
+      c.gaussianCoreWithTopicsNoteInterceptPopulationSampledKey,
+    ]
+
+  def get_helpfulness_scores_cols(self) -> List[str]:
+    """Returns the helpfulnessScores output columns: only the rater participant id."""
+    return [
+      c.raterParticipantIdKey,
+    ]
+
+  def get_auxiliary_note_info_cols(self) -> List[str]:
+    """Returns the auxiliaryNoteInfo output columns: only the note id."""
+    return [
+      c.noteIdKey,
+    ]
diff --git a/scoring/src/scoring/gaussian_scorer.py b/scoring/src/scoring/gaussian_scorer.py
@@ -250,6 +250,7 @@ def __init__(
     self._crhParams = crhParams
     self._crnhParams = crnhParams
     self._useMfNoteParams = useMfNoteParams
+    self._centeredBins = False
 
   def get_prescoring_name(self):
     return "MFCoreScorer"
@@ -367,7 +368,7 @@ def _get_dropped_note_cols(self) -> List[str]:
 
   def _get_dropped_user_cols(self) -> List[str]:
     """Returns a list of columns which should be excluded from helpfulnessScores output."""
-    return []
+    return [c.internalRaterFactor1Key]
 
   def _prepare_data_for_scoring(self, ratings: pd.DataFrame, final: bool = False) -> pd.DataFrame:
     """Prepare data for scoring. This includes filtering out notes and raters which do not meet
@@ -397,7 +398,7 @@ def _return_all_pts(
     params = self._crhParams if isCrh else self._crnhParams
 
     numQuantiles = len(quantileRange)
-    quantileCols = [f"{x:5.2f}" for x in quantileRange]
+    quantileCols = [f"{x:5.3f}" for x in quantileRange]
     quantileArray = np.array(quantileRange, dtype=np.float32)
 
     assert (
@@ -523,21 +524,20 @@ def _return_all_pts(
         quantileCols
       ].values
 
-    if not isCrh:
-      # Ensure notes with fewer than 3 ratings on each side get 0.1 smoothing
-      signCounts = (
-        ratingsForTrainingWithFactors.assign(
-          neg=ratingsForTrainingWithFactors[c.internalRaterFactor1Key] < 0,
-          pos=ratingsForTrainingWithFactors[c.internalRaterFactor1Key] > 0,
-        )
-        .groupby(c.noteIdKey)[["neg", "pos"]]
-        .sum()
-        .astype(int)
+    # Ensure notes with fewer than 3 ratings on each side get 0.1 smoothing
+    signCounts = (
+      ratingsForTrainingWithFactors.assign(
+        neg=ratingsForTrainingWithFactors[c.internalRaterFactor1Key] < 0,
+        pos=ratingsForTrainingWithFactors[c.internalRaterFactor1Key] > 0,
       )
-      insufficientMask = (signCounts["neg"] < 3) | (signCounts["pos"] < 3)
-      insufficientNoteIds = signCounts[insufficientMask].index
-      isInsufficient = np.isin(uniqueNotes, insufficientNoteIds)
-      smoothingValues[isInsufficient] = 0.1
+      .groupby(c.noteIdKey)[["neg", "pos"]]
+      .sum()
+      .astype(int)
+    )
+    insufficientMask = (signCounts["neg"] < 3) | (signCounts["pos"] < 3)
+    insufficientNoteIds = signCounts[insufficientMask].index
+    isInsufficient = np.isin(uniqueNotes, insufficientNoteIds)
+    smoothingValues[isInsufficient] = 0.1
 
     # Smoothing weights
     if params.adaptiveWeightBase is not None:
@@ -589,7 +589,7 @@ def _gaussian_kernel_extrapolator_vectorized(
         ratingsForTrainingWithFactors, quantileRange, isCrh=isCrh, empiricalPriors=empiricalPriors
       )
 
-    quantileCols = [f"{x:5.2f}" for x in quantileRange]
+    quantileCols = [f"{x:5.3f}" for x in quantileRange]
 
     # Compute intercept
     logValues = np.log(clippedValues[quantileCols].values)
@@ -765,31 +765,36 @@ def _score_notes_and_users(
         ].nunique()
         > self._nBinsEachSide
       ):
-        _, l_range = pd.qcut(
+        l_range = (
           ratersWithParams.loc[ratersWithParams[c.internalRaterFactor1Key] < 0][
             c.internalRaterFactor1Key
-          ],
-          self._nBinsEachSide,
-          retbins=True,
+          ]
+          .quantile(list(np.linspace(0.001, 0.999, self._nBinsEachSide)))
+          .values
         )
-        _, r_range = pd.qcut(
+        r_range = (
           ratersWithParams.loc[ratersWithParams[c.internalRaterFactor1Key] > 0][
             c.internalRaterFactor1Key
-          ],
-          self._nBinsEachSide,
-          retbins=True,
+          ]
+          .quantile(list(np.linspace(0.001, 0.999, self._nBinsEachSide)))
+          .values
         )
         lMids = (l_range[:-1] + l_range[1:]) / 2
         rMids = (r_range[:-1] + r_range[1:]) / 2
-        mids = (np.array(sorted(abs(lMids))) + np.array(sorted(abs(rMids)))) / 2
-        crhQuantileRange = np.concatenate([sorted(-mids), mids])
-        crnhQuantileRange = np.concatenate([sorted(-mids), mids])
+        if self._centeredBins:
+          mids = (np.array(sorted(abs(lMids))) + np.array(sorted(abs(rMids)))) / 2
+          crhQuantileRange = np.concatenate([sorted(-mids), mids])
+          crnhQuantileRange = np.concatenate([sorted(-mids), mids])
+        else:
+          crhQuantileRange = np.concatenate([lMids, rMids])
+          crnhQuantileRange = np.concatenate([lMids, rMids])
         logger.info(f"crh quantile range: {crhQuantileRange}")
         logger.info(f"crnh quantile range: {crnhQuantileRange}")
       # if there are not enough unique raters to even calculate bins, do not predict
       else:
-        scoredNotes = pd.DataFrame(columns=self.get_internal_scored_notes_cols())
-        helpfulnessScores = pd.DataFrame(columns=self.get_internal_helpfulness_scores_cols())
+        return pd.DataFrame(columns=self.get_internal_scored_notes_cols()), pd.DataFrame(
+          columns=self.get_internal_helpfulness_scores_cols()
+        )
 
     else:
       crhQuantileRange = c.quantileRange
@@ -957,6 +962,7 @@ def _score_notes_and_users(
       helpfulnessScores = prescoringRaterModelOutput[
         [
           c.raterParticipantIdKey,
+          c.internalRaterFactor1Key,
         ]
       ]