Remove unused model_p.train.decoder_inputs_split_mapping and model_p.train.decoder_states_split_mapping.

lingvo-bot · copybara-github · commit 7891c0a10b58 · 2021-12-09T18:46:47.000-08:00
PiperOrigin-RevId: 415406130
diff --git a/lingvo/jax/model.py b/lingvo/jax/model.py
@@ -132,22 +132,6 @@ def Params(cls) -> InstantiableParams:
         'all the inputs are replicated. For sharding inputs, this is a '
         '`NestedMap` with keys `map_1d`, `map_2d`, ..., etc.,'
         'which specifies how to shard the inputs of that dimension.')
-    tp.Define(
-        'decoder_inputs_split_mapping', None, 'The PartitionSpec for decoder'
-        'inputs such as partially completed sequence. This is only relevant'
-        'for SPMD sharded models. By default it is None, which means all the'
-        'inputs are replicated. For sharding the decoder inputs, this is a '
-        '`NestedMap` with keys `map_1d`, `map_2d` ..., etc., which specifies'
-        'how to shard the decoder inputs corresponding to that dimension.')
-    tp.Define(
-        'decoder_states_split_mapping', None, 'The PartitionSpec for cached'
-        'decoder states such as keys, values, steps etc. This is only relevant'
-        'for SPMD sharded models. By default it is None, which means all the'
-        'inputs are replicated. For sharding the decoder states, this is a '
-        '`NestedMap` with keys `map_1d`, `map_2d` ..., etc., which specifies'
-        'how to shard the decoder states corresponding to that dimension.')
-
-    # TODO(yonghui): Add other hyper-params.
     return p
 
   def __init__(self, params: InstantiableParams) -> None:
diff --git a/lingvo/jax/tasks/lm/model_params.py b/lingvo/jax/tasks/lm/model_params.py
@@ -51,15 +51,6 @@ def set_sharding_annotations_v1(model_p: InstantiableParams,
   model_p.train.inputs_split_mapping = NestedMap(
       map_1d=((replica_axis, data_axis),),
       map_2d=((replica_axis, data_axis), None))
-  model_p.train.decoder_inputs_split_mapping = NestedMap(
-      map_1d=((replica_axis, data_axis),))
-  model_p.train.decoder_states_split_mapping = NestedMap(
-      map_0d=None,
-      map_4d=(None, (replica_axis, data_axis), mdl_axis, None),
-      # 5d inputs are for the decoder states of shape [layers, seq_len,
-      # batch_size, num_heads, dims_per_head]
-      map_5d=(None, None, (replica_axis, data_axis), mdl_axis, None),
-  )
   model_p.mesh_axis_names = mesh_axis_names
   model_p.lm = model_p.lm.cls.set_sharding_params_v1(
       model_p.lm,