From 1a9f31df5204bab41b9a09f3d620651953d08457 Mon Sep 17 00:00:00 2001 From: Clayton Parnell Date: Fri, 7 Jul 2023 12:19:44 -0700 Subject: [PATCH 1/9] Support protobuf4 --- setup.py | 4 +- src/sagemaker/amazon/record_pb2.py | 834 ++--------------------------- 2 files changed, 53 insertions(+), 785 deletions(-) diff --git a/setup.py b/setup.py index 61fc351a5a..7a59b47f02 100644 --- a/setup.py +++ b/setup.py @@ -52,9 +52,9 @@ def read_requirements(filename): "cloudpickle==2.2.1", "google-pasta", "numpy>=1.9.0,<2.0", - "protobuf>=3.1,<4.0", + "protobuf>=3.1,<5.0", "smdebug_rulesconfig==1.0.1", - "importlib-metadata>=1.4.0,<5.0", + "importlib-metadata>=1.4.0,<7.0", "packaging>=20.0", "pandas", "pathos", diff --git a/src/sagemaker/amazon/record_pb2.py b/src/sagemaker/amazon/record_pb2.py index 183f807980..efa7cc89ed 100644 --- a/src/sagemaker/amazon/record_pb2.py +++ b/src/sagemaker/amazon/record_pb2.py @@ -1,795 +1,63 @@ +# -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
# source: record.proto - -import sys - +"""Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection +from google.protobuf import descriptor_pool as _descriptor_pool from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 - -_b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode("latin1")) - +from google.protobuf.internal import builder as _builder # @@protoc_insertion_point(imports) _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor.FileDescriptor( - name="record.proto", - package="aialgs.data", - syntax="proto2", - serialized_pb=_b( - '\n\x0crecord.proto\x12\x0b\x61ialgs.data"H\n\rFloat32Tensor\x12\x12\n\x06values\x18\x01 \x03(\x02\x42\x02\x10\x01\x12\x10\n\x04keys\x18\x02 \x03(\x04\x42\x02\x10\x01\x12\x11\n\x05shape\x18\x03 \x03(\x04\x42\x02\x10\x01"H\n\rFloat64Tensor\x12\x12\n\x06values\x18\x01 \x03(\x01\x42\x02\x10\x01\x12\x10\n\x04keys\x18\x02 \x03(\x04\x42\x02\x10\x01\x12\x11\n\x05shape\x18\x03 \x03(\x04\x42\x02\x10\x01"F\n\x0bInt32Tensor\x12\x12\n\x06values\x18\x01 \x03(\x05\x42\x02\x10\x01\x12\x10\n\x04keys\x18\x02 \x03(\x04\x42\x02\x10\x01\x12\x11\n\x05shape\x18\x03 \x03(\x04\x42\x02\x10\x01",\n\x05\x42ytes\x12\r\n\x05value\x18\x01 \x03(\x0c\x12\x14\n\x0c\x63ontent_type\x18\x02 \x01(\t"\xd3\x01\n\x05Value\x12\x34\n\x0e\x66loat32_tensor\x18\x02 \x01(\x0b\x32\x1a.aialgs.data.Float32TensorH\x00\x12\x34\n\x0e\x66loat64_tensor\x18\x03 \x01(\x0b\x32\x1a.aialgs.data.Float64TensorH\x00\x12\x30\n\x0cint32_tensor\x18\x07 \x01(\x0b\x32\x18.aialgs.data.Int32TensorH\x00\x12#\n\x05\x62ytes\x18\t \x01(\x0b\x32\x12.aialgs.data.BytesH\x00\x42\x07\n\x05value"\xa9\x02\n\x06Record\x12\x33\n\x08\x66\x65\x61tures\x18\x01 \x03(\x0b\x32!.aialgs.data.Record.FeaturesEntry\x12-\n\x05label\x18\x02 \x03(\x0b\x32\x1e.aialgs.data.Record.LabelEntry\x12\x0b\n\x03uid\x18\x03 
\x01(\t\x12\x10\n\x08metadata\x18\x04 \x01(\t\x12\x15\n\rconfiguration\x18\x05 \x01(\t\x1a\x43\n\rFeaturesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12!\n\x05value\x18\x02 \x01(\x0b\x32\x12.aialgs.data.Value:\x02\x38\x01\x1a@\n\nLabelEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12!\n\x05value\x18\x02 \x01(\x0b\x32\x12.aialgs.data.Value:\x02\x38\x01\x42\x30\n com.amazonaws.aialgorithms.protoB\x0cRecordProtos' - ), -) - - -_FLOAT32TENSOR = _descriptor.Descriptor( - name="Float32Tensor", - full_name="aialgs.data.Float32Tensor", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="values", - full_name="aialgs.data.Float32Tensor.values", - index=0, - number=1, - type=2, - cpp_type=6, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b("\020\001")), - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="keys", - full_name="aialgs.data.Float32Tensor.keys", - index=1, - number=2, - type=4, - cpp_type=4, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b("\020\001")), - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="shape", - full_name="aialgs.data.Float32Tensor.shape", - index=2, - number=3, - type=4, - cpp_type=4, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b("\020\001")), - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - options=None, - is_extendable=False, - syntax="proto2", - extension_ranges=[], - oneofs=[], - 
serialized_start=29, - serialized_end=101, -) - - -_FLOAT64TENSOR = _descriptor.Descriptor( - name="Float64Tensor", - full_name="aialgs.data.Float64Tensor", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="values", - full_name="aialgs.data.Float64Tensor.values", - index=0, - number=1, - type=1, - cpp_type=5, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b("\020\001")), - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="keys", - full_name="aialgs.data.Float64Tensor.keys", - index=1, - number=2, - type=4, - cpp_type=4, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b("\020\001")), - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="shape", - full_name="aialgs.data.Float64Tensor.shape", - index=2, - number=3, - type=4, - cpp_type=4, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b("\020\001")), - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - options=None, - is_extendable=False, - syntax="proto2", - extension_ranges=[], - oneofs=[], - serialized_start=103, - serialized_end=175, -) - - -_INT32TENSOR = _descriptor.Descriptor( - name="Int32Tensor", - full_name="aialgs.data.Int32Tensor", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="values", - full_name="aialgs.data.Int32Tensor.values", - index=0, - number=1, - type=5, - cpp_type=1, - 
label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b("\020\001")), - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="keys", - full_name="aialgs.data.Int32Tensor.keys", - index=1, - number=2, - type=4, - cpp_type=4, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b("\020\001")), - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="shape", - full_name="aialgs.data.Int32Tensor.shape", - index=2, - number=3, - type=4, - cpp_type=4, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b("\020\001")), - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - options=None, - is_extendable=False, - syntax="proto2", - extension_ranges=[], - oneofs=[], - serialized_start=177, - serialized_end=247, -) - - -_BYTES = _descriptor.Descriptor( - name="Bytes", - full_name="aialgs.data.Bytes", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="value", - full_name="aialgs.data.Bytes.value", - index=0, - number=1, - type=12, - cpp_type=9, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="content_type", - full_name="aialgs.data.Bytes.content_type", - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - 
default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - options=None, - is_extendable=False, - syntax="proto2", - extension_ranges=[], - oneofs=[], - serialized_start=249, - serialized_end=293, -) - - -_VALUE = _descriptor.Descriptor( - name="Value", - full_name="aialgs.data.Value", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="float32_tensor", - full_name="aialgs.data.Value.float32_tensor", - index=0, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="float64_tensor", - full_name="aialgs.data.Value.float64_tensor", - index=1, - number=3, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="int32_tensor", - full_name="aialgs.data.Value.int32_tensor", - index=2, - number=7, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="bytes", - full_name="aialgs.data.Value.bytes", - index=3, - number=9, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - 
nested_types=[], - enum_types=[], - options=None, - is_extendable=False, - syntax="proto2", - extension_ranges=[], - oneofs=[ - _descriptor.OneofDescriptor( - name="value", - full_name="aialgs.data.Value.value", - index=0, - containing_type=None, - fields=[], - ) - ], - serialized_start=296, - serialized_end=507, -) - - -_RECORD_FEATURESENTRY = _descriptor.Descriptor( - name="FeaturesEntry", - full_name="aialgs.data.Record.FeaturesEntry", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="key", - full_name="aialgs.data.Record.FeaturesEntry.key", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="value", - full_name="aialgs.data.Record.FeaturesEntry.value", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - options=_descriptor._ParseOptions(descriptor_pb2.MessageOptions(), _b("8\001")), - is_extendable=False, - syntax="proto2", - extension_ranges=[], - oneofs=[], - serialized_start=674, - serialized_end=741, -) - -_RECORD_LABELENTRY = _descriptor.Descriptor( - name="LabelEntry", - full_name="aialgs.data.Record.LabelEntry", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="key", - full_name="aialgs.data.Record.LabelEntry.key", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - 
is_extension=False, - extension_scope=None, - options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="value", - full_name="aialgs.data.Record.LabelEntry.value", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - options=_descriptor._ParseOptions(descriptor_pb2.MessageOptions(), _b("8\001")), - is_extendable=False, - syntax="proto2", - extension_ranges=[], - oneofs=[], - serialized_start=743, - serialized_end=807, -) - -_RECORD = _descriptor.Descriptor( - name="Record", - full_name="aialgs.data.Record", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="features", - full_name="aialgs.data.Record.features", - index=0, - number=1, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="label", - full_name="aialgs.data.Record.label", - index=1, - number=2, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="uid", - full_name="aialgs.data.Record.uid", - index=2, - number=3, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="metadata", - 
full_name="aialgs.data.Record.metadata", - index=3, - number=4, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="configuration", - full_name="aialgs.data.Record.configuration", - index=4, - number=5, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[_RECORD_FEATURESENTRY, _RECORD_LABELENTRY], - enum_types=[], - options=None, - is_extendable=False, - syntax="proto2", - extension_ranges=[], - oneofs=[], - serialized_start=510, - serialized_end=807, -) - -_VALUE.fields_by_name["float32_tensor"].message_type = _FLOAT32TENSOR -_VALUE.fields_by_name["float64_tensor"].message_type = _FLOAT64TENSOR -_VALUE.fields_by_name["int32_tensor"].message_type = _INT32TENSOR -_VALUE.fields_by_name["bytes"].message_type = _BYTES -_VALUE.oneofs_by_name["value"].fields.append(_VALUE.fields_by_name["float32_tensor"]) -_VALUE.fields_by_name["float32_tensor"].containing_oneof = _VALUE.oneofs_by_name["value"] -_VALUE.oneofs_by_name["value"].fields.append(_VALUE.fields_by_name["float64_tensor"]) -_VALUE.fields_by_name["float64_tensor"].containing_oneof = _VALUE.oneofs_by_name["value"] -_VALUE.oneofs_by_name["value"].fields.append(_VALUE.fields_by_name["int32_tensor"]) -_VALUE.fields_by_name["int32_tensor"].containing_oneof = _VALUE.oneofs_by_name["value"] -_VALUE.oneofs_by_name["value"].fields.append(_VALUE.fields_by_name["bytes"]) -_VALUE.fields_by_name["bytes"].containing_oneof = _VALUE.oneofs_by_name["value"] -_RECORD_FEATURESENTRY.fields_by_name["value"].message_type = _VALUE -_RECORD_FEATURESENTRY.containing_type = 
_RECORD -_RECORD_LABELENTRY.fields_by_name["value"].message_type = _VALUE -_RECORD_LABELENTRY.containing_type = _RECORD -_RECORD.fields_by_name["features"].message_type = _RECORD_FEATURESENTRY -_RECORD.fields_by_name["label"].message_type = _RECORD_LABELENTRY -DESCRIPTOR.message_types_by_name["Float32Tensor"] = _FLOAT32TENSOR -DESCRIPTOR.message_types_by_name["Float64Tensor"] = _FLOAT64TENSOR -DESCRIPTOR.message_types_by_name["Int32Tensor"] = _INT32TENSOR -DESCRIPTOR.message_types_by_name["Bytes"] = _BYTES -DESCRIPTOR.message_types_by_name["Value"] = _VALUE -DESCRIPTOR.message_types_by_name["Record"] = _RECORD -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -Float32Tensor = _reflection.GeneratedProtocolMessageType( - "Float32Tensor", - (_message.Message,), - dict( - DESCRIPTOR=_FLOAT32TENSOR, - __module__="record_pb2" - # @@protoc_insertion_point(class_scope:aialgs.data.Float32Tensor) - ), -) -_sym_db.RegisterMessage(Float32Tensor) - -Float64Tensor = _reflection.GeneratedProtocolMessageType( - "Float64Tensor", - (_message.Message,), - dict( - DESCRIPTOR=_FLOAT64TENSOR, - __module__="record_pb2" - # @@protoc_insertion_point(class_scope:aialgs.data.Float64Tensor) - ), -) -_sym_db.RegisterMessage(Float64Tensor) - -Int32Tensor = _reflection.GeneratedProtocolMessageType( - "Int32Tensor", - (_message.Message,), - dict( - DESCRIPTOR=_INT32TENSOR, - __module__="record_pb2" - # @@protoc_insertion_point(class_scope:aialgs.data.Int32Tensor) - ), -) -_sym_db.RegisterMessage(Int32Tensor) - -Bytes = _reflection.GeneratedProtocolMessageType( - "Bytes", - (_message.Message,), - dict( - DESCRIPTOR=_BYTES, - __module__="record_pb2" - # @@protoc_insertion_point(class_scope:aialgs.data.Bytes) - ), -) -_sym_db.RegisterMessage(Bytes) - -Value = _reflection.GeneratedProtocolMessageType( - "Value", - (_message.Message,), - dict( - DESCRIPTOR=_VALUE, - __module__="record_pb2" - # @@protoc_insertion_point(class_scope:aialgs.data.Value) - ), -) -_sym_db.RegisterMessage(Value) - -Record = 
_reflection.GeneratedProtocolMessageType( - "Record", - (_message.Message,), - dict( - FeaturesEntry=_reflection.GeneratedProtocolMessageType( - "FeaturesEntry", - (_message.Message,), - dict( - DESCRIPTOR=_RECORD_FEATURESENTRY, - __module__="record_pb2" - # @@protoc_insertion_point(class_scope:aialgs.data.Record.FeaturesEntry) - ), - ), - LabelEntry=_reflection.GeneratedProtocolMessageType( - "LabelEntry", - (_message.Message,), - dict( - DESCRIPTOR=_RECORD_LABELENTRY, - __module__="record_pb2" - # @@protoc_insertion_point(class_scope:aialgs.data.Record.LabelEntry) - ), - ), - DESCRIPTOR=_RECORD, - __module__="record_pb2" - # @@protoc_insertion_point(class_scope:aialgs.data.Record) - ), -) -_sym_db.RegisterMessage(Record) -_sym_db.RegisterMessage(Record.FeaturesEntry) -_sym_db.RegisterMessage(Record.LabelEntry) -DESCRIPTOR.has_options = True -DESCRIPTOR._options = _descriptor._ParseOptions( - descriptor_pb2.FileOptions(), _b("\n com.amazonaws.aialgorithms.protoB\014RecordProtos") -) -_FLOAT32TENSOR.fields_by_name["values"].has_options = True -_FLOAT32TENSOR.fields_by_name["values"]._options = _descriptor._ParseOptions( - descriptor_pb2.FieldOptions(), _b("\020\001") -) -_FLOAT32TENSOR.fields_by_name["keys"].has_options = True -_FLOAT32TENSOR.fields_by_name["keys"]._options = _descriptor._ParseOptions( - descriptor_pb2.FieldOptions(), _b("\020\001") -) -_FLOAT32TENSOR.fields_by_name["shape"].has_options = True -_FLOAT32TENSOR.fields_by_name["shape"]._options = _descriptor._ParseOptions( - descriptor_pb2.FieldOptions(), _b("\020\001") -) -_FLOAT64TENSOR.fields_by_name["values"].has_options = True -_FLOAT64TENSOR.fields_by_name["values"]._options = _descriptor._ParseOptions( - descriptor_pb2.FieldOptions(), _b("\020\001") -) -_FLOAT64TENSOR.fields_by_name["keys"].has_options = True -_FLOAT64TENSOR.fields_by_name["keys"]._options = _descriptor._ParseOptions( - descriptor_pb2.FieldOptions(), _b("\020\001") -) -_FLOAT64TENSOR.fields_by_name["shape"].has_options = True 
-_FLOAT64TENSOR.fields_by_name["shape"]._options = _descriptor._ParseOptions( - descriptor_pb2.FieldOptions(), _b("\020\001") -) -_INT32TENSOR.fields_by_name["values"].has_options = True -_INT32TENSOR.fields_by_name["values"]._options = _descriptor._ParseOptions( - descriptor_pb2.FieldOptions(), _b("\020\001") -) -_INT32TENSOR.fields_by_name["keys"].has_options = True -_INT32TENSOR.fields_by_name["keys"]._options = _descriptor._ParseOptions( - descriptor_pb2.FieldOptions(), _b("\020\001") -) -_INT32TENSOR.fields_by_name["shape"].has_options = True -_INT32TENSOR.fields_by_name["shape"]._options = _descriptor._ParseOptions( - descriptor_pb2.FieldOptions(), _b("\020\001") -) -_RECORD_FEATURESENTRY.has_options = True -_RECORD_FEATURESENTRY._options = _descriptor._ParseOptions( - descriptor_pb2.MessageOptions(), _b("8\001") -) -_RECORD_LABELENTRY.has_options = True -_RECORD_LABELENTRY._options = _descriptor._ParseOptions( - descriptor_pb2.MessageOptions(), _b("8\001") -) +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0crecord.proto\x12\x0b\x61ialgs.data\"H\n\rFloat32Tensor\x12\x12\n\x06values\x18\x01 \x03(\x02\x42\x02\x10\x01\x12\x10\n\x04keys\x18\x02 \x03(\x04\x42\x02\x10\x01\x12\x11\n\x05shape\x18\x03 \x03(\x04\x42\x02\x10\x01\"H\n\rFloat64Tensor\x12\x12\n\x06values\x18\x01 \x03(\x01\x42\x02\x10\x01\x12\x10\n\x04keys\x18\x02 \x03(\x04\x42\x02\x10\x01\x12\x11\n\x05shape\x18\x03 \x03(\x04\x42\x02\x10\x01\"F\n\x0bInt32Tensor\x12\x12\n\x06values\x18\x01 \x03(\x05\x42\x02\x10\x01\x12\x10\n\x04keys\x18\x02 \x03(\x04\x42\x02\x10\x01\x12\x11\n\x05shape\x18\x03 \x03(\x04\x42\x02\x10\x01\",\n\x05\x42ytes\x12\r\n\x05value\x18\x01 \x03(\x0c\x12\x14\n\x0c\x63ontent_type\x18\x02 \x01(\t\"\xd3\x01\n\x05Value\x12\x34\n\x0e\x66loat32_tensor\x18\x02 \x01(\x0b\x32\x1a.aialgs.data.Float32TensorH\x00\x12\x34\n\x0e\x66loat64_tensor\x18\x03 \x01(\x0b\x32\x1a.aialgs.data.Float64TensorH\x00\x12\x30\n\x0cint32_tensor\x18\x07 
\x01(\x0b\x32\x18.aialgs.data.Int32TensorH\x00\x12#\n\x05\x62ytes\x18\t \x01(\x0b\x32\x12.aialgs.data.BytesH\x00\x42\x07\n\x05value\"\xa9\x02\n\x06Record\x12\x33\n\x08\x66\x65\x61tures\x18\x01 \x03(\x0b\x32!.aialgs.data.Record.FeaturesEntry\x12-\n\x05label\x18\x02 \x03(\x0b\x32\x1e.aialgs.data.Record.LabelEntry\x12\x0b\n\x03uid\x18\x03 \x01(\t\x12\x10\n\x08metadata\x18\x04 \x01(\t\x12\x15\n\rconfiguration\x18\x05 \x01(\t\x1a\x43\n\rFeaturesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12!\n\x05value\x18\x02 \x01(\x0b\x32\x12.aialgs.data.Value:\x02\x38\x01\x1a@\n\nLabelEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12!\n\x05value\x18\x02 \x01(\x0b\x32\x12.aialgs.data.Value:\x02\x38\x01\x42\x30\n com.amazonaws.aialgorithms.protoB\x0cRecordProtos') + +_globals = globals() +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'record_pb2', _globals) +if _descriptor._USE_C_DESCRIPTORS == False: + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'\n com.amazonaws.aialgorithms.protoB\014RecordProtos' + _FLOAT32TENSOR.fields_by_name['values']._options = None + _FLOAT32TENSOR.fields_by_name['values']._serialized_options = b'\020\001' + _FLOAT32TENSOR.fields_by_name['keys']._options = None + _FLOAT32TENSOR.fields_by_name['keys']._serialized_options = b'\020\001' + _FLOAT32TENSOR.fields_by_name['shape']._options = None + _FLOAT32TENSOR.fields_by_name['shape']._serialized_options = b'\020\001' + _FLOAT64TENSOR.fields_by_name['values']._options = None + _FLOAT64TENSOR.fields_by_name['values']._serialized_options = b'\020\001' + _FLOAT64TENSOR.fields_by_name['keys']._options = None + _FLOAT64TENSOR.fields_by_name['keys']._serialized_options = b'\020\001' + _FLOAT64TENSOR.fields_by_name['shape']._options = None + _FLOAT64TENSOR.fields_by_name['shape']._serialized_options = b'\020\001' + _INT32TENSOR.fields_by_name['values']._options = None + _INT32TENSOR.fields_by_name['values']._serialized_options = b'\020\001' + 
_INT32TENSOR.fields_by_name['keys']._options = None + _INT32TENSOR.fields_by_name['keys']._serialized_options = b'\020\001' + _INT32TENSOR.fields_by_name['shape']._options = None + _INT32TENSOR.fields_by_name['shape']._serialized_options = b'\020\001' + _RECORD_FEATURESENTRY._options = None + _RECORD_FEATURESENTRY._serialized_options = b'8\001' + _RECORD_LABELENTRY._options = None + _RECORD_LABELENTRY._serialized_options = b'8\001' + _globals['_FLOAT32TENSOR']._serialized_start=29 + _globals['_FLOAT32TENSOR']._serialized_end=101 + _globals['_FLOAT64TENSOR']._serialized_start=103 + _globals['_FLOAT64TENSOR']._serialized_end=175 + _globals['_INT32TENSOR']._serialized_start=177 + _globals['_INT32TENSOR']._serialized_end=247 + _globals['_BYTES']._serialized_start=249 + _globals['_BYTES']._serialized_end=293 + _globals['_VALUE']._serialized_start=296 + _globals['_VALUE']._serialized_end=507 + _globals['_RECORD']._serialized_start=510 + _globals['_RECORD']._serialized_end=807 + _globals['_RECORD_FEATURESENTRY']._serialized_start=674 + _globals['_RECORD_FEATURESENTRY']._serialized_end=741 + _globals['_RECORD_LABELENTRY']._serialized_start=743 + _globals['_RECORD_LABELENTRY']._serialized_end=807 # @@protoc_insertion_point(module_scope) From 6554802bf66cf1c27ef8e0f5c7887519b42cb94f Mon Sep 17 00:00:00 2001 From: Clayton Parnell Date: Fri, 7 Jul 2023 12:33:07 -0700 Subject: [PATCH 2/9] formatting... 
--- src/sagemaker/amazon/record_pb2.py | 89 +++++++++++++++--------------- 1 file changed, 45 insertions(+), 44 deletions(-) diff --git a/src/sagemaker/amazon/record_pb2.py b/src/sagemaker/amazon/record_pb2.py index efa7cc89ed..efa973d55d 100644 --- a/src/sagemaker/amazon/record_pb2.py +++ b/src/sagemaker/amazon/record_pb2.py @@ -6,58 +6,59 @@ from google.protobuf import descriptor_pool as _descriptor_pool from google.protobuf import symbol_database as _symbol_database from google.protobuf.internal import builder as _builder + # @@protoc_insertion_point(imports) _sym_db = _symbol_database.Default() - - -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0crecord.proto\x12\x0b\x61ialgs.data\"H\n\rFloat32Tensor\x12\x12\n\x06values\x18\x01 \x03(\x02\x42\x02\x10\x01\x12\x10\n\x04keys\x18\x02 \x03(\x04\x42\x02\x10\x01\x12\x11\n\x05shape\x18\x03 \x03(\x04\x42\x02\x10\x01\"H\n\rFloat64Tensor\x12\x12\n\x06values\x18\x01 \x03(\x01\x42\x02\x10\x01\x12\x10\n\x04keys\x18\x02 \x03(\x04\x42\x02\x10\x01\x12\x11\n\x05shape\x18\x03 \x03(\x04\x42\x02\x10\x01\"F\n\x0bInt32Tensor\x12\x12\n\x06values\x18\x01 \x03(\x05\x42\x02\x10\x01\x12\x10\n\x04keys\x18\x02 \x03(\x04\x42\x02\x10\x01\x12\x11\n\x05shape\x18\x03 \x03(\x04\x42\x02\x10\x01\",\n\x05\x42ytes\x12\r\n\x05value\x18\x01 \x03(\x0c\x12\x14\n\x0c\x63ontent_type\x18\x02 \x01(\t\"\xd3\x01\n\x05Value\x12\x34\n\x0e\x66loat32_tensor\x18\x02 \x01(\x0b\x32\x1a.aialgs.data.Float32TensorH\x00\x12\x34\n\x0e\x66loat64_tensor\x18\x03 \x01(\x0b\x32\x1a.aialgs.data.Float64TensorH\x00\x12\x30\n\x0cint32_tensor\x18\x07 \x01(\x0b\x32\x18.aialgs.data.Int32TensorH\x00\x12#\n\x05\x62ytes\x18\t \x01(\x0b\x32\x12.aialgs.data.BytesH\x00\x42\x07\n\x05value\"\xa9\x02\n\x06Record\x12\x33\n\x08\x66\x65\x61tures\x18\x01 \x03(\x0b\x32!.aialgs.data.Record.FeaturesEntry\x12-\n\x05label\x18\x02 \x03(\x0b\x32\x1e.aialgs.data.Record.LabelEntry\x12\x0b\n\x03uid\x18\x03 \x01(\t\x12\x10\n\x08metadata\x18\x04 \x01(\t\x12\x15\n\rconfiguration\x18\x05 
\x01(\t\x1a\x43\n\rFeaturesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12!\n\x05value\x18\x02 \x01(\x0b\x32\x12.aialgs.data.Value:\x02\x38\x01\x1a@\n\nLabelEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12!\n\x05value\x18\x02 \x01(\x0b\x32\x12.aialgs.data.Value:\x02\x38\x01\x42\x30\n com.amazonaws.aialgorithms.protoB\x0cRecordProtos') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( + b'\n\x0crecord.proto\x12\x0b\x61ialgs.data"H\n\rFloat32Tensor\x12\x12\n\x06values\x18\x01 \x03(\x02\x42\x02\x10\x01\x12\x10\n\x04keys\x18\x02 \x03(\x04\x42\x02\x10\x01\x12\x11\n\x05shape\x18\x03 \x03(\x04\x42\x02\x10\x01"H\n\rFloat64Tensor\x12\x12\n\x06values\x18\x01 \x03(\x01\x42\x02\x10\x01\x12\x10\n\x04keys\x18\x02 \x03(\x04\x42\x02\x10\x01\x12\x11\n\x05shape\x18\x03 \x03(\x04\x42\x02\x10\x01"F\n\x0bInt32Tensor\x12\x12\n\x06values\x18\x01 \x03(\x05\x42\x02\x10\x01\x12\x10\n\x04keys\x18\x02 \x03(\x04\x42\x02\x10\x01\x12\x11\n\x05shape\x18\x03 \x03(\x04\x42\x02\x10\x01",\n\x05\x42ytes\x12\r\n\x05value\x18\x01 \x03(\x0c\x12\x14\n\x0c\x63ontent_type\x18\x02 \x01(\t"\xd3\x01\n\x05Value\x12\x34\n\x0e\x66loat32_tensor\x18\x02 \x01(\x0b\x32\x1a.aialgs.data.Float32TensorH\x00\x12\x34\n\x0e\x66loat64_tensor\x18\x03 \x01(\x0b\x32\x1a.aialgs.data.Float64TensorH\x00\x12\x30\n\x0cint32_tensor\x18\x07 \x01(\x0b\x32\x18.aialgs.data.Int32TensorH\x00\x12#\n\x05\x62ytes\x18\t \x01(\x0b\x32\x12.aialgs.data.BytesH\x00\x42\x07\n\x05value"\xa9\x02\n\x06Record\x12\x33\n\x08\x66\x65\x61tures\x18\x01 \x03(\x0b\x32!.aialgs.data.Record.FeaturesEntry\x12-\n\x05label\x18\x02 \x03(\x0b\x32\x1e.aialgs.data.Record.LabelEntry\x12\x0b\n\x03uid\x18\x03 \x01(\t\x12\x10\n\x08metadata\x18\x04 \x01(\t\x12\x15\n\rconfiguration\x18\x05 \x01(\t\x1a\x43\n\rFeaturesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12!\n\x05value\x18\x02 \x01(\x0b\x32\x12.aialgs.data.Value:\x02\x38\x01\x1a@\n\nLabelEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12!\n\x05value\x18\x02 \x01(\x0b\x32\x12.aialgs.data.Value:\x02\x38\x01\x42\x30\n 
com.amazonaws.aialgorithms.protoB\x0cRecordProtos' +) _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) -_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'record_pb2', _globals) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, "record_pb2", _globals) if _descriptor._USE_C_DESCRIPTORS == False: - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'\n com.amazonaws.aialgorithms.protoB\014RecordProtos' - _FLOAT32TENSOR.fields_by_name['values']._options = None - _FLOAT32TENSOR.fields_by_name['values']._serialized_options = b'\020\001' - _FLOAT32TENSOR.fields_by_name['keys']._options = None - _FLOAT32TENSOR.fields_by_name['keys']._serialized_options = b'\020\001' - _FLOAT32TENSOR.fields_by_name['shape']._options = None - _FLOAT32TENSOR.fields_by_name['shape']._serialized_options = b'\020\001' - _FLOAT64TENSOR.fields_by_name['values']._options = None - _FLOAT64TENSOR.fields_by_name['values']._serialized_options = b'\020\001' - _FLOAT64TENSOR.fields_by_name['keys']._options = None - _FLOAT64TENSOR.fields_by_name['keys']._serialized_options = b'\020\001' - _FLOAT64TENSOR.fields_by_name['shape']._options = None - _FLOAT64TENSOR.fields_by_name['shape']._serialized_options = b'\020\001' - _INT32TENSOR.fields_by_name['values']._options = None - _INT32TENSOR.fields_by_name['values']._serialized_options = b'\020\001' - _INT32TENSOR.fields_by_name['keys']._options = None - _INT32TENSOR.fields_by_name['keys']._serialized_options = b'\020\001' - _INT32TENSOR.fields_by_name['shape']._options = None - _INT32TENSOR.fields_by_name['shape']._serialized_options = b'\020\001' - _RECORD_FEATURESENTRY._options = None - _RECORD_FEATURESENTRY._serialized_options = b'8\001' - _RECORD_LABELENTRY._options = None - _RECORD_LABELENTRY._serialized_options = b'8\001' - _globals['_FLOAT32TENSOR']._serialized_start=29 - _globals['_FLOAT32TENSOR']._serialized_end=101 - _globals['_FLOAT64TENSOR']._serialized_start=103 - 
_globals['_FLOAT64TENSOR']._serialized_end=175 - _globals['_INT32TENSOR']._serialized_start=177 - _globals['_INT32TENSOR']._serialized_end=247 - _globals['_BYTES']._serialized_start=249 - _globals['_BYTES']._serialized_end=293 - _globals['_VALUE']._serialized_start=296 - _globals['_VALUE']._serialized_end=507 - _globals['_RECORD']._serialized_start=510 - _globals['_RECORD']._serialized_end=807 - _globals['_RECORD_FEATURESENTRY']._serialized_start=674 - _globals['_RECORD_FEATURESENTRY']._serialized_end=741 - _globals['_RECORD_LABELENTRY']._serialized_start=743 - _globals['_RECORD_LABELENTRY']._serialized_end=807 + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b"\n com.amazonaws.aialgorithms.protoB\014RecordProtos" + _FLOAT32TENSOR.fields_by_name["values"]._options = None + _FLOAT32TENSOR.fields_by_name["values"]._serialized_options = b"\020\001" + _FLOAT32TENSOR.fields_by_name["keys"]._options = None + _FLOAT32TENSOR.fields_by_name["keys"]._serialized_options = b"\020\001" + _FLOAT32TENSOR.fields_by_name["shape"]._options = None + _FLOAT32TENSOR.fields_by_name["shape"]._serialized_options = b"\020\001" + _FLOAT64TENSOR.fields_by_name["values"]._options = None + _FLOAT64TENSOR.fields_by_name["values"]._serialized_options = b"\020\001" + _FLOAT64TENSOR.fields_by_name["keys"]._options = None + _FLOAT64TENSOR.fields_by_name["keys"]._serialized_options = b"\020\001" + _FLOAT64TENSOR.fields_by_name["shape"]._options = None + _FLOAT64TENSOR.fields_by_name["shape"]._serialized_options = b"\020\001" + _INT32TENSOR.fields_by_name["values"]._options = None + _INT32TENSOR.fields_by_name["values"]._serialized_options = b"\020\001" + _INT32TENSOR.fields_by_name["keys"]._options = None + _INT32TENSOR.fields_by_name["keys"]._serialized_options = b"\020\001" + _INT32TENSOR.fields_by_name["shape"]._options = None + _INT32TENSOR.fields_by_name["shape"]._serialized_options = b"\020\001" + _RECORD_FEATURESENTRY._options = None + _RECORD_FEATURESENTRY._serialized_options 
= b"8\001" + _RECORD_LABELENTRY._options = None + _RECORD_LABELENTRY._serialized_options = b"8\001" + _globals["_FLOAT32TENSOR"]._serialized_start = 29 + _globals["_FLOAT32TENSOR"]._serialized_end = 101 + _globals["_FLOAT64TENSOR"]._serialized_start = 103 + _globals["_FLOAT64TENSOR"]._serialized_end = 175 + _globals["_INT32TENSOR"]._serialized_start = 177 + _globals["_INT32TENSOR"]._serialized_end = 247 + _globals["_BYTES"]._serialized_start = 249 + _globals["_BYTES"]._serialized_end = 293 + _globals["_VALUE"]._serialized_start = 296 + _globals["_VALUE"]._serialized_end = 507 + _globals["_RECORD"]._serialized_start = 510 + _globals["_RECORD"]._serialized_end = 807 + _globals["_RECORD_FEATURESENTRY"]._serialized_start = 674 + _globals["_RECORD_FEATURESENTRY"]._serialized_end = 741 + _globals["_RECORD_LABELENTRY"]._serialized_start = 743 + _globals["_RECORD_LABELENTRY"]._serialized_end = 807 # @@protoc_insertion_point(module_scope) From 22cc812ad3b8550603eb57a099b9baad2a42bd6a Mon Sep 17 00:00:00 2001 From: Clayton Parnell Date: Fri, 7 Jul 2023 14:26:04 -0700 Subject: [PATCH 3/9] Ignore pylint --- src/sagemaker/amazon/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sagemaker/amazon/common.py b/src/sagemaker/amazon/common.py index 4632bda628..aa6b01a16f 100644 --- a/src/sagemaker/amazon/common.py +++ b/src/sagemaker/amazon/common.py @@ -20,7 +20,7 @@ import numpy as np -from sagemaker.amazon.record_pb2 import Record +from sagemaker.amazon.record_pb2 import Record # pylint: disable=E0611 from sagemaker.deprecations import deprecated_class from sagemaker.deserializers import SimpleBaseDeserializer from sagemaker.serializers import SimpleBaseSerializer From 64c2fce343d445c52579bd54adddd701a6ff6ace Mon Sep 17 00:00:00 2001 From: Clayton Parnell Date: Fri, 7 Jul 2023 14:26:25 -0700 Subject: [PATCH 4/9] Add .proto file for ref --- src/sagemaker/amazon/record.proto | 123 ++++++++++++++++++++++++++++++ 1 file changed, 123 insertions(+) create 
mode 100644 src/sagemaker/amazon/record.proto diff --git a/src/sagemaker/amazon/record.proto b/src/sagemaker/amazon/record.proto new file mode 100644 index 0000000000..6b9fb51474 --- /dev/null +++ b/src/sagemaker/amazon/record.proto @@ -0,0 +1,123 @@ +syntax = "proto2"; + +package aialgs.data; + +option java_package = "com.amazonaws.aialgorithms.proto"; +option java_outer_classname = "RecordProtos"; + +// A sparse or dense rank-R tensor that stores data as floats (float32). +message Float32Tensor { + // Each value in the vector. If keys is empty this is treated as a + // dense vector. + repeated float values = 1 [packed = true]; + + // If not empty then the vector is treated as sparse with + // each key specifying the location of the value in the sparse vector. + repeated uint64 keys = 2 [packed = true]; + + // Optional shape which will allow the vector to represent a matrix. + // e.g. if shape = [ 10, 20 ] then floor(keys[i] / 10) will give the row + // and keys[i] % 20 will give the column. + // This also supports n-dimensional tensors. + // NB. this must be specified if the tensor is sparse. + repeated uint64 shape = 3 [packed = true]; +} + +// A sparse or dense rank-R tensor that stores data as doubles (float64). +message Float64Tensor { + // Each value in the vector. If keys is empty this is treated as a + // dense vector. + repeated double values = 1 [packed = true]; + + // If not empty then the vector is treated as sparse with + // each key specifying the location of the value in the sparse vector. + repeated uint64 keys = 2 [packed = true]; + + // Optional shape which will allow the vector to represent a matrix. + // e.g. if shape = [ 10, 20 ] then floor(keys[i] / 10) will give the row + // and keys[i] % 20 will give the column. + // This also supports n-dimensional tensors. + // NB. this must be specified if the tensor is sparse. + repeated uint64 shape = 3 [packed = true]; +} + +// A sparse or dense rank-R tensor that stores data as 32-bit ints (int32).
+message Int32Tensor { + // Each value in the vector. If keys is empty this is treated as a + // dense vector. + repeated int32 values = 1 [packed = true]; + + // If not empty then the vector is treated as sparse with + // each key specifying the location of the value in the sparse vector. + repeated uint64 keys = 2 [packed = true]; + + // Optional shape which will allow the vector to represent a matrix. + // e.g. if shape = [ 10, 20 ] then floor(keys[i] / 10) will give the row + // and keys[i] % 20 will give the column. + // This also supports n-dimensional tensors. + // NB. this must be specified if the tensor is sparse. + repeated uint64 shape = 3 [packed = true]; +} + +// Support for storing binary data for parsing in other ways (such as JPEG/etc). +// This is an example of another type of value and may not immediately be supported. +message Bytes { + repeated bytes value = 1; + + // Stores the content type of the data if known. + // This will allow the possibility of using decoders for common formats + // in the future. + optional string content_type = 2; +} + +message Value { + oneof value { + // The numbering assumes the possible use of: + // - float16, float128 + // - int8, int16, int32 + Float32Tensor float32_tensor = 2; + Float64Tensor float64_tensor = 3; + Int32Tensor int32_tensor = 7; + Bytes bytes = 9; + } +} + +message Record { + // Map from the name of the feature to the value. + // + // For vectors and libsvm-like datasets, + // a single feature with the name `values` + // should be specified. + map<string, Value> features = 1; + + // Optional set of labels for this record. + // Similar to features field above, the key used for + // generic scalar / vector labels should be 'values' + map<string, Value> label = 2; + + // Unique identifier for this record in the dataset. + // + // Whilst not necessary, this allows better + // debugging where there are data issues. + // + // This is not used by the algorithm directly.
+ optional string uid = 3; + + // Textual metadata describing the record. + // + // This may include JSON-serialized information + // about the source of the record. + // + // This is not used by the algorithm directly. + optional string metadata = 4; + + // Optional serialized JSON object that allows per-record + // hyper-parameters/configuration/other information to be set. + // + // The meaning/interpretation of this field is defined by + // the algorithm author and may not be supported. + // + // This is used to pass additional inference configuration + // when batch inference is used (e.g. types of scores to return). + optional string configuration = 5; +} From 0e2f799e725437ab387ac7aed86166032d4049ac Mon Sep 17 00:00:00 2001 From: Clayton Parnell Date: Fri, 7 Jul 2023 14:26:40 -0700 Subject: [PATCH 5/9] Add pyi file --- src/sagemaker/amazon/record_pb2.pyi | 84 +++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) create mode 100644 src/sagemaker/amazon/record_pb2.pyi diff --git a/src/sagemaker/amazon/record_pb2.pyi b/src/sagemaker/amazon/record_pb2.pyi new file mode 100644 index 0000000000..6fc06fcfb9 --- /dev/null +++ b/src/sagemaker/amazon/record_pb2.pyi @@ -0,0 +1,84 @@ +from google.protobuf.internal import containers as _containers +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from typing import ClassVar as _ClassVar, Iterable as _Iterable, Mapping as _Mapping, Optional as _Optional, Union as _Union + +DESCRIPTOR: _descriptor.FileDescriptor + +class Float32Tensor(_message.Message): + __slots__ = ["values", "keys", "shape"] + VALUES_FIELD_NUMBER: _ClassVar[int] + KEYS_FIELD_NUMBER: _ClassVar[int] + SHAPE_FIELD_NUMBER: _ClassVar[int] + values: _containers.RepeatedScalarFieldContainer[float] + keys: _containers.RepeatedScalarFieldContainer[int] + shape: _containers.RepeatedScalarFieldContainer[int] + def __init__(self, values: _Optional[_Iterable[float]] = ..., keys: 
_Optional[_Iterable[int]] = ..., shape: _Optional[_Iterable[int]] = ...) -> None: ... + +class Float64Tensor(_message.Message): + __slots__ = ["values", "keys", "shape"] + VALUES_FIELD_NUMBER: _ClassVar[int] + KEYS_FIELD_NUMBER: _ClassVar[int] + SHAPE_FIELD_NUMBER: _ClassVar[int] + values: _containers.RepeatedScalarFieldContainer[float] + keys: _containers.RepeatedScalarFieldContainer[int] + shape: _containers.RepeatedScalarFieldContainer[int] + def __init__(self, values: _Optional[_Iterable[float]] = ..., keys: _Optional[_Iterable[int]] = ..., shape: _Optional[_Iterable[int]] = ...) -> None: ... + +class Int32Tensor(_message.Message): + __slots__ = ["values", "keys", "shape"] + VALUES_FIELD_NUMBER: _ClassVar[int] + KEYS_FIELD_NUMBER: _ClassVar[int] + SHAPE_FIELD_NUMBER: _ClassVar[int] + values: _containers.RepeatedScalarFieldContainer[int] + keys: _containers.RepeatedScalarFieldContainer[int] + shape: _containers.RepeatedScalarFieldContainer[int] + def __init__(self, values: _Optional[_Iterable[int]] = ..., keys: _Optional[_Iterable[int]] = ..., shape: _Optional[_Iterable[int]] = ...) -> None: ... + +class Bytes(_message.Message): + __slots__ = ["value", "content_type"] + VALUE_FIELD_NUMBER: _ClassVar[int] + CONTENT_TYPE_FIELD_NUMBER: _ClassVar[int] + value: _containers.RepeatedScalarFieldContainer[bytes] + content_type: str + def __init__(self, value: _Optional[_Iterable[bytes]] = ..., content_type: _Optional[str] = ...) -> None: ... 
+ +class Value(_message.Message): + __slots__ = ["float32_tensor", "float64_tensor", "int32_tensor", "bytes"] + FLOAT32_TENSOR_FIELD_NUMBER: _ClassVar[int] + FLOAT64_TENSOR_FIELD_NUMBER: _ClassVar[int] + INT32_TENSOR_FIELD_NUMBER: _ClassVar[int] + BYTES_FIELD_NUMBER: _ClassVar[int] + float32_tensor: Float32Tensor + float64_tensor: Float64Tensor + int32_tensor: Int32Tensor + bytes: Bytes + def __init__(self, float32_tensor: _Optional[_Union[Float32Tensor, _Mapping]] = ..., float64_tensor: _Optional[_Union[Float64Tensor, _Mapping]] = ..., int32_tensor: _Optional[_Union[Int32Tensor, _Mapping]] = ..., bytes: _Optional[_Union[Bytes, _Mapping]] = ...) -> None: ... + +class Record(_message.Message): + __slots__ = ["features", "label", "uid", "metadata", "configuration"] + class FeaturesEntry(_message.Message): + __slots__ = ["key", "value"] + KEY_FIELD_NUMBER: _ClassVar[int] + VALUE_FIELD_NUMBER: _ClassVar[int] + key: str + value: Value + def __init__(self, key: _Optional[str] = ..., value: _Optional[_Union[Value, _Mapping]] = ...) -> None: ... + class LabelEntry(_message.Message): + __slots__ = ["key", "value"] + KEY_FIELD_NUMBER: _ClassVar[int] + VALUE_FIELD_NUMBER: _ClassVar[int] + key: str + value: Value + def __init__(self, key: _Optional[str] = ..., value: _Optional[_Union[Value, _Mapping]] = ...) -> None: ... + FEATURES_FIELD_NUMBER: _ClassVar[int] + LABEL_FIELD_NUMBER: _ClassVar[int] + UID_FIELD_NUMBER: _ClassVar[int] + METADATA_FIELD_NUMBER: _ClassVar[int] + CONFIGURATION_FIELD_NUMBER: _ClassVar[int] + features: _containers.MessageMap[str, Value] + label: _containers.MessageMap[str, Value] + uid: str + metadata: str + configuration: str + def __init__(self, features: _Optional[_Mapping[str, Value]] = ..., label: _Optional[_Mapping[str, Value]] = ..., uid: _Optional[str] = ..., metadata: _Optional[str] = ..., configuration: _Optional[str] = ...) -> None: ... 
From de5f799da7fcf9f9e9a2dbe9369b3cf0ec090386 Mon Sep 17 00:00:00 2001 From: Clayton Parnell Date: Fri, 7 Jul 2023 14:37:16 -0700 Subject: [PATCH 6/9] more formatting.. --- src/sagemaker/amazon/common.py | 2 +- src/sagemaker/amazon/record_pb2.pyi | 60 ++++++++++++++++++++++++----- 2 files changed, 52 insertions(+), 10 deletions(-) diff --git a/src/sagemaker/amazon/common.py b/src/sagemaker/amazon/common.py index aa6b01a16f..3c6511b7a6 100644 --- a/src/sagemaker/amazon/common.py +++ b/src/sagemaker/amazon/common.py @@ -20,7 +20,7 @@ import numpy as np -from sagemaker.amazon.record_pb2 import Record # pylint: disable=E0611 +from sagemaker.amazon.record_pb2 import Record # pylint: disable=E0611 from sagemaker.deprecations import deprecated_class from sagemaker.deserializers import SimpleBaseDeserializer from sagemaker.serializers import SimpleBaseSerializer diff --git a/src/sagemaker/amazon/record_pb2.pyi b/src/sagemaker/amazon/record_pb2.pyi index 6fc06fcfb9..11a9fbc243 100644 --- a/src/sagemaker/amazon/record_pb2.pyi +++ b/src/sagemaker/amazon/record_pb2.pyi @@ -1,7 +1,13 @@ from google.protobuf.internal import containers as _containers from google.protobuf import descriptor as _descriptor from google.protobuf import message as _message -from typing import ClassVar as _ClassVar, Iterable as _Iterable, Mapping as _Mapping, Optional as _Optional, Union as _Union +from typing import ( + ClassVar as _ClassVar, + Iterable as _Iterable, + Mapping as _Mapping, + Optional as _Optional, + Union as _Union, +) DESCRIPTOR: _descriptor.FileDescriptor @@ -13,7 +19,12 @@ class Float32Tensor(_message.Message): values: _containers.RepeatedScalarFieldContainer[float] keys: _containers.RepeatedScalarFieldContainer[int] shape: _containers.RepeatedScalarFieldContainer[int] - def __init__(self, values: _Optional[_Iterable[float]] = ..., keys: _Optional[_Iterable[int]] = ..., shape: _Optional[_Iterable[int]] = ...) -> None: ... 
+ def __init__( + self, + values: _Optional[_Iterable[float]] = ..., + keys: _Optional[_Iterable[int]] = ..., + shape: _Optional[_Iterable[int]] = ..., + ) -> None: ... class Float64Tensor(_message.Message): __slots__ = ["values", "keys", "shape"] @@ -23,7 +34,12 @@ class Float64Tensor(_message.Message): values: _containers.RepeatedScalarFieldContainer[float] keys: _containers.RepeatedScalarFieldContainer[int] shape: _containers.RepeatedScalarFieldContainer[int] - def __init__(self, values: _Optional[_Iterable[float]] = ..., keys: _Optional[_Iterable[int]] = ..., shape: _Optional[_Iterable[int]] = ...) -> None: ... + def __init__( + self, + values: _Optional[_Iterable[float]] = ..., + keys: _Optional[_Iterable[int]] = ..., + shape: _Optional[_Iterable[int]] = ..., + ) -> None: ... class Int32Tensor(_message.Message): __slots__ = ["values", "keys", "shape"] @@ -33,7 +49,12 @@ class Int32Tensor(_message.Message): values: _containers.RepeatedScalarFieldContainer[int] keys: _containers.RepeatedScalarFieldContainer[int] shape: _containers.RepeatedScalarFieldContainer[int] - def __init__(self, values: _Optional[_Iterable[int]] = ..., keys: _Optional[_Iterable[int]] = ..., shape: _Optional[_Iterable[int]] = ...) -> None: ... + def __init__( + self, + values: _Optional[_Iterable[int]] = ..., + keys: _Optional[_Iterable[int]] = ..., + shape: _Optional[_Iterable[int]] = ..., + ) -> None: ... class Bytes(_message.Message): __slots__ = ["value", "content_type"] @@ -41,7 +62,9 @@ class Bytes(_message.Message): CONTENT_TYPE_FIELD_NUMBER: _ClassVar[int] value: _containers.RepeatedScalarFieldContainer[bytes] content_type: str - def __init__(self, value: _Optional[_Iterable[bytes]] = ..., content_type: _Optional[str] = ...) -> None: ... + def __init__( + self, value: _Optional[_Iterable[bytes]] = ..., content_type: _Optional[str] = ... + ) -> None: ... 
class Value(_message.Message): __slots__ = ["float32_tensor", "float64_tensor", "int32_tensor", "bytes"] @@ -53,24 +76,36 @@ class Value(_message.Message): float64_tensor: Float64Tensor int32_tensor: Int32Tensor bytes: Bytes - def __init__(self, float32_tensor: _Optional[_Union[Float32Tensor, _Mapping]] = ..., float64_tensor: _Optional[_Union[Float64Tensor, _Mapping]] = ..., int32_tensor: _Optional[_Union[Int32Tensor, _Mapping]] = ..., bytes: _Optional[_Union[Bytes, _Mapping]] = ...) -> None: ... + def __init__( + self, + float32_tensor: _Optional[_Union[Float32Tensor, _Mapping]] = ..., + float64_tensor: _Optional[_Union[Float64Tensor, _Mapping]] = ..., + int32_tensor: _Optional[_Union[Int32Tensor, _Mapping]] = ..., + bytes: _Optional[_Union[Bytes, _Mapping]] = ..., + ) -> None: ... class Record(_message.Message): __slots__ = ["features", "label", "uid", "metadata", "configuration"] + class FeaturesEntry(_message.Message): __slots__ = ["key", "value"] KEY_FIELD_NUMBER: _ClassVar[int] VALUE_FIELD_NUMBER: _ClassVar[int] key: str value: Value - def __init__(self, key: _Optional[str] = ..., value: _Optional[_Union[Value, _Mapping]] = ...) -> None: ... + def __init__( + self, key: _Optional[str] = ..., value: _Optional[_Union[Value, _Mapping]] = ... + ) -> None: ... + class LabelEntry(_message.Message): __slots__ = ["key", "value"] KEY_FIELD_NUMBER: _ClassVar[int] VALUE_FIELD_NUMBER: _ClassVar[int] key: str value: Value - def __init__(self, key: _Optional[str] = ..., value: _Optional[_Union[Value, _Mapping]] = ...) -> None: ... + def __init__( + self, key: _Optional[str] = ..., value: _Optional[_Union[Value, _Mapping]] = ... + ) -> None: ... 
FEATURES_FIELD_NUMBER: _ClassVar[int] LABEL_FIELD_NUMBER: _ClassVar[int] UID_FIELD_NUMBER: _ClassVar[int] @@ -81,4 +116,11 @@ class Record(_message.Message): uid: str metadata: str configuration: str - def __init__(self, features: _Optional[_Mapping[str, Value]] = ..., label: _Optional[_Mapping[str, Value]] = ..., uid: _Optional[str] = ..., metadata: _Optional[str] = ..., configuration: _Optional[str] = ...) -> None: ... + def __init__( + self, + features: _Optional[_Mapping[str, Value]] = ..., + label: _Optional[_Mapping[str, Value]] = ..., + uid: _Optional[str] = ..., + metadata: _Optional[str] = ..., + configuration: _Optional[str] = ..., + ) -> None: ... From d92e87ab6fb282d805250517da2a55f82a9f0abb Mon Sep 17 00:00:00 2001 From: Clayton Parnell Date: Fri, 7 Jul 2023 15:45:16 -0700 Subject: [PATCH 7/9] Raise lower bound of protobuf --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 7a59b47f02..913295d991 100644 --- a/setup.py +++ b/setup.py @@ -52,7 +52,7 @@ def read_requirements(filename): "cloudpickle==2.2.1", "google-pasta", "numpy>=1.9.0,<2.0", - "protobuf>=3.1,<5.0", + "protobuf>=4,<5.0", "smdebug_rulesconfig==1.0.1", "importlib-metadata>=1.4.0,<7.0", "packaging>=20.0", From 887f74fd0111df31a064fa7a27eb068877a89d95 Mon Sep 17 00:00:00 2001 From: Clayton Parnell Date: Mon, 10 Jul 2023 13:11:26 -0700 Subject: [PATCH 8/9] Support protobuf 3 AND 4 --- setup.py | 2 +- src/sagemaker/amazon/common.py | 2 +- src/sagemaker/amazon/record_pb2.py | 133 +++++++++++++++++++++++----- src/sagemaker/amazon/record_pb2.pyi | 126 -------------------------- 4 files changed, 115 insertions(+), 148 deletions(-) delete mode 100644 src/sagemaker/amazon/record_pb2.pyi diff --git a/setup.py b/setup.py index 913295d991..7a59b47f02 100644 --- a/setup.py +++ b/setup.py @@ -52,7 +52,7 @@ def read_requirements(filename): "cloudpickle==2.2.1", "google-pasta", "numpy>=1.9.0,<2.0", - "protobuf>=4,<5.0", + "protobuf>=3.1,<5.0", 
"smdebug_rulesconfig==1.0.1", "importlib-metadata>=1.4.0,<7.0", "packaging>=20.0", diff --git a/src/sagemaker/amazon/common.py b/src/sagemaker/amazon/common.py index 3c6511b7a6..4632bda628 100644 --- a/src/sagemaker/amazon/common.py +++ b/src/sagemaker/amazon/common.py @@ -20,7 +20,7 @@ import numpy as np -from sagemaker.amazon.record_pb2 import Record # pylint: disable=E0611 +from sagemaker.amazon.record_pb2 import Record from sagemaker.deprecations import deprecated_class from sagemaker.deserializers import SimpleBaseDeserializer from sagemaker.serializers import SimpleBaseSerializer diff --git a/src/sagemaker/amazon/record_pb2.py b/src/sagemaker/amazon/record_pb2.py index efa973d55d..d06b38663b 100644 --- a/src/sagemaker/amazon/record_pb2.py +++ b/src/sagemaker/amazon/record_pb2.py @@ -4,8 +4,9 @@ """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection from google.protobuf import symbol_database as _symbol_database -from google.protobuf.internal import builder as _builder # @@protoc_insertion_point(imports) @@ -16,9 +17,101 @@ b'\n\x0crecord.proto\x12\x0b\x61ialgs.data"H\n\rFloat32Tensor\x12\x12\n\x06values\x18\x01 \x03(\x02\x42\x02\x10\x01\x12\x10\n\x04keys\x18\x02 \x03(\x04\x42\x02\x10\x01\x12\x11\n\x05shape\x18\x03 \x03(\x04\x42\x02\x10\x01"H\n\rFloat64Tensor\x12\x12\n\x06values\x18\x01 \x03(\x01\x42\x02\x10\x01\x12\x10\n\x04keys\x18\x02 \x03(\x04\x42\x02\x10\x01\x12\x11\n\x05shape\x18\x03 \x03(\x04\x42\x02\x10\x01"F\n\x0bInt32Tensor\x12\x12\n\x06values\x18\x01 \x03(\x05\x42\x02\x10\x01\x12\x10\n\x04keys\x18\x02 \x03(\x04\x42\x02\x10\x01\x12\x11\n\x05shape\x18\x03 \x03(\x04\x42\x02\x10\x01",\n\x05\x42ytes\x12\r\n\x05value\x18\x01 \x03(\x0c\x12\x14\n\x0c\x63ontent_type\x18\x02 \x01(\t"\xd3\x01\n\x05Value\x12\x34\n\x0e\x66loat32_tensor\x18\x02 
\x01(\x0b\x32\x1a.aialgs.data.Float32TensorH\x00\x12\x34\n\x0e\x66loat64_tensor\x18\x03 \x01(\x0b\x32\x1a.aialgs.data.Float64TensorH\x00\x12\x30\n\x0cint32_tensor\x18\x07 \x01(\x0b\x32\x18.aialgs.data.Int32TensorH\x00\x12#\n\x05\x62ytes\x18\t \x01(\x0b\x32\x12.aialgs.data.BytesH\x00\x42\x07\n\x05value"\xa9\x02\n\x06Record\x12\x33\n\x08\x66\x65\x61tures\x18\x01 \x03(\x0b\x32!.aialgs.data.Record.FeaturesEntry\x12-\n\x05label\x18\x02 \x03(\x0b\x32\x1e.aialgs.data.Record.LabelEntry\x12\x0b\n\x03uid\x18\x03 \x01(\t\x12\x10\n\x08metadata\x18\x04 \x01(\t\x12\x15\n\rconfiguration\x18\x05 \x01(\t\x1a\x43\n\rFeaturesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12!\n\x05value\x18\x02 \x01(\x0b\x32\x12.aialgs.data.Value:\x02\x38\x01\x1a@\n\nLabelEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12!\n\x05value\x18\x02 \x01(\x0b\x32\x12.aialgs.data.Value:\x02\x38\x01\x42\x30\n com.amazonaws.aialgorithms.protoB\x0cRecordProtos' ) -_globals = globals() -_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) -_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, "record_pb2", _globals) + +_FLOAT32TENSOR = DESCRIPTOR.message_types_by_name["Float32Tensor"] +_FLOAT64TENSOR = DESCRIPTOR.message_types_by_name["Float64Tensor"] +_INT32TENSOR = DESCRIPTOR.message_types_by_name["Int32Tensor"] +_BYTES = DESCRIPTOR.message_types_by_name["Bytes"] +_VALUE = DESCRIPTOR.message_types_by_name["Value"] +_RECORD = DESCRIPTOR.message_types_by_name["Record"] +_RECORD_FEATURESENTRY = _RECORD.nested_types_by_name["FeaturesEntry"] +_RECORD_LABELENTRY = _RECORD.nested_types_by_name["LabelEntry"] +Float32Tensor = _reflection.GeneratedProtocolMessageType( + "Float32Tensor", + (_message.Message,), + { + "DESCRIPTOR": _FLOAT32TENSOR, + "__module__": "record_pb2" + # @@protoc_insertion_point(class_scope:aialgs.data.Float32Tensor) + }, +) +_sym_db.RegisterMessage(Float32Tensor) + +Float64Tensor = _reflection.GeneratedProtocolMessageType( + "Float64Tensor", + (_message.Message,), + { + "DESCRIPTOR": _FLOAT64TENSOR, + 
"__module__": "record_pb2" + # @@protoc_insertion_point(class_scope:aialgs.data.Float64Tensor) + }, +) +_sym_db.RegisterMessage(Float64Tensor) + +Int32Tensor = _reflection.GeneratedProtocolMessageType( + "Int32Tensor", + (_message.Message,), + { + "DESCRIPTOR": _INT32TENSOR, + "__module__": "record_pb2" + # @@protoc_insertion_point(class_scope:aialgs.data.Int32Tensor) + }, +) +_sym_db.RegisterMessage(Int32Tensor) + +Bytes = _reflection.GeneratedProtocolMessageType( + "Bytes", + (_message.Message,), + { + "DESCRIPTOR": _BYTES, + "__module__": "record_pb2" + # @@protoc_insertion_point(class_scope:aialgs.data.Bytes) + }, +) +_sym_db.RegisterMessage(Bytes) + +Value = _reflection.GeneratedProtocolMessageType( + "Value", + (_message.Message,), + { + "DESCRIPTOR": _VALUE, + "__module__": "record_pb2" + # @@protoc_insertion_point(class_scope:aialgs.data.Value) + }, +) +_sym_db.RegisterMessage(Value) + +Record = _reflection.GeneratedProtocolMessageType( + "Record", + (_message.Message,), + { + "FeaturesEntry": _reflection.GeneratedProtocolMessageType( + "FeaturesEntry", + (_message.Message,), + { + "DESCRIPTOR": _RECORD_FEATURESENTRY, + "__module__": "record_pb2" + # @@protoc_insertion_point(class_scope:aialgs.data.Record.FeaturesEntry) + }, + ), + "LabelEntry": _reflection.GeneratedProtocolMessageType( + "LabelEntry", + (_message.Message,), + { + "DESCRIPTOR": _RECORD_LABELENTRY, + "__module__": "record_pb2" + # @@protoc_insertion_point(class_scope:aialgs.data.Record.LabelEntry) + }, + ), + "DESCRIPTOR": _RECORD, + "__module__": "record_pb2" + # @@protoc_insertion_point(class_scope:aialgs.data.Record) + }, +) +_sym_db.RegisterMessage(Record) +_sym_db.RegisterMessage(Record.FeaturesEntry) +_sym_db.RegisterMessage(Record.LabelEntry) + if _descriptor._USE_C_DESCRIPTORS == False: DESCRIPTOR._options = None @@ -45,20 +138,20 @@ _RECORD_FEATURESENTRY._serialized_options = b"8\001" _RECORD_LABELENTRY._options = None _RECORD_LABELENTRY._serialized_options = b"8\001" - 
_globals["_FLOAT32TENSOR"]._serialized_start = 29 - _globals["_FLOAT32TENSOR"]._serialized_end = 101 - _globals["_FLOAT64TENSOR"]._serialized_start = 103 - _globals["_FLOAT64TENSOR"]._serialized_end = 175 - _globals["_INT32TENSOR"]._serialized_start = 177 - _globals["_INT32TENSOR"]._serialized_end = 247 - _globals["_BYTES"]._serialized_start = 249 - _globals["_BYTES"]._serialized_end = 293 - _globals["_VALUE"]._serialized_start = 296 - _globals["_VALUE"]._serialized_end = 507 - _globals["_RECORD"]._serialized_start = 510 - _globals["_RECORD"]._serialized_end = 807 - _globals["_RECORD_FEATURESENTRY"]._serialized_start = 674 - _globals["_RECORD_FEATURESENTRY"]._serialized_end = 741 - _globals["_RECORD_LABELENTRY"]._serialized_start = 743 - _globals["_RECORD_LABELENTRY"]._serialized_end = 807 + _FLOAT32TENSOR._serialized_start = 29 + _FLOAT32TENSOR._serialized_end = 101 + _FLOAT64TENSOR._serialized_start = 103 + _FLOAT64TENSOR._serialized_end = 175 + _INT32TENSOR._serialized_start = 177 + _INT32TENSOR._serialized_end = 247 + _BYTES._serialized_start = 249 + _BYTES._serialized_end = 293 + _VALUE._serialized_start = 296 + _VALUE._serialized_end = 507 + _RECORD._serialized_start = 510 + _RECORD._serialized_end = 807 + _RECORD_FEATURESENTRY._serialized_start = 674 + _RECORD_FEATURESENTRY._serialized_end = 741 + _RECORD_LABELENTRY._serialized_start = 743 + _RECORD_LABELENTRY._serialized_end = 807 # @@protoc_insertion_point(module_scope) diff --git a/src/sagemaker/amazon/record_pb2.pyi b/src/sagemaker/amazon/record_pb2.pyi deleted file mode 100644 index 11a9fbc243..0000000000 --- a/src/sagemaker/amazon/record_pb2.pyi +++ /dev/null @@ -1,126 +0,0 @@ -from google.protobuf.internal import containers as _containers -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from typing import ( - ClassVar as _ClassVar, - Iterable as _Iterable, - Mapping as _Mapping, - Optional as _Optional, - Union as _Union, -) - -DESCRIPTOR: 
_descriptor.FileDescriptor - -class Float32Tensor(_message.Message): - __slots__ = ["values", "keys", "shape"] - VALUES_FIELD_NUMBER: _ClassVar[int] - KEYS_FIELD_NUMBER: _ClassVar[int] - SHAPE_FIELD_NUMBER: _ClassVar[int] - values: _containers.RepeatedScalarFieldContainer[float] - keys: _containers.RepeatedScalarFieldContainer[int] - shape: _containers.RepeatedScalarFieldContainer[int] - def __init__( - self, - values: _Optional[_Iterable[float]] = ..., - keys: _Optional[_Iterable[int]] = ..., - shape: _Optional[_Iterable[int]] = ..., - ) -> None: ... - -class Float64Tensor(_message.Message): - __slots__ = ["values", "keys", "shape"] - VALUES_FIELD_NUMBER: _ClassVar[int] - KEYS_FIELD_NUMBER: _ClassVar[int] - SHAPE_FIELD_NUMBER: _ClassVar[int] - values: _containers.RepeatedScalarFieldContainer[float] - keys: _containers.RepeatedScalarFieldContainer[int] - shape: _containers.RepeatedScalarFieldContainer[int] - def __init__( - self, - values: _Optional[_Iterable[float]] = ..., - keys: _Optional[_Iterable[int]] = ..., - shape: _Optional[_Iterable[int]] = ..., - ) -> None: ... - -class Int32Tensor(_message.Message): - __slots__ = ["values", "keys", "shape"] - VALUES_FIELD_NUMBER: _ClassVar[int] - KEYS_FIELD_NUMBER: _ClassVar[int] - SHAPE_FIELD_NUMBER: _ClassVar[int] - values: _containers.RepeatedScalarFieldContainer[int] - keys: _containers.RepeatedScalarFieldContainer[int] - shape: _containers.RepeatedScalarFieldContainer[int] - def __init__( - self, - values: _Optional[_Iterable[int]] = ..., - keys: _Optional[_Iterable[int]] = ..., - shape: _Optional[_Iterable[int]] = ..., - ) -> None: ... - -class Bytes(_message.Message): - __slots__ = ["value", "content_type"] - VALUE_FIELD_NUMBER: _ClassVar[int] - CONTENT_TYPE_FIELD_NUMBER: _ClassVar[int] - value: _containers.RepeatedScalarFieldContainer[bytes] - content_type: str - def __init__( - self, value: _Optional[_Iterable[bytes]] = ..., content_type: _Optional[str] = ... - ) -> None: ... 
- -class Value(_message.Message): - __slots__ = ["float32_tensor", "float64_tensor", "int32_tensor", "bytes"] - FLOAT32_TENSOR_FIELD_NUMBER: _ClassVar[int] - FLOAT64_TENSOR_FIELD_NUMBER: _ClassVar[int] - INT32_TENSOR_FIELD_NUMBER: _ClassVar[int] - BYTES_FIELD_NUMBER: _ClassVar[int] - float32_tensor: Float32Tensor - float64_tensor: Float64Tensor - int32_tensor: Int32Tensor - bytes: Bytes - def __init__( - self, - float32_tensor: _Optional[_Union[Float32Tensor, _Mapping]] = ..., - float64_tensor: _Optional[_Union[Float64Tensor, _Mapping]] = ..., - int32_tensor: _Optional[_Union[Int32Tensor, _Mapping]] = ..., - bytes: _Optional[_Union[Bytes, _Mapping]] = ..., - ) -> None: ... - -class Record(_message.Message): - __slots__ = ["features", "label", "uid", "metadata", "configuration"] - - class FeaturesEntry(_message.Message): - __slots__ = ["key", "value"] - KEY_FIELD_NUMBER: _ClassVar[int] - VALUE_FIELD_NUMBER: _ClassVar[int] - key: str - value: Value - def __init__( - self, key: _Optional[str] = ..., value: _Optional[_Union[Value, _Mapping]] = ... - ) -> None: ... - - class LabelEntry(_message.Message): - __slots__ = ["key", "value"] - KEY_FIELD_NUMBER: _ClassVar[int] - VALUE_FIELD_NUMBER: _ClassVar[int] - key: str - value: Value - def __init__( - self, key: _Optional[str] = ..., value: _Optional[_Union[Value, _Mapping]] = ... - ) -> None: ... - FEATURES_FIELD_NUMBER: _ClassVar[int] - LABEL_FIELD_NUMBER: _ClassVar[int] - UID_FIELD_NUMBER: _ClassVar[int] - METADATA_FIELD_NUMBER: _ClassVar[int] - CONFIGURATION_FIELD_NUMBER: _ClassVar[int] - features: _containers.MessageMap[str, Value] - label: _containers.MessageMap[str, Value] - uid: str - metadata: str - configuration: str - def __init__( - self, - features: _Optional[_Mapping[str, Value]] = ..., - label: _Optional[_Mapping[str, Value]] = ..., - uid: _Optional[str] = ..., - metadata: _Optional[str] = ..., - configuration: _Optional[str] = ..., - ) -> None: ... 
From 46effc98cb777f2cd036be28a1aaf61f853a3187 Mon Sep 17 00:00:00 2001 From: Clayton Parnell Date: Wed, 12 Jul 2023 08:58:49 -0700 Subject: [PATCH 9/9] Update lower bound to ensure compatibility. Loosen PyYAML --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 7a59b47f02..611e7b0520 100644 --- a/setup.py +++ b/setup.py @@ -52,14 +52,14 @@ def read_requirements(filename): "cloudpickle==2.2.1", "google-pasta", "numpy>=1.9.0,<2.0", - "protobuf>=3.1,<5.0", + "protobuf>=3.12,<5.0", "smdebug_rulesconfig==1.0.1", "importlib-metadata>=1.4.0,<7.0", "packaging>=20.0", "pandas", "pathos", "schema", - "PyYAML==6.0", + "PyYAML~=6.0", "jsonschema", "platformdirs", "tblib==1.7.0",