From 1f4cb32fae35f34c5f6cb143b41dfb43394b83d3 Mon Sep 17 00:00:00 2001 From: flynn Date: Sun, 30 Aug 2020 18:57:33 -0400 Subject: [PATCH 01/18] sklearn instrumentation --- docs-requirements.txt | 1 + docs/conf.py | 1 + docs/instrumentation/sklearn/sklearn.rst | 7 + .../CHANGELOG.md | 9 + .../LICENSE | 201 +++++++++++++++++ .../MANIFEST.in | 9 + .../README.rst | 23 ++ .../setup.cfg | 55 +++++ .../setup.py | 31 +++ .../instrumentation/sklearn/__init__.py | 204 ++++++++++++++++++ .../instrumentation/sklearn/version.py | 1 + .../tests/__init__.py | 0 .../tests/fixtures.py | 40 ++++ .../tests/test_sklearn.py | 65 ++++++ tox.ini | 7 + 15 files changed, 654 insertions(+) create mode 100644 docs/instrumentation/sklearn/sklearn.rst create mode 100644 instrumentation/opentelemetry-instrumentation-sklearn/CHANGELOG.md create mode 100644 instrumentation/opentelemetry-instrumentation-sklearn/LICENSE create mode 100644 instrumentation/opentelemetry-instrumentation-sklearn/MANIFEST.in create mode 100644 instrumentation/opentelemetry-instrumentation-sklearn/README.rst create mode 100644 instrumentation/opentelemetry-instrumentation-sklearn/setup.cfg create mode 100644 instrumentation/opentelemetry-instrumentation-sklearn/setup.py create mode 100644 instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py create mode 100644 instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/version.py create mode 100644 instrumentation/opentelemetry-instrumentation-sklearn/tests/__init__.py create mode 100644 instrumentation/opentelemetry-instrumentation-sklearn/tests/fixtures.py create mode 100644 instrumentation/opentelemetry-instrumentation-sklearn/tests/test_sklearn.py diff --git a/docs-requirements.txt b/docs-requirements.txt index e98a0d35dfb..75e693576b8 100644 --- a/docs-requirements.txt +++ b/docs-requirements.txt @@ -30,3 +30,4 @@ boto~=2.0 botocore~=1.0 starlette~=0.13 fastapi~=0.58.1 +scikit-learn~=0.22.0 diff --git a/docs/conf.py b/docs/conf.py index 4b9753c96c4..d2e34a8341a 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -115,6 +115,7 @@ "any", "opentelemetry.trace.propagation.httptextformat.HTTPTextFormat.inject", ), + ("py:class", "BaseEstimator"), ] # Add any paths that contain templates here, relative to this directory. diff --git a/docs/instrumentation/sklearn/sklearn.rst b/docs/instrumentation/sklearn/sklearn.rst new file mode 100644 index 00000000000..9d9a2fd1383 --- /dev/null +++ b/docs/instrumentation/sklearn/sklearn.rst @@ -0,0 +1,7 @@ +OpenTelemetry sklearn Instrumentation +===================================== + +.. automodule:: opentelemetry.instrumentation.sklearn + :members: + :undoc-members: + :show-inheritance: diff --git a/instrumentation/opentelemetry-instrumentation-sklearn/CHANGELOG.md b/instrumentation/opentelemetry-instrumentation-sklearn/CHANGELOG.md new file mode 100644 index 00000000000..8b31361c752 --- /dev/null +++ b/instrumentation/opentelemetry-instrumentation-sklearn/CHANGELOG.md @@ -0,0 +1,9 @@ +# Changelog + +## Unreleased + +## Version 0.12b0 + +Released 2020-xx-xx + +- Initial release diff --git a/instrumentation/opentelemetry-instrumentation-sklearn/LICENSE b/instrumentation/opentelemetry-instrumentation-sklearn/LICENSE new file mode 100644 index 00000000000..261eeb9e9f8 --- /dev/null +++ b/instrumentation/opentelemetry-instrumentation-sklearn/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/instrumentation/opentelemetry-instrumentation-sklearn/MANIFEST.in b/instrumentation/opentelemetry-instrumentation-sklearn/MANIFEST.in new file mode 100644 index 00000000000..aed3e33273b --- /dev/null +++ b/instrumentation/opentelemetry-instrumentation-sklearn/MANIFEST.in @@ -0,0 +1,9 @@ +graft src +graft tests +global-exclude *.pyc +global-exclude *.pyo +global-exclude __pycache__/* +include CHANGELOG.md +include MANIFEST.in +include README.rst +include LICENSE diff --git a/instrumentation/opentelemetry-instrumentation-sklearn/README.rst b/instrumentation/opentelemetry-instrumentation-sklearn/README.rst new file mode 100644 index 00000000000..20679b10119 --- /dev/null +++ b/instrumentation/opentelemetry-instrumentation-sklearn/README.rst @@ -0,0 +1,23 @@ +OpenTelemetry Scikit-Learn Instrumentation +========================================== + +|pypi| + +.. |pypi| image:: https://badge.fury.io/py/opentelemetry-instrumentation-sklearn.svg + :target: https://pypi.org/project/opentelemetry-instrumentation-sklearn/ + +This library allows tracing HTTP requests made by the +`scikit-learn `_ library. + +Installation +------------ + +:: + + pip install opentelemetry-instrumentation-sklearn + +References +---------- + +* `OpenTelemetry sklearn Instrumentation `_ +* `OpenTelemetry Project `_ diff --git a/instrumentation/opentelemetry-instrumentation-sklearn/setup.cfg b/instrumentation/opentelemetry-instrumentation-sklearn/setup.cfg new file mode 100644 index 00000000000..af97177ee7c --- /dev/null +++ b/instrumentation/opentelemetry-instrumentation-sklearn/setup.cfg @@ -0,0 +1,55 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +[metadata] +name = opentelemetry-instrumentation-sklearn +description = OpenTelemetry sklearn instrumentation +long_description = file: README.rst +long_description_content_type = text/x-rst +author = OpenTelemetry Authors +author_email = cncf-opentelemetry-contributors@lists.cncf.io +url = https://github.com/open-telemetry/opentelemetry-python/tree/master/instrumentation/opentelemetry-instrumentation-sklearn +platforms = any +license = Apache-2.0 +classifiers = + Development Status :: 4 - Beta + Intended Audience :: Developers + License :: OSI Approved :: Apache Software License + Programming Language :: Python + Programming Language :: Python :: 3 + Programming Language :: Python :: 3.5 + Programming Language :: Python :: 3.6 + Programming Language :: Python :: 3.7 + Programming Language :: Python :: 3.8 + +[options] +python_requires = >=3.5 +package_dir= + =src +packages=find_namespace: +install_requires = + opentelemetry-api == 0.13dev0 + opentelemetry-instrumentation == 0.13dev0 + scikit-learn ~= 0.22.0 + +[options.extras_require] +test = + opentelemetry-test == 0.13dev0 + +[options.packages.find] +where = src + +[options.entry_points] +opentelemetry_instrumentor = + sklearn = opentelemetry.instrumentation.sklearn:SklearnInstrumentor diff --git a/instrumentation/opentelemetry-instrumentation-sklearn/setup.py b/instrumentation/opentelemetry-instrumentation-sklearn/setup.py new file mode 100644 index 00000000000..92f53925e07 --- /dev/null +++ b/instrumentation/opentelemetry-instrumentation-sklearn/setup.py @@ -0,0 +1,31 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os + +import setuptools + +BASE_DIR = os.path.dirname(__file__) +VERSION_FILENAME = os.path.join( + BASE_DIR, + "src", + "opentelemetry", + "instrumentation", + "sklearn", + "version.py", +) +PACKAGE_INFO = {} +with open(VERSION_FILENAME) as f: + exec(f.read(), PACKAGE_INFO) + +setuptools.setup(version=PACKAGE_INFO["__version__"]) diff --git a/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py b/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py new file mode 100644 index 00000000000..36e7b7c9aa3 --- /dev/null +++ b/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py @@ -0,0 +1,204 @@ +import logging +from functools import wraps +from typing import Callable, Dict, List, Sequence, Type + +from sklearn.base import BaseEstimator +from sklearn.pipeline import FeatureUnion, Pipeline + +from opentelemetry.instrumentation.sklearn.version import __version__ +from opentelemetry.trace import get_tracer + +logger = logging.getLogger(__name__) + + +def implement_spans(func: Callable, estimator_name: str): + """Wrap the method call with a span. + + Args: + func: A callable to be wrapped in a span + estimator_name: The name of the estimator class. We pass estimator name + here because there are some wrapped methods in Pipeline that don't + have ``__self__`` to access the class name. + + Returns: + The passed function wrapped in a span. + """ + logger.debug("Instrumenting: %s.%s", estimator_name, func.__name__) + + @wraps(func) + def wrapper(*args, **kwargs): + with get_tracer(__name__, __version__).start_as_current_span( + name="{cls}.{func}".format(cls=estimator_name, func=func.__name__) + ): + return func(*args, **kwargs) + + return wrapper + + +# Methods on which spans should be applied. +DEFAULT_METHODS = ["fit", "transform", "predict", "predict_proba"] + +# Classes and their attributes which contain a list of tupled estimators +# through which we should walk recursively for estimators. +DEFAULT_NAMEDTUPLE_ATTRIBS = { + Pipeline: ["steps"], + FeatureUnion: ["transformer_list"], +} + +# Classes and their attributes which contain an estimator or sequence of +# estimators through which we should walk recursively for estimators. +DEFAULT_ATTRIBS = {} + + +class SklearnInstrumentor: + """Instrument a fitted sklearn model with opentelemetry spans. + + Instrument methods of ``BaseEstimator``-derived components in a sklearn + model. The assumption is that a machine learning model ``Pipeline`` (or + class descendent) is being instrumented with opentelemetry. Within a + ``Pipeline`` is some hierarchy of estimators and transformers. + + The ``instrument_estimator`` method walks this hierarchy of estimators, + implementing each of the defined methods with its own span. + + Certain estimators in the sklearn ecosystem contain other estimators as + instance attributes. Support for walking this embedded sub-hierarchy is + supported with ``recurse_attribs``. This argument is a dictionary + with classes as keys, and a list of attributes representing embedded + estimators. By default, ``recurse_attribs`` is empty. + + Similar to Pipelines, there are also estimators which have class attributes + as a list of 2-tuples; for instance, the ``FeatureUnion`` and its attribute + ``transformer_list``. Instrumenting estimators like this is also + supported through the ``recurse_namedtuple_attribs`` argument. This + argument is a dictionary with classes as keys, and a list of attribute + names representing the namedtuple list(s). By default, the + ``recurse_namedtuple_attribs`` dictionary supports + ``Pipeline`` with ``steps``, and ``FeatureUnion`` with + ``transformer_list``. + + Note that spans will not be generated for any child transformer whose + parent transformer has ``n_jobs`` parameter set to anything besides + ``None`` or ``1``. + + Example: + + .. code-block:: python + + from opentelemetry.instrumentation.sklearn import SklearnInstrumentor + from sklearn.datasets import load_iris + from sklearn.ensemble import RandomForestClassifier + from sklearn.model_selection import train_test_split + from sklearn.pipeline import Pipeline + + X, y = load_iris(return_X_y=True) + X_train, X_test, y_train, y_test = train_test_split(X, y) + + model = Pipeline( + [ + ("class", RandomForestClassifier(n_estimators=10)), + ] + ) + + model.fit(X_train, y_train) + + SklearnInstrumentor().instrument_estimator(model) + + Args: + methods (list): A list of method names on which to instrument a span. + This list of methods will be checked on all estimators in the model + hierarchy. + recurse_attribs (dict): A dictionary of ``BaseEstimator``-derived + sklearn classes as keys, with values being a list of attributes. Each + attribute represents either an estimator or list of estimators on + which to also implement spans. An example is + ``RandomForestClassifier`` and its attribute ``estimators_`` + recurse_namedtuple_attribs (dict): A dictionary of ``BaseEstimator``- + derived sklearn types as keys, with values being a list of + attribute names. Each attribute represents a list of 2-tuples in + which the first element is the estimator name, and the second + element is the estimator. Defaults include sklearn's ``Pipeline`` + and its attribute ``steps``, and the ``FeatureUnion`` and its + attribute ``transformer_list``. + """ + + def __init__( + self, + methods: List[str] = None, + recurse_attribs: Dict[Type[BaseEstimator], List[str]] = None, + recurse_namedtuple_attribs: Dict[ + Type[BaseEstimator], List[str] + ] = None, + ): + self.methods = methods or DEFAULT_METHODS + self.recurse_attribs = recurse_attribs or DEFAULT_ATTRIBS + self.recurse_namedtuple_attribs = ( + recurse_namedtuple_attribs or DEFAULT_NAMEDTUPLE_ATTRIBS + ) + + def instrument_estimator(self, estimator: BaseEstimator): + """Instrument a fitted estimator and its hierarchy where configured. + + Args: + estimator (BaseEstimator): A fitted ``sklearn`` estimator, + typically a ``Pipeline`` instance. + """ + if isinstance( + estimator, tuple(self.recurse_namedtuple_attribs.keys()) + ): + self._instrument_estimator_namedtuple(estimator=estimator) + + if isinstance(estimator, tuple(self.recurse_attribs.keys())): + self._instrument_estimator_attribute(estimator=estimator) + + for method_name in self.methods: + if hasattr(estimator, method_name): + setattr( + estimator, + method_name, + implement_spans( + getattr(estimator, method_name), + estimator.__class__.__name__, + ), + ) + + def _instrument_estimator_attribute(self, estimator: BaseEstimator): + """Instrument instance attributes which also contain estimators. + + Examples include ``RandomForestClassifier`` and + ``MultiOutputRegressor`` instances which have attributes + ``estimators_`` attributes. + + Args: + estimator (BaseEstimator): A fitted ``sklearn`` estimator, with an + attribute which also contains an estimator or collection of + estimators. + """ + for klass, attribs in self.recurse_attribs.items(): + if isinstance(estimator, klass): + for attrib in attribs: + attrib_value = getattr(estimator, attrib) + if isinstance(attrib_value, Sequence): + for value in attrib_value: + self.instrument_estimator(estimator=value) + else: + self.instrument_estimator(estimator=attrib_value,) + break + + def _instrument_estimator_namedtuple(self, estimator: BaseEstimator): + """Instrument attributes with (name, estimator) tupled components. + + Examples include Pipeline and FeatureUnion instances which + have attributes steps and transformer_list, respectively. + + Args: + estimator: A fitted sklearn estimator, with an attribute which also + contains an estimator or collection of estimators. + """ + for klass, attribs in self.recurse_namedtuple_attribs.items(): + if isinstance(estimator, klass): + for attrib in attribs: + if hasattr(estimator, attrib): + for _, est in getattr(estimator, attrib): + self.instrument_estimator(estimator=est) + break diff --git a/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/version.py b/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/version.py new file mode 100644 index 00000000000..70efafb1a08 --- /dev/null +++ b/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/version.py @@ -0,0 +1 @@ +__version__ = "0.13dev0" diff --git a/instrumentation/opentelemetry-instrumentation-sklearn/tests/__init__.py b/instrumentation/opentelemetry-instrumentation-sklearn/tests/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/instrumentation/opentelemetry-instrumentation-sklearn/tests/fixtures.py b/instrumentation/opentelemetry-instrumentation-sklearn/tests/fixtures.py new file mode 100644 index 00000000000..4f9b8aa47ed --- /dev/null +++ b/instrumentation/opentelemetry-instrumentation-sklearn/tests/fixtures.py @@ -0,0 +1,40 @@ +import numpy as np +from sklearn.datasets import load_iris +from sklearn.decomposition import PCA, TruncatedSVD +from sklearn.ensemble import RandomForestClassifier +from sklearn.model_selection import train_test_split +from sklearn.pipeline import FeatureUnion, Pipeline +from sklearn.preprocessing import Normalizer, StandardScaler + +X, y = load_iris(return_X_y=True) +X_train, X_test, y_train, y_test = train_test_split(X, y) + + +def pipeline(): + """A dummy model that has a bunch of components that we can test.""" + model = Pipeline( + [ + ("scaler", StandardScaler()), + ("normal", Normalizer()), + ( + "union", + FeatureUnion( + [ + ("pca", PCA(n_components=1)), + ("svd", TruncatedSVD(n_components=2)), + ], + n_jobs=1, # parallelized components won't generate spans + ), + ), + ("class", RandomForestClassifier(n_estimators=10)), + ] + ) + model.fit(X_train, y_train) + return model + + +def random_input(): + """A random record from the feature set.""" + rows = X.shape[0] + random_row = np.random.choice(rows, size=1) + return X[random_row, :] diff --git a/instrumentation/opentelemetry-instrumentation-sklearn/tests/test_sklearn.py b/instrumentation/opentelemetry-instrumentation-sklearn/tests/test_sklearn.py new file mode 100644 index 00000000000..9614504eb28 --- /dev/null +++ b/instrumentation/opentelemetry-instrumentation-sklearn/tests/test_sklearn.py @@ -0,0 +1,65 @@ +from sklearn.ensemble import RandomForestClassifier + +from opentelemetry.instrumentation.sklearn import SklearnInstrumentor +from opentelemetry.test.test_base import TestBase +from opentelemetry.trace import SpanKind + +from .fixtures import pipeline, random_input + + +class TestSklearn(TestBase): + def test_span_properties(self): + """Test that we get all of the spans we expect.""" + model = pipeline() + SklearnInstrumentor().instrument_estimator(estimator=model) + + x_test = random_input() + model.predict(x_test) + + spans = self.memory_exporter.get_finished_spans() + self.assertEqual(len(spans), 8) + span = spans[0] + self.assertEqual(span.name, "StandardScaler.transform") + self.assertEqual(span.kind, SpanKind.INTERNAL) + self.assertEqual(span.parent.span_id, spans[-1].context.span_id) + span = spans[1] + self.assertEqual(span.name, "Normalizer.transform") + self.assertEqual(span.kind, SpanKind.INTERNAL) + self.assertEqual(span.parent.span_id, spans[-1].context.span_id) + span = spans[2] + self.assertEqual(span.name, "PCA.transform") + self.assertEqual(span.kind, SpanKind.INTERNAL) + self.assertEqual(span.parent.span_id, spans[4].context.span_id) + span = spans[3] + self.assertEqual(span.name, "TruncatedSVD.transform") + self.assertEqual(span.kind, SpanKind.INTERNAL) + self.assertEqual(span.parent.span_id, spans[4].context.span_id) + span = spans[4] + self.assertEqual(span.name, "FeatureUnion.transform") + self.assertEqual(span.kind, SpanKind.INTERNAL) + self.assertEqual(span.parent.span_id, spans[-1].context.span_id) + span = spans[5] + self.assertEqual(span.name, "RandomForestClassifier.predict_proba") + self.assertEqual(span.kind, SpanKind.INTERNAL) + self.assertEqual(span.parent.span_id, spans[6].context.span_id) + span = spans[6] + self.assertEqual(span.name, "RandomForestClassifier.predict") + self.assertEqual(span.kind, SpanKind.INTERNAL) + self.assertEqual(span.parent.span_id, spans[-1].context.span_id) + span = spans[7] + self.assertEqual(span.name, "Pipeline.predict") + self.assertEqual(span.kind, SpanKind.INTERNAL) + + def test_attrib_config(self): + """Test that the attribute config makes spans on the decision trees.""" + model = pipeline() + attrib_config = {RandomForestClassifier: ["estimators_"]} + SklearnInstrumentor( + recurse_attribs=attrib_config + ).instrument_estimator(estimator=model) + + x_test = random_input() + model.predict(x_test) + + spans = self.memory_exporter.get_finished_spans() + self.assertEqual(len(spans), 8 + model.steps[-1][-1].n_estimators) diff --git a/tox.ini b/tox.ini index 91024f7f3d4..ad03bb6f815 100644 --- a/tox.ini +++ b/tox.ini @@ -156,6 +156,10 @@ envlist = py3{4,5,6,7,8}-test-instrumentation-redis pypy3-test-instrumentation-redis + ; opentelemetry-instrumentation-sklearn + py3{5,6,7,8}-test-instrumentation-sklearn + ; ext-sklearn intentionally excluded from pypy3 + ; opentelemetry-instrumentation-celery py3{5,6,7,8}-test-instrumentation-celery pypy3-test-instrumentation-celery @@ -220,6 +224,7 @@ changedir = test-instrumentation-pyramid: instrumentation/opentelemetry-instrumentation-pyramid/tests test-instrumentation-redis: instrumentation/opentelemetry-instrumentation-redis/tests test-instrumentation-requests: instrumentation/opentelemetry-instrumentation-requests/tests + test-instrumentation-sklearn: instrumentation/opentelemetry-instrumentation-sklearn/tests test-instrumentation-sqlalchemy: instrumentation/opentelemetry-instrumentation-sqlalchemy/tests test-instrumentation-sqlite3: instrumentation/opentelemetry-instrumentation-sqlite3/tests test-instrumentation-starlette: instrumentation/opentelemetry-instrumentation-starlette/tests @@ -294,6 +299,8 @@ commands_pre = requests: pip install {toxinidir}/instrumentation/opentelemetry-instrumentation-requests[test] + sklearn: pip install {toxinidir}/instrumentation/opentelemetry-instrumentation-sklearn[test] + starlette: pip install {toxinidir}/instrumentation/opentelemetry-instrumentation-starlette[test] jinja2: pip install {toxinidir}/instrumentation/opentelemetry-instrumentation-jinja2[test] From 5302647288c9a8dcb23be8ac4de862ee7809e64b Mon Sep 17 00:00:00 2001 From: flynn Date: Tue, 22 Sep 2020 21:13:27 -0400 Subject: [PATCH 02/18] more flexible instrumentation --- .../instrumentation/sklearn/__init__.py | 103 ++++++++++++++---- 1 file changed, 79 insertions(+), 24 deletions(-) diff --git a/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py b/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py index 36e7b7c9aa3..9fe3450fcda 100644 --- a/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py @@ -1,6 +1,6 @@ import logging from functools import wraps -from typing import Callable, Dict, List, Sequence, Type +from typing import Callable, Dict, List, MutableMapping, Sequence, Type from sklearn.base import BaseEstimator from sklearn.pipeline import FeatureUnion, Pipeline @@ -65,7 +65,7 @@ class descendent) is being instrumented with opentelemetry. Within a instance attributes. Support for walking this embedded sub-hierarchy is supported with ``recurse_attribs``. This argument is a dictionary with classes as keys, and a list of attributes representing embedded - estimators. By default, ``recurse_attribs`` is empty. + estimators as values. By default, ``recurse_attribs`` is empty. Similar to Pipelines, there are also estimators which have class attributes as a list of 2-tuples; for instance, the ``FeatureUnion`` and its attribute @@ -120,6 +120,8 @@ class descendent) is being instrumented with opentelemetry. Within a element is the estimator. Defaults include sklearn's ``Pipeline`` and its attribute ``steps``, and the ``FeatureUnion`` and its attribute ``transformer_list``. + spanner: A function with signature (func, str) which + decorates instance methods with opentelemetry spans. """ def __init__( @@ -129,12 +131,14 @@ def __init__( recurse_namedtuple_attribs: Dict[ Type[BaseEstimator], List[str] ] = None, + spanner: Callable = implement_spans, ): self.methods = methods or DEFAULT_METHODS self.recurse_attribs = recurse_attribs or DEFAULT_ATTRIBS self.recurse_namedtuple_attribs = ( recurse_namedtuple_attribs or DEFAULT_NAMEDTUPLE_ATTRIBS ) + self.spanner = spanner def instrument_estimator(self, estimator: BaseEstimator): """Instrument a fitted estimator and its hierarchy where configured. @@ -152,19 +156,72 @@ def instrument_estimator(self, estimator: BaseEstimator): self._instrument_estimator_attribute(estimator=estimator) for method_name in self.methods: - if hasattr(estimator, method_name): + self._instrument_estimator_method( + estimator=estimator, method_name=method_name + ) + + def _instrument_estimator_method( + self, estimator: BaseEstimator, method_name: str + ): + """Instrument an estimator method with a span. + + Args: + estimator (BaseEstimator): A fitted ``sklearn`` estimator, with an + attribute which also contains an estimator or collection of + estimators. + method_name (str): The method name of the estimator on which to + apply a span. + """ + if hasattr(estimator, method_name): + class_attr = getattr(type(estimator), method_name, None) + # handle attributes which are properties + if isinstance(class_attr, property): + # grab the returned value of the property + attrib = getattr(estimator, method_name) + # for properties, we get the actual instance ownership + # and patch the returned attribute on the instance + is_callable = isinstance(attrib, Callable) + if is_callable and "__self__" in dir(attrib): + instance = getattr(attrib, "__self__") + setattr( + instance, + attrib.__name__, + self.spanner(attrib, estimator.__class__.__name__), + ) + # sometimes functions are wrapped, + # so we unwrap them here and recurse + elif is_callable and "__wrapped__" in dir(attrib): + instance = getattr(attrib, "__wrapped__") + self._instrument_estimator_method( + estimator=instance, method_name=method_name + ) + else: + logger.debug( + "Unable to instrument: %s.%s", + estimator.__class__.__name__, + method_name, + ) + else: + method = getattr(estimator, method_name) setattr( estimator, method_name, - implement_spans( - getattr(estimator, method_name), - estimator.__class__.__name__, - ), + self.spanner(method, estimator.__class__.__name__), ) + else: + logger.debug( + "Unable to instrument (method not found): %s.%s", + estimator.__class__.__name__, + method_name, + ) def _instrument_estimator_attribute(self, estimator: BaseEstimator): """Instrument instance attributes which also contain estimators. + Handle instance attributes which are also estimators, are a list + (Sequence) of estimators, or are mappings (dictionary) in which + the values are estimators. + Examples include ``RandomForestClassifier`` and ``MultiOutputRegressor`` instances which have attributes ``estimators_`` attributes. @@ -174,16 +231,17 @@ def _instrument_estimator_attribute(self, estimator: BaseEstimator): attribute which also contains an estimator or collection of estimators. """ - for klass, attribs in self.recurse_attribs.items(): - if isinstance(estimator, klass): - for attrib in attribs: - attrib_value = getattr(estimator, attrib) - if isinstance(attrib_value, Sequence): - for value in attrib_value: - self.instrument_estimator(estimator=value) - else: - self.instrument_estimator(estimator=attrib_value,) - break + attribs = self.recurse_attribs.get(estimator.__class__, []) + for attrib in attribs: + attrib_value = getattr(estimator, attrib) + if isinstance(attrib_value, Sequence): + for value in attrib_value: + self.instrument_estimator(estimator=value) + elif isinstance(attrib_value, MutableMapping): + for value in attrib_value.values(): + self.instrument_estimator(estimator=value) + else: + self.instrument_estimator(estimator=attrib_value) def _instrument_estimator_namedtuple(self, estimator: BaseEstimator): """Instrument attributes with (name, estimator) tupled components. @@ -195,10 +253,7 @@ def _instrument_estimator_namedtuple(self, estimator: BaseEstimator): estimator: A fitted sklearn estimator, with an attribute which also contains an estimator or collection of estimators. """ - for klass, attribs in self.recurse_namedtuple_attribs.items(): - if isinstance(estimator, klass): - for attrib in attribs: - if hasattr(estimator, attrib): - for _, est in getattr(estimator, attrib): - self.instrument_estimator(estimator=est) - break + attribs = self.recurse_namedtuple_attribs.get(estimator.__class__, []) + for attrib in attribs: + for _, est in getattr(estimator, attrib): + self.instrument_estimator(estimator=est) From 8fb526a935735b0a879a26b5090604f779091caa Mon Sep 17 00:00:00 2001 From: flynn Date: Tue, 22 Sep 2020 21:41:24 -0400 Subject: [PATCH 03/18] update deps --- .../opentelemetry-instrumentation-sklearn/setup.cfg | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/instrumentation/opentelemetry-instrumentation-sklearn/setup.cfg b/instrumentation/opentelemetry-instrumentation-sklearn/setup.cfg index af97177ee7c..e95c72d0c5d 100644 --- a/instrumentation/opentelemetry-instrumentation-sklearn/setup.cfg +++ b/instrumentation/opentelemetry-instrumentation-sklearn/setup.cfg @@ -39,13 +39,13 @@ package_dir= =src packages=find_namespace: install_requires = - opentelemetry-api == 0.13dev0 - opentelemetry-instrumentation == 0.13dev0 + opentelemetry-api == 0.14.dev0 + opentelemetry-instrumentation == 0.14.dev0 scikit-learn ~= 0.22.0 [options.extras_require] test = - opentelemetry-test == 0.13dev0 + opentelemetry-test == 0.14.dev0 [options.packages.find] where = src From 32af450b918f3cc5b040c022f18c163f8798f56d Mon Sep 17 00:00:00 2001 From: flynn Date: Tue, 22 Sep 2020 21:44:02 -0400 Subject: [PATCH 04/18] update package ver --- .../src/opentelemetry/instrumentation/sklearn/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/version.py b/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/version.py index 70efafb1a08..eef24b9b14f 100644 --- a/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/version.py +++ b/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/version.py @@ -1 +1 @@ -__version__ = "0.13dev0" +__version__ = "0.14.dev0" From b6461fd451133d7c445bcc9f05d0efee6d4154b5 Mon Sep 17 00:00:00 2001 From: flynn Date: Tue, 22 Sep 2020 21:49:09 -0400 Subject: [PATCH 05/18] fix docstring --- .../src/opentelemetry/instrumentation/sklearn/__init__.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py b/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py index 9fe3450fcda..b9d847f9ed4 100644 --- a/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py @@ -166,9 +166,7 @@ def _instrument_estimator_method( """Instrument an estimator method with a span. Args: - estimator (BaseEstimator): A fitted ``sklearn`` estimator, with an - attribute which also contains an estimator or collection of - estimators. + estimator (BaseEstimator): A fitted ``sklearn`` estimator. method_name (str): The method name of the estimator on which to apply a span. """ From 33d10816470ab9e5b08096909f3c22d0a3419ac0 Mon Sep 17 00:00:00 2001 From: flynn Date: Wed, 23 Sep 2020 20:22:34 -0400 Subject: [PATCH 06/18] remove changelog entry --- .../opentelemetry-instrumentation-sklearn/CHANGELOG.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/instrumentation/opentelemetry-instrumentation-sklearn/CHANGELOG.md b/instrumentation/opentelemetry-instrumentation-sklearn/CHANGELOG.md index 8b31361c752..0ef6e1246c4 100644 --- a/instrumentation/opentelemetry-instrumentation-sklearn/CHANGELOG.md +++ b/instrumentation/opentelemetry-instrumentation-sklearn/CHANGELOG.md @@ -2,8 +2,6 @@ ## Unreleased -## Version 0.12b0 -Released 2020-xx-xx - Initial release From 63bd6cd19d26c2d0ec8f53eb37c2e8c417d93807 Mon Sep 17 00:00:00 2001 From: flynn Date: Sun, 27 Sep 2020 23:55:09 -0400 Subject: [PATCH 07/18] update for autoinstrumentation and uninstrumentation --- .../instrumentation/sklearn/__init__.py | 524 ++++++++++++++++-- .../tests/test_sklearn.py | 71 ++- 2 files changed, 539 insertions(+), 56 deletions(-) diff --git a/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py b/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py index b9d847f9ed4..8d3b2fedfff 100644 --- a/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py @@ -1,42 +1,152 @@ +# Copyright 2020, OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +The integration with sklearn supports the scikit-learn compatible libraries, +it can be enabled by using ``SklearnInstrumentor``. + +.. sklearn: https://github.com/scikit-learn/scikit-learn + +Usage +----- + +Package instrumentation example: + +.. code-block:: python + + from opentelemetry.instrumentation.sklearn import SklearnInstrumentor + + # instrument the sklearn library + SklearnInstrumentor().instrument() + + # instrument sklearn and other libraries + SklearnInstrumentor(packages=["lightgbm", "xgboost"]).instrument() + + +Model intrumentation example: + +.. code-block:: python + + from opentelemetry.instrumentation.sklearn import SklearnInstrumentor + from sklearn.datasets import load_iris + from sklearn.ensemble import RandomForestClassifier + from sklearn.model_selection import train_test_split + from sklearn.pipeline import Pipeline + + X, y = load_iris(return_X_y=True) + X_train, X_test, y_train, y_test = train_test_split(X, y) + + model = Pipeline( + [ + ("class", RandomForestClassifier(n_estimators=10)), + ] + ) + + model.fit(X_train, y_train) + + SklearnInstrumentor().instrument_estimator(model) + +""" import logging +import os from functools import wraps -from typing import Callable, Dict, List, MutableMapping, Sequence, Type +from importlib import import_module +from inspect import isclass +from pkgutil import iter_modules +from typing import Callable, Dict, List, MutableMapping, Sequence, Type, Union from sklearn.base import BaseEstimator from sklearn.pipeline import FeatureUnion, Pipeline +from sklearn.tree import BaseDecisionTree +from opentelemetry.instrumentation.instrumentor import BaseInstrumentor from opentelemetry.instrumentation.sklearn.version import __version__ from opentelemetry.trace import get_tracer logger = logging.getLogger(__name__) -def implement_spans(func: Callable, estimator_name: str): +def implement_spans( + func: Callable, estimator: Union[BaseEstimator, Type[BaseEstimator]] +): """Wrap the method call with a span. Args: func: A callable to be wrapped in a span - estimator_name: The name of the estimator class. We pass estimator name - here because there are some wrapped methods in Pipeline that don't - have ``__self__`` to access the class name. + estimator: An instance or class of an estimator. Returns: The passed function wrapped in a span. """ - logger.debug("Instrumenting: %s.%s", estimator_name, func.__name__) + if isclass(estimator): + name = estimator.__name__ + else: + name = estimator.__class__.__name__ + logger.debug("Instrumenting: %s.%s", name, func.__name__) @wraps(func) def wrapper(*args, **kwargs): with get_tracer(__name__, __version__).start_as_current_span( - name="{cls}.{func}".format(cls=estimator_name, func=func.__name__) + name="{cls}.{func}".format(cls=name, func=func.__name__) ): return func(*args, **kwargs) return wrapper +def get_base_estimators(packages: List[str]) -> Dict[str, Type[BaseEstimator]]: + """Walk package hierarchies to get BaseEstimator-derived classes. + + Args: + packages (List[str]): A list of package names to instrument. + + Returns: + A dictionary of qualnames and classes inheriting from + ``BaseEstimator``. + """ + klasses = dict() + for package_name in packages: + lib = import_module(package_name) + package_dir = os.path.dirname(lib.__file__) + for (_, module_name, _) in iter_modules([package_dir]): + # import the module and iterate through its attributes + try: + module = import_module(f"{package_name}.{module_name}") + except ImportError: + logger.warning( + "Unable to import %s.%s", package_name, module_name + ) + continue + for attribute_name in dir(module): + attrib = getattr(module, attribute_name) + if isclass(attrib) and issubclass(attrib, BaseEstimator): + klasses[ + f"{package_name}.{module_name}.{attribute_name}" + ] = attrib + return klasses + + # Methods on which spans should be applied. -DEFAULT_METHODS = ["fit", "transform", "predict", "predict_proba"] +DEFAULT_METHODS = [ + "fit", + "transform", + "predict", + "predict_proba", + "_fit", + "_transform", + "_predict", + "_predict_proba", +] # Classes and their attributes which contain a list of tupled estimators # through which we should walk recursively for estimators. @@ -49,8 +159,14 @@ def wrapper(*args, **kwargs): # estimators through which we should walk recursively for estimators. DEFAULT_ATTRIBS = {} +# Classes (including children) explicitly excluded from autoinstrumentation +DEFAULT_EXCLUDE_CLASSES = [BaseDecisionTree] + +# Default packages for autoinstrumentation +DEFAULT_PACKAGES = ["sklearn"] -class SklearnInstrumentor: + +class SklearnInstrumentor(BaseInstrumentor): """Instrument a fitted sklearn model with opentelemetry spans. Instrument methods of ``BaseEstimator``-derived components in a sklearn @@ -81,7 +197,21 @@ class descendent) is being instrumented with opentelemetry. Within a parent transformer has ``n_jobs`` parameter set to anything besides ``None`` or ``1``. - Example: + Package instrumentation example: + + .. code-block:: python + + from opentelemetry.instrumentation.sklearn import SklearnInstrumentor + + # instrument the sklearn library + SklearnInstrumentor().instrument() + + # instrument additional libraries + packages = ["sklearn", "lightgbm", "xgboost"] + SklearnInstrumentor(packages=packages).instrument() + + + Model intrumentation example: .. code-block:: python @@ -107,23 +237,41 @@ class descendent) is being instrumented with opentelemetry. Within a Args: methods (list): A list of method names on which to instrument a span. This list of methods will be checked on all estimators in the model - hierarchy. + hierarchy. Used in package and model instrumentation recurse_attribs (dict): A dictionary of ``BaseEstimator``-derived sklearn classes as keys, with values being a list of attributes. Each attribute represents either an estimator or list of estimators on which to also implement spans. An example is - ``RandomForestClassifier`` and its attribute ``estimators_`` + ``RandomForestClassifier`` and its attribute ``estimators_``. Used + in model instrumentation only. recurse_namedtuple_attribs (dict): A dictionary of ``BaseEstimator``- derived sklearn types as keys, with values being a list of attribute names. Each attribute represents a list of 2-tuples in which the first element is the estimator name, and the second element is the estimator. Defaults include sklearn's ``Pipeline`` and its attribute ``steps``, and the ``FeatureUnion`` and its - attribute ``transformer_list``. + attribute ``transformer_list``. Used in model instrumentation only. spanner: A function with signature (func, str) which decorates instance methods with opentelemetry spans. + packages: A list of additional sklearn-compatible packages to + instrument. Used with package instrumentation only. + exclude_classes: A list of classes to exclude from instrumentation. + Child classes are also excluded. Default is sklearn's + ``[BaseDecisionTree]``. """ + def __new__(cls, *args, **kwargs): + """Override new. + + The base class' new method passes args and kwargs. We override because + we init the class with configuration and Python raises TypeError when + additional arguments are passed to the object.__new__() method. + """ + if cls._instance is None: + cls._instance = object.__new__(cls) + + return cls._instance + def __init__( self, methods: List[str] = None, @@ -132,6 +280,8 @@ def __init__( Type[BaseEstimator], List[str] ] = None, spanner: Callable = implement_spans, + packages: List[str] = None, + exclude_classes: List[Type] = None, ): self.methods = methods or DEFAULT_METHODS self.recurse_attribs = recurse_attribs or DEFAULT_ATTRIBS @@ -139,6 +289,36 @@ def __init__( recurse_namedtuple_attribs or DEFAULT_NAMEDTUPLE_ATTRIBS ) self.spanner = spanner + self.packages = packages or DEFAULT_PACKAGES + if exclude_classes is None: + self.exclude_classes = tuple(DEFAULT_EXCLUDE_CLASSES) + else: + self.exclude_classes = tuple(exclude_classes) + + def _instrument(self, **kwargs): + """Instrument the library, and any additional specified on init.""" + klasses = get_base_estimators(packages=self.packages) + for _, klass in klasses.items(): + if issubclass(klass, self.exclude_classes): + logger.debug("Not instrumenting (excluded): %s", str(klass)) + else: + logger.debug("Instrumenting: %s", str(klass)) + for method_name in self.methods: + if hasattr(klass, method_name): + self._instrument_class_method( + estimator=klass, method_name=method_name + ) + + def _uninstrument(self, **kwargs): + """Uninstrument the library""" + klasses = get_base_estimators(packages=self.packages) + for _, klass in klasses.items(): + logger.debug("Uninstrumenting: %s", str(klass)) + for method_name in self.methods: + if hasattr(klass, method_name): + self._uninstrument_class_method( + estimator=klass, method_name=method_name + ) def instrument_estimator(self, estimator: BaseEstimator): """Instrument a fitted estimator and its hierarchy where configured. @@ -147,6 +327,13 @@ def instrument_estimator(self, estimator: BaseEstimator): estimator (BaseEstimator): A fitted ``sklearn`` estimator, typically a ``Pipeline`` instance. """ + if isinstance(estimator, self.exclude_classes): + logger.debug( + "Not instrumenting (excluded): %s", + estimator.__class__.__name__, + ) + return + if isinstance( estimator, tuple(self.recurse_namedtuple_attribs.keys()) ): @@ -156,62 +343,254 @@ def instrument_estimator(self, estimator: BaseEstimator): self._instrument_estimator_attribute(estimator=estimator) for method_name in self.methods: - self._instrument_estimator_method( - estimator=estimator, method_name=method_name + if hasattr(estimator, method_name): + self._instrument_instance_method( + estimator=estimator, method_name=method_name + ) + + def uninstrument_estimator(self, estimator: BaseEstimator): + """Uninstrument a fitted estimator and its hierarchy where configured. + + Args: + estimator (BaseEstimator): A fitted ``sklearn`` estimator, + typically a ``Pipeline`` instance. + """ + if isinstance(estimator, self.exclude_classes): + logger.debug( + "Not uninstrumenting (excluded): %s", + estimator.__class__.__name__, + ) + return + + if isinstance( + estimator, tuple(self.recurse_namedtuple_attribs.keys()) + ): + self._uninstrument_estimator_namedtuple(estimator=estimator) + + if isinstance(estimator, tuple(self.recurse_attribs.keys())): + self._uninstrument_estimator_attribute(estimator=estimator) + + for method_name in self.methods: + if hasattr(estimator, method_name): + self._uninstrument_instance_method( + estimator=estimator, method_name=method_name + ) + + def _check_instrumented( + self, + estimator: Union[BaseEstimator, Type[BaseEstimator]], + method_name: str, + ) -> bool: + """Check an estimator-method is instrumented. + + Args: + estimator (BaseEstimator): A class or instance of an ``sklearn`` + estimator. + method_name (str): The method name of the estimator on which to + check for instrumentation. + """ + orig_method_name = "_original_" + method_name + has_original = hasattr(estimator, orig_method_name) + orig_class, orig_method = getattr( + estimator, orig_method_name, (None, None) + ) + same_class = orig_class == estimator + if has_original and same_class: + class_method = self._unwrap_function( + getattr(estimator, method_name) + ) + # if they match then the subclass doesn't override + # if they don't then the overridden method needs instrumentation + if class_method.__name__ == orig_method.__name__: + return True + return False + + def _uninstrument_class_method( + self, estimator: Type[BaseEstimator], method_name: str + ): + """Uninstrument a class method. + + Replaces the patched method with the original, and deletes the + attribute which stored the original method. + + Args: + estimator (BaseEstimator): A class or instance of an ``sklearn`` + estimator. + method_name (str): The method name of the estimator on which to + apply a span. + """ + orig_method_name = "_original_" + method_name + if isclass(estimator): + qualname = estimator.__qualname__ + else: + qualname = estimator.__class__.__qualname__ + if self._check_instrumented(estimator, method_name): + logger.debug( + "Uninstrumenting: %s.%s", qualname, method_name, + ) + _, orig_method = getattr(estimator, orig_method_name) + wrapper = self._function_wrapper_wrapper(orig_method) + if wrapper is not None: + setattr( + wrapper, "__wrapped__", orig_method, + ) + else: + setattr( + estimator, method_name, orig_method, + ) + delattr(estimator, orig_method_name) + else: + logger.debug( + "Already uninstrumented: %s.%s", qualname, method_name, ) - def _instrument_estimator_method( + def _uninstrument_instance_method( self, estimator: BaseEstimator, method_name: str + ): + """Uninstrument an instance method. + + Replaces the patched method with the original, and deletes the + attribute which stored the original method. + + Args: + estimator (BaseEstimator): A class or instance of an ``sklearn`` + estimator. + method_name (str): The method name of the estimator on which to + apply a span. + """ + orig_method_name = "_original_" + method_name + if isclass(estimator): + qualname = estimator.__qualname__ + else: + qualname = estimator.__class__.__qualname__ + if self._check_instrumented(estimator, method_name): + logger.debug( + "Uninstrumenting: %s.%s", qualname, method_name, + ) + _, orig_method = getattr(estimator, orig_method_name) + setattr( + estimator, method_name, orig_method, + ) + delattr(estimator, orig_method_name) + else: + logger.debug( + "Already uninstrumented: %s.%s", qualname, method_name, + ) + + def _instrument_class_method( + self, estimator: Type[BaseEstimator], method_name: str ): """Instrument an estimator method with a span. + When instrumenting we attach a tuple of (Class, method) to the + attribute ``_original_`` for each method. This allows + us to replace the patched with the original in unstrumentation, but + also allows proper instrumentation of child classes without + instrumenting inherited methods twice. + Args: - estimator (BaseEstimator): A fitted ``sklearn`` estimator. + estimator (BaseEstimator): A ``BaseEstimator``-derived + class method_name (str): The method name of the estimator on which to apply a span. """ - if hasattr(estimator, method_name): - class_attr = getattr(type(estimator), method_name, None) - # handle attributes which are properties - if isinstance(class_attr, property): - # grab the returned value of the property - attrib = getattr(estimator, method_name) - # for properties, we get the actual instance ownership - # and patch the returned attribute on the instance - is_callable = isinstance(attrib, Callable) - if is_callable and "__self__" in dir(attrib): - instance = getattr(attrib, "__self__") - setattr( - instance, - attrib.__name__, - self.spanner(attrib, estimator.__class__.__name__), - ) - # sometimes functions are wrapped, - # so we unwrap them here and recurse - elif is_callable and "__wrapped__" in dir(attrib): - instance = getattr(attrib, "__wrapped__") - self._instrument_estimator_method( - estimator=instance, method_name=method_name - ) - else: - logger.debug( - "Unable to instrument: %s.%s", - estimator.__class__.__name__, - method_name, - ) + if self._check_instrumented(estimator, method_name): + logger.debug( + "Already instrumented: %s.%s", + estimator.__qualname__, + method_name, + ) + return + class_attr = getattr(estimator, method_name) + if isinstance(class_attr, property): + logger.debug( + "Not instrumenting found property: %s.%s", + estimator.__qualname__, + method_name, + ) + else: + wrapper = self._function_wrapper(class_attr) + if wrapper is not None: + setattr( + estimator, + "_original_" + method_name, + (estimator, wrapper.__wrapped__), + ) + setattr( + wrapper, + "__wrapped__", + self.spanner(wrapper.__wrapped__, estimator), + ) else: - method = getattr(estimator, method_name) + setattr( + estimator, + "_original_" + method_name, + (estimator, class_attr), + ) setattr( estimator, method_name, - self.spanner(method, estimator.__class__.__name__), + self.spanner(class_attr, estimator), ) - else: + + def _function_wrapper(self, function): + """Get the inner-most decorator of a function.""" + if hasattr(function, "__wrapped__"): + if hasattr(function.__wrapped__, "__wrapped__"): + return self._function_wrapper(function.__wrapped__) + return function + return None + + def _function_wrapper_wrapper(self, function): + """Get the second inner-most decorator of a function""" + if hasattr(function, "__wrapped__"): + if hasattr(function.__wrapped__, "__wrapped__"): + if hasattr(function.__wrapped__.__wrapped__, "__wrapped__"): + return self._function_wrapper_wrapper(function.__wrapped__) + return function + return None + + def _unwrap_function(self, function): + """Fetch the function underlying any decorators""" + if hasattr(function, "__wrapped__"): + return self._unwrap_function(function.__wrapped__) + return function + + def _instrument_instance_method( + self, estimator: BaseEstimator, method_name: str + ): + """Instrument an estimator instance method with a span. + + When instrumenting we attach a tuple of (Class, method) to the + attribute ``_original_`` for each method. This allows + us to replace the patched with the original in unstrumentation. + + Args: + estimator (BaseEstimator): A fitted ``sklearn`` estimator. + method_name (str): The method name of the estimator on which to + apply a span. + """ + if self._check_instrumented(estimator, method_name): logger.debug( - "Unable to instrument (method not found): %s.%s", - estimator.__class__.__name__, + "Already instrumented: %s.%s", + estimator.__class__.__qualname__, method_name, ) + return + + class_attr = getattr(type(estimator), method_name, None) + if isinstance(class_attr, property): + logger.debug( + "Not instrumenting found property: %s.%s", + estimator.__class__.__qualname__, + method_name, + ) + else: + method = getattr(estimator, method_name) + setattr(estimator, "_original_" + method_name, (estimator, method)) + setattr( + estimator, method_name, self.spanner(method, estimator), + ) def _instrument_estimator_attribute(self, estimator: BaseEstimator): """Instrument instance attributes which also contain estimators. @@ -255,3 +634,46 @@ def _instrument_estimator_namedtuple(self, estimator: BaseEstimator): for attrib in attribs: for _, est in getattr(estimator, attrib): self.instrument_estimator(estimator=est) + + def _uninstrument_estimator_attribute(self, estimator: BaseEstimator): + """Uninstrument instance attributes which also contain estimators. + + Handle instance attributes which are also estimators, are a list + (Sequence) of estimators, or are mappings (dictionary) in which + the values are estimators. + + Examples include ``RandomForestClassifier`` and + ``MultiOutputRegressor`` instances which have attributes + ``estimators_`` attributes. + + Args: + estimator (BaseEstimator): A fitted ``sklearn`` estimator, with an + attribute which also contains an estimator or collection of + estimators. + """ + attribs = self.recurse_attribs.get(estimator.__class__, []) + for attrib in attribs: + attrib_value = getattr(estimator, attrib) + if isinstance(attrib_value, Sequence): + for value in attrib_value: + self.uninstrument_estimator(estimator=value) + elif isinstance(attrib_value, MutableMapping): + for value in attrib_value.values(): + self.uninstrument_estimator(estimator=value) + else: + self.uninstrument_estimator(estimator=attrib_value) + + def _uninstrument_estimator_namedtuple(self, estimator: BaseEstimator): + """Uninstrument attributes with (name, estimator) tupled components. + + Examples include Pipeline and FeatureUnion instances which + have attributes steps and transformer_list, respectively. + + Args: + estimator: A fitted sklearn estimator, with an attribute which also + contains an estimator or collection of estimators. + """ + attribs = self.recurse_namedtuple_attribs.get(estimator.__class__, []) + for attrib in attribs: + for _, est in getattr(estimator, attrib): + self.uninstrument_estimator(estimator=est) diff --git a/instrumentation/opentelemetry-instrumentation-sklearn/tests/test_sklearn.py b/instrumentation/opentelemetry-instrumentation-sklearn/tests/test_sklearn.py index 9614504eb28..9661202414f 100644 --- a/instrumentation/opentelemetry-instrumentation-sklearn/tests/test_sklearn.py +++ b/instrumentation/opentelemetry-instrumentation-sklearn/tests/test_sklearn.py @@ -1,6 +1,11 @@ from sklearn.ensemble import RandomForestClassifier -from opentelemetry.instrumentation.sklearn import SklearnInstrumentor +from opentelemetry.instrumentation.sklearn import ( + DEFAULT_EXCLUDE_CLASSES, + DEFAULT_METHODS, + SklearnInstrumentor, + get_base_estimators, +) from opentelemetry.test.test_base import TestBase from opentelemetry.trace import SpanKind @@ -8,12 +13,49 @@ class TestSklearn(TestBase): + def test_package_instrumentation(self): + ski = SklearnInstrumentor(packages=["sklearn"]) + + base_estimators = get_base_estimators(packages=["sklearn"]) + + ski.instrument() + # assert instrumented + for _, estimator in base_estimators.items(): + for method_name in DEFAULT_METHODS: + if issubclass(estimator, tuple(DEFAULT_EXCLUDE_CLASSES)): + assert not hasattr(estimator, "_original_" + method_name) + continue + class_attr = getattr(estimator, method_name, None) + if isinstance(class_attr, property): + assert not hasattr(estimator, "_original_" + method_name) + continue + if hasattr(estimator, method_name): + assert hasattr(estimator, "_original_" + method_name) + + ski.uninstrument() + # assert uninstrumented + for _, estimator in base_estimators.items(): + for method_name in DEFAULT_METHODS: + if issubclass(estimator, tuple(DEFAULT_EXCLUDE_CLASSES)): + assert not hasattr(estimator, "_original_" + method_name) + continue + class_attr = getattr(estimator, method_name, None) + if isinstance(class_attr, property): + assert not hasattr(estimator, "_original_" + method_name) + continue + if hasattr(estimator, method_name): + assert not hasattr(estimator, "_original_" + method_name) + + self.memory_exporter.clear() + def test_span_properties(self): """Test that we get all of the spans we expect.""" model = pipeline() - SklearnInstrumentor().instrument_estimator(estimator=model) + ski = SklearnInstrumentor() + ski.instrument_estimator(estimator=model) x_test = random_input() + model.predict(x_test) spans = self.memory_exporter.get_finished_spans() @@ -50,16 +92,35 @@ def test_span_properties(self): self.assertEqual(span.name, "Pipeline.predict") self.assertEqual(span.kind, SpanKind.INTERNAL) + self.memory_exporter.clear() + + # uninstrument + ski.uninstrument_estimator(estimator=model) + x_test = random_input() + model.predict(x_test) + spans = self.memory_exporter.get_finished_spans() + self.assertEqual(len(spans), 0) + def test_attrib_config(self): """Test that the attribute config makes spans on the decision trees.""" model = pipeline() attrib_config = {RandomForestClassifier: ["estimators_"]} - SklearnInstrumentor( - recurse_attribs=attrib_config - ).instrument_estimator(estimator=model) + ski = SklearnInstrumentor( + recurse_attribs=attrib_config, + exclude_classes=[], # decision trees excluded by default + ) + ski.instrument_estimator(estimator=model) x_test = random_input() model.predict(x_test) spans = self.memory_exporter.get_finished_spans() self.assertEqual(len(spans), 8 + model.steps[-1][-1].n_estimators) + + self.memory_exporter.clear() + + ski.uninstrument_estimator(estimator=model) + x_test = random_input() + model.predict(x_test) + spans = self.memory_exporter.get_finished_spans() + self.assertEqual(len(spans), 0) From 2e330eb6256b9621086f40a0a2b31782d533fe90 Mon Sep 17 00:00:00 2001 From: flynn Date: Mon, 28 Sep 2020 00:09:53 -0400 Subject: [PATCH 08/18] rm f-strings --- .../src/opentelemetry/instrumentation/sklearn/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py b/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py index 8d3b2fedfff..3a6234044bc 100644 --- a/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py @@ -121,7 +121,7 @@ def get_base_estimators(packages: List[str]) -> Dict[str, Type[BaseEstimator]]: for (_, module_name, _) in iter_modules([package_dir]): # import the module and iterate through its attributes try: - module = import_module(f"{package_name}.{module_name}") + module = import_module(package_name + "." + module_name) except ImportError: logger.warning( "Unable to import %s.%s", package_name, module_name @@ -131,7 +131,7 @@ def get_base_estimators(packages: List[str]) -> Dict[str, Type[BaseEstimator]]: attrib = getattr(module, attribute_name) if isclass(attrib) and issubclass(attrib, BaseEstimator): klasses[ - f"{package_name}.{module_name}.{attribute_name}" + ".".join([package_name, module_name, attribute_name]) ] = attrib return klasses From 07b024ac284b72e7779b4dba099462c8686bac41 Mon Sep 17 00:00:00 2001 From: flynn Date: Thu, 1 Oct 2020 23:28:15 -0400 Subject: [PATCH 09/18] update package instrumentation test --- docs/conf.py | 2 +- .../instrumentation/sklearn/__init__.py | 45 ++++++------------- .../tests/test_sklearn.py | 22 ++++++++- 3 files changed, 34 insertions(+), 35 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index ee5070ec639..8976bfe02ce 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -112,7 +112,7 @@ "any", "opentelemetry.trace.propagation.textmap.TextMapPropagator.inject", ), - ("py:class", "BaseEstimator"), + ("py:class", "sklearn.base.BaseEstimator"), ] # Add any paths that contain templates here, relative to this directory. diff --git a/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py b/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py index 3a6234044bc..98c3baa9b75 100644 --- a/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py @@ -429,15 +429,9 @@ def _uninstrument_class_method( "Uninstrumenting: %s.%s", qualname, method_name, ) _, orig_method = getattr(estimator, orig_method_name) - wrapper = self._function_wrapper_wrapper(orig_method) - if wrapper is not None: - setattr( - wrapper, "__wrapped__", orig_method, - ) - else: - setattr( - estimator, method_name, orig_method, - ) + setattr( + estimator, method_name, orig_method, + ) delattr(estimator, orig_method_name) else: logger.debug( @@ -509,29 +503,16 @@ def _instrument_class_method( method_name, ) else: - wrapper = self._function_wrapper(class_attr) - if wrapper is not None: - setattr( - estimator, - "_original_" + method_name, - (estimator, wrapper.__wrapped__), - ) - setattr( - wrapper, - "__wrapped__", - self.spanner(wrapper.__wrapped__, estimator), - ) - else: - setattr( - estimator, - "_original_" + method_name, - (estimator, class_attr), - ) - setattr( - estimator, - method_name, - self.spanner(class_attr, estimator), - ) + setattr( + estimator, + "_original_" + method_name, + (estimator, class_attr), + ) + setattr( + estimator, + method_name, + self.spanner(class_attr, estimator), + ) def _function_wrapper(self, function): """Get the inner-most decorator of a function.""" diff --git a/instrumentation/opentelemetry-instrumentation-sklearn/tests/test_sklearn.py b/instrumentation/opentelemetry-instrumentation-sklearn/tests/test_sklearn.py index 9661202414f..bf238064b76 100644 --- a/instrumentation/opentelemetry-instrumentation-sklearn/tests/test_sklearn.py +++ b/instrumentation/opentelemetry-instrumentation-sklearn/tests/test_sklearn.py @@ -14,10 +14,12 @@ class TestSklearn(TestBase): def test_package_instrumentation(self): - ski = SklearnInstrumentor(packages=["sklearn"]) + ski = SklearnInstrumentor() base_estimators = get_base_estimators(packages=["sklearn"]) + model = pipeline() + ski.instrument() # assert instrumented for _, estimator in base_estimators.items(): @@ -32,6 +34,16 @@ def test_package_instrumentation(self): if hasattr(estimator, method_name): assert hasattr(estimator, "_original_" + method_name) + x_test = random_input() + + model.predict(x_test) + + spans = self.memory_exporter.get_finished_spans() + for span in spans: + print(span) + self.assertEqual(len(spans), 8) + self.memory_exporter.clear() + ski.uninstrument() # assert uninstrumented for _, estimator in base_estimators.items(): @@ -46,7 +58,13 @@ def test_package_instrumentation(self): if hasattr(estimator, method_name): assert not hasattr(estimator, "_original_" + method_name) - self.memory_exporter.clear() + model = pipeline() + x_test = random_input() + + model.predict(x_test) + + spans = self.memory_exporter.get_finished_spans() + self.assertEqual(len(spans), 0) def test_span_properties(self): """Test that we get all of the spans we expect.""" From 9c55e999defb4bfbad1761ee751fe88df0bae3a8 Mon Sep 17 00:00:00 2001 From: flynn Date: Thu, 1 Oct 2020 23:37:26 -0400 Subject: [PATCH 10/18] format --- .../src/opentelemetry/instrumentation/sklearn/__init__.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py b/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py index 98c3baa9b75..80d97e7854f 100644 --- a/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py @@ -504,14 +504,10 @@ def _instrument_class_method( ) else: setattr( - estimator, - "_original_" + method_name, - (estimator, class_attr), + estimator, "_original_" + method_name, (estimator, class_attr), ) setattr( - estimator, - method_name, - self.spanner(class_attr, estimator), + estimator, method_name, self.spanner(class_attr, estimator), ) def _function_wrapper(self, function): From de241d398948fc35511d21262c4d57e79fdb7b1a Mon Sep 17 00:00:00 2001 From: flynn Date: Wed, 28 Oct 2020 21:06:55 -0400 Subject: [PATCH 11/18] handle instrumentation of delegators --- docs/conf.py | 1 + .../instrumentation/sklearn/__init__.py | 95 ++++++++++++++----- .../tests/test_sklearn.py | 69 +++++++++----- 3 files changed, 117 insertions(+), 48 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 8976bfe02ce..72a6ba45719 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -113,6 +113,7 @@ "opentelemetry.trace.propagation.textmap.TextMapPropagator.inject", ), ("py:class", "sklearn.base.BaseEstimator"), + ("py:class", "sklearn.utils.metaestimators._IffHasAttrDescriptor"), ] # Add any paths that contain templates here, relative to this directory. diff --git a/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py b/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py index 80d97e7854f..193cfc58ba6 100644 --- a/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py @@ -68,6 +68,7 @@ from sklearn.base import BaseEstimator from sklearn.pipeline import FeatureUnion, Pipeline from sklearn.tree import BaseDecisionTree +from sklearn.utils.metaestimators import _IffHasAttrDescriptor from opentelemetry.instrumentation.instrumentor import BaseInstrumentor from opentelemetry.instrumentation.sklearn.version import __version__ @@ -104,6 +105,55 @@ def wrapper(*args, **kwargs): return wrapper +def implement_spans_delegator(obj: _IffHasAttrDescriptor): + """Wrap the descriptor's fn with a span. + + Args: + obj: An instance of _IffHasAttrDescriptor + """ + # Don't instrument inherited delegators + if hasattr(obj, "_otel_original_fn"): + logger.debug("Already instrumented: %s", obj.fn.__qualname__) + return + + def implement_spans_get(func: Callable): + @wraps(func) + def wrapper(*args, **kwargs): + with get_tracer(__name__, __version__).start_as_current_span( + name=func.__qualname__ + ): + return func(*args, **kwargs) + + return wrapper + + logger.debug("Instrumenting: %s", obj.fn.__qualname__) + + setattr(obj, "_otel_original_fn", getattr(obj, "fn")) + setattr(obj, "fn", implement_spans_get(obj.fn)) + + +def get_delegator( + estimator: Type[BaseEstimator], method_name: str +) -> Union[_IffHasAttrDescriptor, None]: + """Get the delegator from a class method or None. + + Args: + estimator (BaseEstimator): A class derived from ``sklearn``'s + ``BaseEstimator``. + method_name (str): The method name of the estimator on which to + check for delegation. + + Returns: + The delegator, if one exists, otherwise None. + """ + class_attr = getattr(estimator, method_name) + if getattr(class_attr, "__closure__", None) is not None: + for cell in class_attr.__closure__: + if isinstance(cell.cell_contents, _IffHasAttrDescriptor): + return cell.cell_contents + return None + + def get_base_estimators(packages: List[str]) -> Dict[str, Type[BaseEstimator]]: """Walk package hierarchies to get BaseEstimator-derived classes. @@ -389,7 +439,7 @@ def _check_instrumented( method_name (str): The method name of the estimator on which to check for instrumentation. """ - orig_method_name = "_original_" + method_name + orig_method_name = "_otel_original_" + method_name has_original = hasattr(estimator, orig_method_name) orig_class, orig_method = getattr( estimator, orig_method_name, (None, None) @@ -419,11 +469,12 @@ def _uninstrument_class_method( method_name (str): The method name of the estimator on which to apply a span. """ - orig_method_name = "_original_" + method_name + orig_method_name = "_otel_original_" + method_name if isclass(estimator): qualname = estimator.__qualname__ else: qualname = estimator.__class__.__qualname__ + delegator = get_delegator(estimator, method_name) if self._check_instrumented(estimator, method_name): logger.debug( "Uninstrumenting: %s.%s", qualname, method_name, @@ -433,6 +484,16 @@ def _uninstrument_class_method( estimator, method_name, orig_method, ) delattr(estimator, orig_method_name) + elif delegator is not None: + if not hasattr(delegator, "_otel_original_fn"): + logger.debug( + "Already uninstrumented: %s.%s", qualname, method_name, + ) + return + setattr( + delegator, "fn", getattr(delegator, "_otel_original_fn"), + ) + delattr(delegator, "_otel_original_fn") else: logger.debug( "Already uninstrumented: %s.%s", qualname, method_name, @@ -452,7 +513,7 @@ def _uninstrument_instance_method( method_name (str): The method name of the estimator on which to apply a span. """ - orig_method_name = "_original_" + method_name + orig_method_name = "_otel_original_" + method_name if isclass(estimator): qualname = estimator.__qualname__ else: @@ -496,37 +557,25 @@ def _instrument_class_method( ) return class_attr = getattr(estimator, method_name) + delegator = get_delegator(estimator, method_name) if isinstance(class_attr, property): logger.debug( "Not instrumenting found property: %s.%s", estimator.__qualname__, method_name, ) + elif delegator is not None: + implement_spans_delegator(delegator) else: setattr( - estimator, "_original_" + method_name, (estimator, class_attr), + estimator, + "_otel_original_" + method_name, + (estimator, class_attr), ) setattr( estimator, method_name, self.spanner(class_attr, estimator), ) - def _function_wrapper(self, function): - """Get the inner-most decorator of a function.""" - if hasattr(function, "__wrapped__"): - if hasattr(function.__wrapped__, "__wrapped__"): - return self._function_wrapper(function.__wrapped__) - return function - return None - - def _function_wrapper_wrapper(self, function): - """Get the second inner-most decorator of a function""" - if hasattr(function, "__wrapped__"): - if hasattr(function.__wrapped__, "__wrapped__"): - if hasattr(function.__wrapped__.__wrapped__, "__wrapped__"): - return self._function_wrapper_wrapper(function.__wrapped__) - return function - return None - def _unwrap_function(self, function): """Fetch the function underlying any decorators""" if hasattr(function, "__wrapped__"): @@ -564,7 +613,9 @@ def _instrument_instance_method( ) else: method = getattr(estimator, method_name) - setattr(estimator, "_original_" + method_name, (estimator, method)) + setattr( + estimator, "_otel_original_" + method_name, (estimator, method) + ) setattr( estimator, method_name, self.spanner(method, estimator), ) diff --git a/instrumentation/opentelemetry-instrumentation-sklearn/tests/test_sklearn.py b/instrumentation/opentelemetry-instrumentation-sklearn/tests/test_sklearn.py index bf238064b76..2aa43e700ae 100644 --- a/instrumentation/opentelemetry-instrumentation-sklearn/tests/test_sklearn.py +++ b/instrumentation/opentelemetry-instrumentation-sklearn/tests/test_sklearn.py @@ -5,6 +5,7 @@ DEFAULT_METHODS, SklearnInstrumentor, get_base_estimators, + get_delegator, ) from opentelemetry.test.test_base import TestBase from opentelemetry.trace import SpanKind @@ -12,6 +13,46 @@ from .fixtures import pipeline, random_input +def assert_instrumented(base_estimators): + for _, estimator in base_estimators.items(): + for method_name in DEFAULT_METHODS: + original_method_name = "_otel_original_" + method_name + if issubclass(estimator, tuple(DEFAULT_EXCLUDE_CLASSES)): + assert not hasattr(estimator, original_method_name) + continue + class_attr = getattr(estimator, method_name, None) + if isinstance(class_attr, property): + assert not hasattr(estimator, original_method_name) + continue + delegator = None + if hasattr(estimator, method_name): + delegator = get_delegator(estimator, method_name) + if delegator is not None: + assert hasattr(delegator, "_otel_original_fn") + elif hasattr(estimator, method_name): + assert hasattr(estimator, original_method_name) + + +def assert_uninstrumented(base_estimators): + for _, estimator in base_estimators.items(): + for method_name in DEFAULT_METHODS: + original_method_name = "_otel_original_" + method_name + if issubclass(estimator, tuple(DEFAULT_EXCLUDE_CLASSES)): + assert not hasattr(estimator, original_method_name) + continue + class_attr = getattr(estimator, method_name, None) + if isinstance(class_attr, property): + assert not hasattr(estimator, original_method_name) + continue + delegator = None + if hasattr(estimator, method_name): + delegator = get_delegator(estimator, method_name) + if delegator is not None: + assert not hasattr(delegator, "_otel_original_fn") + elif hasattr(estimator, method_name): + assert not hasattr(estimator, original_method_name) + + class TestSklearn(TestBase): def test_package_instrumentation(self): ski = SklearnInstrumentor() @@ -21,42 +62,18 @@ def test_package_instrumentation(self): model = pipeline() ski.instrument() - # assert instrumented - for _, estimator in base_estimators.items(): - for method_name in DEFAULT_METHODS: - if issubclass(estimator, tuple(DEFAULT_EXCLUDE_CLASSES)): - assert not hasattr(estimator, "_original_" + method_name) - continue - class_attr = getattr(estimator, method_name, None) - if isinstance(class_attr, property): - assert not hasattr(estimator, "_original_" + method_name) - continue - if hasattr(estimator, method_name): - assert hasattr(estimator, "_original_" + method_name) + assert_instrumented(base_estimators) x_test = random_input() model.predict(x_test) spans = self.memory_exporter.get_finished_spans() - for span in spans: - print(span) self.assertEqual(len(spans), 8) self.memory_exporter.clear() ski.uninstrument() - # assert uninstrumented - for _, estimator in base_estimators.items(): - for method_name in DEFAULT_METHODS: - if issubclass(estimator, tuple(DEFAULT_EXCLUDE_CLASSES)): - assert not hasattr(estimator, "_original_" + method_name) - continue - class_attr = getattr(estimator, method_name, None) - if isinstance(class_attr, property): - assert not hasattr(estimator, "_original_" + method_name) - continue - if hasattr(estimator, method_name): - assert not hasattr(estimator, "_original_" + method_name) + assert_uninstrumented(base_estimators) model = pipeline() x_test = random_input() From 5173b5184790b70a14428591c7460a632eafe504 Mon Sep 17 00:00:00 2001 From: flynn Date: Wed, 4 Nov 2020 01:55:24 -0500 Subject: [PATCH 12/18] fix ci --- .../opentelemetry-instrumentation-sklearn/setup.cfg | 4 ++-- .../instrumentation/sklearn/__init__.py | 13 ++++++------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/instrumentation/opentelemetry-instrumentation-sklearn/setup.cfg b/instrumentation/opentelemetry-instrumentation-sklearn/setup.cfg index e95c72d0c5d..3519b1ae03b 100644 --- a/instrumentation/opentelemetry-instrumentation-sklearn/setup.cfg +++ b/instrumentation/opentelemetry-instrumentation-sklearn/setup.cfg @@ -39,8 +39,8 @@ package_dir= =src packages=find_namespace: install_requires = - opentelemetry-api == 0.14.dev0 - opentelemetry-instrumentation == 0.14.dev0 + opentelemetry-api == 0.14b0 + opentelemetry-instrumentation == 0.14b0 scikit-learn ~= 0.22.0 [options.extras_require] diff --git a/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py b/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py index 193cfc58ba6..e57fe2130ea 100644 --- a/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py @@ -138,8 +138,7 @@ def get_delegator( """Get the delegator from a class method or None. Args: - estimator (BaseEstimator): A class derived from ``sklearn``'s - ``BaseEstimator``. + estimator: A class derived from ``sklearn``'s ``BaseEstimator``. method_name (str): The method name of the estimator on which to check for delegation. @@ -158,7 +157,7 @@ def get_base_estimators(packages: List[str]) -> Dict[str, Type[BaseEstimator]]: """Walk package hierarchies to get BaseEstimator-derived classes. Args: - packages (List[str]): A list of package names to instrument. + packages (list(str)): A list of package names to instrument. Returns: A dictionary of qualnames and classes inheriting from @@ -374,8 +373,8 @@ def instrument_estimator(self, estimator: BaseEstimator): """Instrument a fitted estimator and its hierarchy where configured. Args: - estimator (BaseEstimator): A fitted ``sklearn`` estimator, - typically a ``Pipeline`` instance. + estimator (sklearn.base.BaseEstimator): A fitted ``sklearn`` + estimator, typically a ``Pipeline`` instance. """ if isinstance(estimator, self.exclude_classes): logger.debug( @@ -402,8 +401,8 @@ def uninstrument_estimator(self, estimator: BaseEstimator): """Uninstrument a fitted estimator and its hierarchy where configured. Args: - estimator (BaseEstimator): A fitted ``sklearn`` estimator, - typically a ``Pipeline`` instance. + estimator (sklearn.base.BaseEstimator): A fitted ``sklearn`` + estimator, typically a ``Pipeline`` instance. """ if isinstance(estimator, self.exclude_classes): logger.debug( From fec31e9ad817731266c6ead1c2ce7f159a87adc3 Mon Sep 17 00:00:00 2001 From: flynn Date: Wed, 4 Nov 2020 02:38:18 -0500 Subject: [PATCH 13/18] update versions --- .../opentelemetry-instrumentation-sklearn/setup.cfg | 6 +++--- .../src/opentelemetry/instrumentation/sklearn/version.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/instrumentation/opentelemetry-instrumentation-sklearn/setup.cfg b/instrumentation/opentelemetry-instrumentation-sklearn/setup.cfg index 3519b1ae03b..dc6b8e02044 100644 --- a/instrumentation/opentelemetry-instrumentation-sklearn/setup.cfg +++ b/instrumentation/opentelemetry-instrumentation-sklearn/setup.cfg @@ -39,13 +39,13 @@ package_dir= =src packages=find_namespace: install_requires = - opentelemetry-api == 0.14b0 - opentelemetry-instrumentation == 0.14b0 + opentelemetry-api == 0.16.dev0 + opentelemetry-instrumentation == 0.16.dev0 scikit-learn ~= 0.22.0 [options.extras_require] test = - opentelemetry-test == 0.14.dev0 + opentelemetry-test == 0.16.dev0 [options.packages.find] where = src diff --git a/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/version.py b/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/version.py index eef24b9b14f..4c4951dcda1 100644 --- a/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/version.py +++ b/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/version.py @@ -1 +1 @@ -__version__ = "0.14.dev0" +__version__ = "0.16.dev0" From 4404dfe9ad5675c7a6c6858a8d35a1c7d83e9e8a Mon Sep 17 00:00:00 2001 From: flynn Date: Wed, 4 Nov 2020 03:07:11 -0500 Subject: [PATCH 14/18] allow attributes to be passed to model instrumentation --- .../instrumentation/sklearn/__init__.py | 62 ++++++++++++++----- .../tests/test_sklearn.py | 14 +++++ 2 files changed, 61 insertions(+), 15 deletions(-) diff --git a/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py b/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py index e57fe2130ea..bdaf2a15a22 100644 --- a/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py @@ -73,18 +73,22 @@ from opentelemetry.instrumentation.instrumentor import BaseInstrumentor from opentelemetry.instrumentation.sklearn.version import __version__ from opentelemetry.trace import get_tracer +from opentelemetry.util.types import Attributes logger = logging.getLogger(__name__) def implement_spans( - func: Callable, estimator: Union[BaseEstimator, Type[BaseEstimator]] + func: Callable, + estimator: Union[BaseEstimator, Type[BaseEstimator]], + attributes: Attributes = None, ): """Wrap the method call with a span. Args: func: A callable to be wrapped in a span - estimator: An instance or class of an estimator. + estimator: An instance or class of an estimator + attributes: Attributes to apply to the span Returns: The passed function wrapped in a span. @@ -98,7 +102,8 @@ def implement_spans( @wraps(func) def wrapper(*args, **kwargs): with get_tracer(__name__, __version__).start_as_current_span( - name="{cls}.{func}".format(cls=name, func=func.__name__) + name="{cls}.{func}".format(cls=name, func=func.__name__), + attributes=attributes, ): return func(*args, **kwargs) @@ -369,12 +374,15 @@ def _uninstrument(self, **kwargs): estimator=klass, method_name=method_name ) - def instrument_estimator(self, estimator: BaseEstimator): + def instrument_estimator( + self, estimator: BaseEstimator, attributes: Attributes = None + ): """Instrument a fitted estimator and its hierarchy where configured. Args: estimator (sklearn.base.BaseEstimator): A fitted ``sklearn`` estimator, typically a ``Pipeline`` instance. + attributes (dict): Attributes to attach to the spans. """ if isinstance(estimator, self.exclude_classes): logger.debug( @@ -386,15 +394,21 @@ def instrument_estimator(self, estimator: BaseEstimator): if isinstance( estimator, tuple(self.recurse_namedtuple_attribs.keys()) ): - self._instrument_estimator_namedtuple(estimator=estimator) + self._instrument_estimator_namedtuple( + estimator=estimator, attributes=attributes + ) if isinstance(estimator, tuple(self.recurse_attribs.keys())): - self._instrument_estimator_attribute(estimator=estimator) + self._instrument_estimator_attribute( + estimator=estimator, attributes=attributes + ) for method_name in self.methods: if hasattr(estimator, method_name): self._instrument_instance_method( - estimator=estimator, method_name=method_name + estimator=estimator, + method_name=method_name, + attributes=attributes, ) def uninstrument_estimator(self, estimator: BaseEstimator): @@ -582,7 +596,10 @@ def _unwrap_function(self, function): return function def _instrument_instance_method( - self, estimator: BaseEstimator, method_name: str + self, + estimator: BaseEstimator, + method_name: str, + attributes: Attributes = None, ): """Instrument an estimator instance method with a span. @@ -594,6 +611,7 @@ def _instrument_instance_method( estimator (BaseEstimator): A fitted ``sklearn`` estimator. method_name (str): The method name of the estimator on which to apply a span. + attributes (dict): Attributes to attach to the spans. """ if self._check_instrumented(estimator, method_name): logger.debug( @@ -616,10 +634,14 @@ def _instrument_instance_method( estimator, "_otel_original_" + method_name, (estimator, method) ) setattr( - estimator, method_name, self.spanner(method, estimator), + estimator, + method_name, + self.spanner(method, estimator, attributes), ) - def _instrument_estimator_attribute(self, estimator: BaseEstimator): + def _instrument_estimator_attribute( + self, estimator: BaseEstimator, attributes: Attributes = None + ): """Instrument instance attributes which also contain estimators. Handle instance attributes which are also estimators, are a list @@ -634,20 +656,29 @@ def _instrument_estimator_attribute(self, estimator: BaseEstimator): estimator (BaseEstimator): A fitted ``sklearn`` estimator, with an attribute which also contains an estimator or collection of estimators. + attributes (dict): Attributes to attach to the spans. """ attribs = self.recurse_attribs.get(estimator.__class__, []) for attrib in attribs: attrib_value = getattr(estimator, attrib) if isinstance(attrib_value, Sequence): for value in attrib_value: - self.instrument_estimator(estimator=value) + self.instrument_estimator( + estimator=value, attributes=attributes + ) elif isinstance(attrib_value, MutableMapping): for value in attrib_value.values(): - self.instrument_estimator(estimator=value) + self.instrument_estimator( + estimator=value, attributes=attributes + ) else: - self.instrument_estimator(estimator=attrib_value) + self.instrument_estimator( + estimator=attrib_value, attributes=attributes + ) - def _instrument_estimator_namedtuple(self, estimator: BaseEstimator): + def _instrument_estimator_namedtuple( + self, estimator: BaseEstimator, attributes: Attributes = None + ): """Instrument attributes with (name, estimator) tupled components. Examples include Pipeline and FeatureUnion instances which @@ -656,11 +687,12 @@ def _instrument_estimator_namedtuple(self, estimator: BaseEstimator): Args: estimator: A fitted sklearn estimator, with an attribute which also contains an estimator or collection of estimators. + attributes (dict): Attributes to attach to the spans. """ attribs = self.recurse_namedtuple_attribs.get(estimator.__class__, []) for attrib in attribs: for _, est in getattr(estimator, attrib): - self.instrument_estimator(estimator=est) + self.instrument_estimator(estimator=est, attributes=attributes) def _uninstrument_estimator_attribute(self, estimator: BaseEstimator): """Uninstrument instance attributes which also contain estimators. diff --git a/instrumentation/opentelemetry-instrumentation-sklearn/tests/test_sklearn.py b/instrumentation/opentelemetry-instrumentation-sklearn/tests/test_sklearn.py index 2aa43e700ae..df454b6fa9c 100644 --- a/instrumentation/opentelemetry-instrumentation-sklearn/tests/test_sklearn.py +++ b/instrumentation/opentelemetry-instrumentation-sklearn/tests/test_sklearn.py @@ -159,3 +159,17 @@ def test_attrib_config(self): model.predict(x_test) spans = self.memory_exporter.get_finished_spans() self.assertEqual(len(spans), 0) + + def test_span_attributes(self): + model = pipeline() + attributes = {"model_name": "random_forest_model"} + ski = SklearnInstrumentor() + ski.instrument_estimator(estimator=model, attributes=attributes) + + x_test = random_input() + + model.predict(x_test) + + spans = self.memory_exporter.get_finished_spans() + for span in spans: + assert span.attributes["model_name"] == "random_forest_model" From e97f5fd70ebfefb8fcd006e01170878652462b1d Mon Sep 17 00:00:00 2001 From: flynn Date: Wed, 4 Nov 2020 09:15:33 -0500 Subject: [PATCH 15/18] cleanup changelog --- .../opentelemetry-instrumentation-sklearn/CHANGELOG.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/instrumentation/opentelemetry-instrumentation-sklearn/CHANGELOG.md b/instrumentation/opentelemetry-instrumentation-sklearn/CHANGELOG.md index 0ef6e1246c4..3e04402cea9 100644 --- a/instrumentation/opentelemetry-instrumentation-sklearn/CHANGELOG.md +++ b/instrumentation/opentelemetry-instrumentation-sklearn/CHANGELOG.md @@ -2,6 +2,4 @@ ## Unreleased - - - Initial release From dc67786c0bac4401ef4122239e14559c6e40151b Mon Sep 17 00:00:00 2001 From: flynn Date: Wed, 4 Nov 2020 09:28:28 -0500 Subject: [PATCH 16/18] fix documentation --- .../src/opentelemetry/instrumentation/sklearn/__init__.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py b/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py index bdaf2a15a22..3dd68ea8fe3 100644 --- a/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py @@ -305,7 +305,8 @@ class descendent) is being instrumented with opentelemetry. Within a element is the estimator. Defaults include sklearn's ``Pipeline`` and its attribute ``steps``, and the ``FeatureUnion`` and its attribute ``transformer_list``. Used in model instrumentation only. - spanner: A function with signature (func, str) which + spanner: A function with signature + (func, sklearn.base.BaseEstimator, Attributes) which decorates instance methods with opentelemetry spans. packages: A list of additional sklearn-compatible packages to instrument. Used with package instrumentation only. @@ -551,7 +552,7 @@ def _instrument_class_method( """Instrument an estimator method with a span. When instrumenting we attach a tuple of (Class, method) to the - attribute ``_original_`` for each method. This allows + attribute ``_otel_original_`` for each method. This allows us to replace the patched with the original in unstrumentation, but also allows proper instrumentation of child classes without instrumenting inherited methods twice. @@ -604,7 +605,7 @@ def _instrument_instance_method( """Instrument an estimator instance method with a span. When instrumenting we attach a tuple of (Class, method) to the - attribute ``_original_`` for each method. This allows + attribute ``_otel_original_`` for each method. This allows us to replace the patched with the original in unstrumentation. Args: From f06a710bae70062b3cee5564427538056df4f4c8 Mon Sep 17 00:00:00 2001 From: flynn Date: Wed, 4 Nov 2020 09:40:57 -0500 Subject: [PATCH 17/18] uninstrumentation --- .../src/opentelemetry/instrumentation/sklearn/__init__.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py b/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py index 3dd68ea8fe3..52f4fba4f94 100644 --- a/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py @@ -104,7 +104,10 @@ def wrapper(*args, **kwargs): with get_tracer(__name__, __version__).start_as_current_span( name="{cls}.{func}".format(cls=name, func=func.__name__), attributes=attributes, - ): + ) as span: + if span.is_recording(): + for k, v in attributes.items(): + span.set_attribute(k, v) return func(*args, **kwargs) return wrapper @@ -553,7 +556,7 @@ def _instrument_class_method( When instrumenting we attach a tuple of (Class, method) to the attribute ``_otel_original_`` for each method. This allows - us to replace the patched with the original in unstrumentation, but + us to replace the patched with the original in uninstrumentation, but also allows proper instrumentation of child classes without instrumenting inherited methods twice. From 57f98b288dcf39f294ace1df40040a1307bc2ba4 Mon Sep 17 00:00:00 2001 From: flynn Date: Wed, 4 Nov 2020 21:21:13 -0500 Subject: [PATCH 18/18] class method attribs; remove spanner --- .../instrumentation/sklearn/__init__.py | 53 ++++++++++++------- 1 file changed, 34 insertions(+), 19 deletions(-) diff --git a/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py b/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py index 52f4fba4f94..d7ac330d1e8 100644 --- a/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-sklearn/src/opentelemetry/instrumentation/sklearn/__init__.py @@ -30,7 +30,9 @@ SklearnInstrumentor().instrument() # instrument sklearn and other libraries - SklearnInstrumentor(packages=["lightgbm", "xgboost"]).instrument() + SklearnInstrumentor( + packages=["sklearn", "lightgbm", "xgboost"] + ).instrument() Model intrumentation example: @@ -99,37 +101,46 @@ def implement_spans( name = estimator.__class__.__name__ logger.debug("Instrumenting: %s.%s", name, func.__name__) + attributes = attributes or {} + @wraps(func) def wrapper(*args, **kwargs): with get_tracer(__name__, __version__).start_as_current_span( name="{cls}.{func}".format(cls=name, func=func.__name__), - attributes=attributes, ) as span: if span.is_recording(): - for k, v in attributes.items(): - span.set_attribute(k, v) + for key, val in attributes.items(): + span.set_attribute(key, val) return func(*args, **kwargs) return wrapper -def implement_spans_delegator(obj: _IffHasAttrDescriptor): +def implement_spans_delegator( + obj: _IffHasAttrDescriptor, attributes: Attributes = None +): """Wrap the descriptor's fn with a span. Args: obj: An instance of _IffHasAttrDescriptor + attributes: Attributes to apply to the span """ # Don't instrument inherited delegators if hasattr(obj, "_otel_original_fn"): logger.debug("Already instrumented: %s", obj.fn.__qualname__) return - def implement_spans_get(func: Callable): + attributes = attributes or {} + + def implement_spans_fn(func: Callable): @wraps(func) def wrapper(*args, **kwargs): with get_tracer(__name__, __version__).start_as_current_span( name=func.__qualname__ - ): + ) as span: + if span.is_recording(): + for key, val in attributes.items(): + span.set_attribute(key, val) return func(*args, **kwargs) return wrapper @@ -137,7 +148,7 @@ def wrapper(*args, **kwargs): logger.debug("Instrumenting: %s", obj.fn.__qualname__) setattr(obj, "_otel_original_fn", getattr(obj, "fn")) - setattr(obj, "fn", implement_spans_get(obj.fn)) + setattr(obj, "fn", implement_spans_fn(obj.fn)) def get_delegator( @@ -263,7 +274,7 @@ class descendent) is being instrumented with opentelemetry. Within a # instrument the sklearn library SklearnInstrumentor().instrument() - # instrument additional libraries + # instrument several sklearn-compatible libraries packages = ["sklearn", "lightgbm", "xgboost"] SklearnInstrumentor(packages=packages).instrument() @@ -308,10 +319,7 @@ class descendent) is being instrumented with opentelemetry. Within a element is the estimator. Defaults include sklearn's ``Pipeline`` and its attribute ``steps``, and the ``FeatureUnion`` and its attribute ``transformer_list``. Used in model instrumentation only. - spanner: A function with signature - (func, sklearn.base.BaseEstimator, Attributes) which - decorates instance methods with opentelemetry spans. - packages: A list of additional sklearn-compatible packages to + packages: A list of sklearn-compatible packages to instrument. Used with package instrumentation only. exclude_classes: A list of classes to exclude from instrumentation. Child classes are also excluded. Default is sklearn's @@ -337,7 +345,6 @@ def __init__( recurse_namedtuple_attribs: Dict[ Type[BaseEstimator], List[str] ] = None, - spanner: Callable = implement_spans, packages: List[str] = None, exclude_classes: List[Type] = None, ): @@ -346,7 +353,6 @@ def __init__( self.recurse_namedtuple_attribs = ( recurse_namedtuple_attribs or DEFAULT_NAMEDTUPLE_ATTRIBS ) - self.spanner = spanner self.packages = packages or DEFAULT_PACKAGES if exclude_classes is None: self.exclude_classes = tuple(DEFAULT_EXCLUDE_CLASSES) @@ -356,6 +362,7 @@ def __init__( def _instrument(self, **kwargs): """Instrument the library, and any additional specified on init.""" klasses = get_base_estimators(packages=self.packages) + attributes = kwargs.get("attributes") for _, klass in klasses.items(): if issubclass(klass, self.exclude_classes): logger.debug("Not instrumenting (excluded): %s", str(klass)) @@ -364,7 +371,9 @@ def _instrument(self, **kwargs): for method_name in self.methods: if hasattr(klass, method_name): self._instrument_class_method( - estimator=klass, method_name=method_name + estimator=klass, + method_name=method_name, + attributes=attributes, ) def _uninstrument(self, **kwargs): @@ -550,7 +559,10 @@ def _uninstrument_instance_method( ) def _instrument_class_method( - self, estimator: Type[BaseEstimator], method_name: str + self, + estimator: Type[BaseEstimator], + method_name: str, + attributes: Attributes = None, ): """Instrument an estimator method with a span. @@ -565,6 +577,7 @@ def _instrument_class_method( class method_name (str): The method name of the estimator on which to apply a span. + attributes (dict): Attributes to attach to the spans. """ if self._check_instrumented(estimator, method_name): logger.debug( @@ -590,7 +603,9 @@ def _instrument_class_method( (estimator, class_attr), ) setattr( - estimator, method_name, self.spanner(class_attr, estimator), + estimator, + method_name, + implement_spans(class_attr, estimator, attributes), ) def _unwrap_function(self, function): @@ -640,7 +655,7 @@ def _instrument_instance_method( setattr( estimator, method_name, - self.spanner(method, estimator, attributes), + implement_spans(method, estimator, attributes), ) def _instrument_estimator_attribute(