From f1307d8883607acd2d87d84b12509ed16aeb1a5b Mon Sep 17 00:00:00 2001 From: chico Date: Tue, 2 Feb 2021 16:33:53 +0100 Subject: [PATCH 1/3] First documentation --- .binder/apt.txt | 2 + .binder/postBuild | 43 +++ .binder/requirements.txt | 1 + .github/workflows/docs.yml | 43 +++ README.md | 29 +- autoPyTorch/__init__.py | 1 + autoPyTorch/__version__.py | 4 + docs/Makefile | 182 ++++++++++ docs/_templates/class.rst | 6 + docs/_templates/class_without_init.rst | 12 + docs/_templates/function.rst | 10 + docs/_templates/layout.html | 23 ++ docs/api.rst | 18 + docs/conf.py | 383 ++++++++++++++++++++ docs/extending.rst | 7 + docs/index.rst | 78 ++++ docs/installation.rst | 32 ++ docs/manual.rst | 9 + docs/releases.rst | 17 + examples/README.txt | 7 + examples/example_ensemble_classification.py | 213 ----------- examples/example_smac_intensify.py | 151 -------- examples/example_tabular_classification.py | 29 +- examples/traditional_pipeline.py | 86 ----- 24 files changed, 917 insertions(+), 469 deletions(-) create mode 100644 .binder/apt.txt create mode 100644 .binder/postBuild create mode 100644 .binder/requirements.txt create mode 100644 .github/workflows/docs.yml create mode 100644 autoPyTorch/__version__.py create mode 100644 docs/Makefile create mode 100644 docs/_templates/class.rst create mode 100644 docs/_templates/class_without_init.rst create mode 100644 docs/_templates/function.rst create mode 100644 docs/_templates/layout.html create mode 100644 docs/api.rst create mode 100644 docs/conf.py create mode 100644 docs/extending.rst create mode 100644 docs/index.rst create mode 100644 docs/installation.rst create mode 100644 docs/manual.rst create mode 100644 docs/releases.rst create mode 100644 examples/README.txt delete mode 100644 examples/example_ensemble_classification.py delete mode 100644 examples/example_smac_intensify.py delete mode 100644 examples/traditional_pipeline.py diff --git a/.binder/apt.txt b/.binder/apt.txt new file mode 100644 index 
000000000..059bded08 --- /dev/null +++ b/.binder/apt.txt @@ -0,0 +1,2 @@ +build-essential +swig diff --git a/.binder/postBuild b/.binder/postBuild new file mode 100644 index 000000000..c7f300460 --- /dev/null +++ b/.binder/postBuild @@ -0,0 +1,43 @@ +#!/bin/bash + +set -e + +python -m pip install .[docs,examples] + +# Taken from https://github.com/scikit-learn/scikit-learn/blob/22cd233e1932457947e9994285dc7fd4e93881e4/.binder/postBuild +# under BSD3 license, copyright the scikit-learn contributors + +# This script is called in a binder context. When this script is called, we are +# inside a git checkout of the automl/Auto-PyTorch repo. This script +# generates notebooks from the Auto-PyTorch Python examples. + +if [[ ! -f /.dockerenv ]]; then + echo "This script was written for repo2docker and is supposed to run inside a docker container." + echo "Exiting because this script can delete data if run outside of a docker container." + exit 1 +fi + +# Copy content we need from the Auto-PyTorch repo +TMP_CONTENT_DIR=/tmp/Auto-PyTorch +mkdir -p $TMP_CONTENT_DIR +cp -r examples .binder $TMP_CONTENT_DIR +# delete everything in current directory including dot files and dot folders +find . -delete + +# Generate notebooks and remove other files from examples folder +GENERATED_NOTEBOOKS_DIR=examples +cp -r $TMP_CONTENT_DIR/examples $GENERATED_NOTEBOOKS_DIR + +find $GENERATED_NOTEBOOKS_DIR -name 'example_*.py' -exec sphx_glr_python_to_jupyter.py '{}' + +# Keep __init__.py and custom_metrics.py +NON_NOTEBOOKS=$(find $GENERATED_NOTEBOOKS_DIR -type f | grep -v '\.ipynb' | grep -v 'init' | grep -v 'custom_metrics') +rm -f $NON_NOTEBOOKS + +# Modify path to be consistent with the path given by sphinx-gallery +mkdir notebooks +mv $GENERATED_NOTEBOOKS_DIR notebooks/ + +# Put the .binder folder back (may be useful for debugging purposes) +mv $TMP_CONTENT_DIR/.binder . 
+# Final clean up +rm -rf $TMP_CONTENT_DIR diff --git a/.binder/requirements.txt b/.binder/requirements.txt new file mode 100644 index 000000000..3c8d7e782 --- /dev/null +++ b/.binder/requirements.txt @@ -0,0 +1 @@ +-r ../requirements.txt diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 000000000..9ae8a98ed --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,43 @@ +name: Docs +on: [pull_request, push] + +jobs: + build-and-deploy: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Setup Python + uses: actions/setup-python@v2 + with: + python-version: 3.8 + - name: Install dependencies + run: | + pip install -e .[docs,examples] + - name: Make docs + run: | + cd docs + make html + - name: Pull latest gh-pages + if: (contains(github.ref, 'develop') || contains(github.ref, 'master')) && github.event_name == 'push' + run: | + cd .. + git clone https://github.com/automl/Auto-PyTorch.git --branch gh-pages --single-branch gh-pages + - name: Copy new doc into gh-pages + if: (contains(github.ref, 'develop') || contains(github.ref, 'master')) && github.event_name == 'push' + run: | + branch_name=${GITHUB_REF##*/} + cd ../gh-pages + rm -rf $branch_name + cp -r ../Auto-PyTorch/docs/build/html $branch_name + - name: Push to gh-pages + if: (contains(github.ref, 'develop') || contains(github.ref, 'master')) && github.event_name == 'push' + run: | + last_commit=$(git log --pretty=format:"%an: %s") + cd ../gh-pages + branch_name=${GITHUB_REF##*/} + git add $branch_name/ + git config --global user.name 'Github Actions' + git config --global user.email 'not@mail.com' + git remote set-url origin https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/${{ github.repository }} + git commit -am "$last_commit" + git push diff --git a/README.md b/README.md index 3d61245ee..380ce0c08 100755 --- a/README.md +++ b/README.md @@ -2,33 +2,36 @@ Copyright (C) 2019 [AutoML Group Freiburg](http://www.automl.org/) -This 
an alpha version of Auto-PyTorch. -So far, Auto-PyTorch supports tabular data (classification, regression), image data (classification) and time-series data (TODO). +This is an alpha version of Auto-PyTorch with improved API. +So far, Auto-PyTorch supports tabular data (classification, regression). +We plan to enable image data and time-series data. + + +Find the documentation [here](https://automl.github.io/Auto-PyTorch/refactor_development) ## Installation ### Pip -```sh -$ pip install autoPyTorch -``` -### Manually +We recommend using Anaconda for developing as follows: + ```sh -$ cd install/path -$ git clone https://github.com/automl/Auto-PyTorch.git -$ cd Auto-PyTorch -$ cat requirements.txt | xargs -n 1 -L 1 pip install -$ python setup.py install -``` +# Following commands assume the user is in a cloned directory of Auto-PyTorch +conda create -n autopytorch python=3.8 +conda activate autopytorch +conda install gxx_linux-64 gcc_linux-64 swig +cat requirements.txt | xargs -n 1 -L 1 pip install +python setup.py install +``` ## Contributing If you want to contribute to Auto-PyTorch, clone the repository and checkout our current development branch ```sh -$ git checkout development +$ git checkout refactor_development ``` diff --git a/autoPyTorch/__init__.py b/autoPyTorch/__init__.py index e69de29bb..99f8e4109 100644 --- a/autoPyTorch/__init__.py +++ b/autoPyTorch/__init__.py @@ -0,0 +1 @@ +from autoPyTorch.__version__ import __version__ # noqa (imported but unused) diff --git a/autoPyTorch/__version__.py b/autoPyTorch/__version__.py new file mode 100644 index 000000000..38eddbeba --- /dev/null +++ b/autoPyTorch/__version__.py @@ -0,0 +1,4 @@ +"""Version information.""" + +# The following line *must* be the last in the module, exactly as formatted: +__version__ = "0.0.3" diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 000000000..fe6318af2 --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,182 @@ +# Makefile for Sphinx documentation +# + +# You can set 
these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +PAPER = +BUILDDIR = build + +# User-friendly check for sphinx-build +ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) +$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) +endif + +# Internal variables. +PAPEROPT_a4 = -D latex_paper_size=a4 +PAPEROPT_letter = -D latex_paper_size=letter +ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . +# the i18n builder cannot share the environment and doctrees with the others +I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . + +.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext + +all: html + +help: + @echo "Please use \`make ' where is one of" + @echo " html to make standalone HTML files" + @echo " dirhtml to make HTML files named index.html in directories" + @echo " singlehtml to make a single large HTML file" + @echo " pickle to make pickle files" + @echo " json to make JSON files" + @echo " htmlhelp to make HTML files and a HTML help project" + @echo " qthelp to make HTML files and a qthelp project" + @echo " devhelp to make HTML files and a Devhelp project" + @echo " epub to make an epub" + @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" + @echo " latexpdf to make LaTeX files and run them through pdflatex" + @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" + @echo " text to make text files" + @echo " man to make manual pages" + @echo " texinfo to make Texinfo files" + @echo " info to make Texinfo files and run them through makeinfo" + @echo " gettext to make PO 
message catalogs" + @echo " changes to make an overview of all changed/added/deprecated items" + @echo " xml to make Docutils-native XML files" + @echo " pseudoxml to make pseudoxml-XML files for display purposes" + @echo " linkcheck to check all external links for integrity" + @echo " doctest to run all doctests embedded in the documentation (if enabled)" + +clean: + rm -rf $(BUILDDIR)/* + rm -rf generated + rm -rf examples/ + rm -rf gen_modules/ + +html: + $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." + +dirhtml: + $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." + +singlehtml: + $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml + @echo + @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." + +pickle: + $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle + @echo + @echo "Build finished; now you can process the pickle files." + +json: + $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json + @echo + @echo "Build finished; now you can process the JSON files." + +htmlhelp: + $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp + @echo + @echo "Build finished; now you can run HTML Help Workshop with the" \ + ".hhp project file in $(BUILDDIR)/htmlhelp." + +qthelp: + $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp + @echo + @echo "Build finished; now you can run "qcollectiongenerator" with the" \ + ".qhcp project file in $(BUILDDIR)/qthelp, like this:" + @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/AutoPyTorch.qhcp" + @echo "To view the help file:" + @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/AutoPyTorch.qhc" + +devhelp: + $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp + @echo + @echo "Build finished." 
+ @echo "To view the help file:" + @echo "# mkdir -p $$HOME/.local/share/devhelp/AutoPyTorch" + @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/AutoPyTorch" + @echo "# devhelp" + +epub: + $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub + @echo + @echo "Build finished. The epub file is in $(BUILDDIR)/epub." + +latex: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo + @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." + @echo "Run \`make' in that directory to run these through (pdf)latex" \ + "(use \`make latexpdf' here to do that automatically)." + +latexpdf: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through pdflatex..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +latexpdfja: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through platex and dvipdfmx..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +text: + $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text + @echo + @echo "Build finished. The text files are in $(BUILDDIR)/text." + +man: + $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man + @echo + @echo "Build finished. The manual pages are in $(BUILDDIR)/man." + +texinfo: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo + @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." + @echo "Run \`make' in that directory to run these through makeinfo" \ + "(use \`make info' here to do that automatically)." + +info: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo "Running Texinfo files through makeinfo..." + make -C $(BUILDDIR)/texinfo info + @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." + +gettext: + $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale + @echo + @echo "Build finished. 
The message catalogs are in $(BUILDDIR)/locale." + +changes: + $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes + @echo + @echo "The overview file is in $(BUILDDIR)/changes." + +linkcheck: + $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck + @echo + @echo "Link check complete; look for any errors in the above output " \ + "or in $(BUILDDIR)/linkcheck/output.txt." + +doctest: + $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest + @echo "Testing of doctests in the sources finished, look at the " \ + "results in $(BUILDDIR)/doctest/output.txt." + +xml: + $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml + @echo + @echo "Build finished. The XML files are in $(BUILDDIR)/xml." + +pseudoxml: + $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml + @echo + @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." diff --git a/docs/_templates/class.rst b/docs/_templates/class.rst new file mode 100644 index 000000000..307b0199c --- /dev/null +++ b/docs/_templates/class.rst @@ -0,0 +1,6 @@ +:mod:`{{module}}`.{{objname}} +{{ underline }}============== + +.. currentmodule:: {{ module }} + +.. autoclass:: {{ objname }} diff --git a/docs/_templates/class_without_init.rst b/docs/_templates/class_without_init.rst new file mode 100644 index 000000000..79ff2cf80 --- /dev/null +++ b/docs/_templates/class_without_init.rst @@ -0,0 +1,12 @@ +:mod:`{{module}}`.{{objname}} +{{ underline }}============== + +.. currentmodule:: {{ module }} + +.. autoclass:: {{ objname }} + +.. include:: {{module}}.{{objname}}.examples + +.. raw:: html + +
diff --git a/docs/_templates/function.rst b/docs/_templates/function.rst new file mode 100644 index 000000000..d8c9bd480 --- /dev/null +++ b/docs/_templates/function.rst @@ -0,0 +1,10 @@ +:mod:`{{module}}`.{{objname}} +{{ underline }}==================== + +.. currentmodule:: {{ module }} + +.. autofunction:: {{ objname }} + +.. raw:: html + +
diff --git a/docs/_templates/layout.html b/docs/_templates/layout.html new file mode 100644 index 000000000..d0cc5e034 --- /dev/null +++ b/docs/_templates/layout.html @@ -0,0 +1,23 @@ +{% extends "!layout.html" %} + +{# Custom CSS overrides #} +{# set bootswatch_css_custom = ['_static/my-styles.css'] #} + +{# Add github banner (from: https://github.com/blog/273-github-ribbons). #} +{% block header %} + {{ super() }} + + +{% endblock %} + diff --git a/docs/api.rst b/docs/api.rst new file mode 100644 index 000000000..199e85ccc --- /dev/null +++ b/docs/api.rst @@ -0,0 +1,18 @@ +:orphan: + +.. _api: + +APIs +**** + +============ +Main modules +============ + +~~~~~~~~~~~~~~ +Classification +~~~~~~~~~~~~~~ + +.. autoclass:: autoPyTorch.api.tabular_classification.TabularClassificationTask + :members: + :inherited-members: search, refit, predict, score diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 000000000..3bde1c842 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,383 @@ +# -*- coding: utf-8 -*- +# +# Auto-PyTorch documentation build configuration file, created by +# sphinx-quickstart on Thu May 21 13:40:42 2015. +# +# This file is execfile()d with the current directory set to its +# containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. 
+ +import os +import sys +import sphinx_bootstrap_theme +import autoPyTorch +# Add the parent directory of this file to the PYTHONPATH +import os + +current_directory = os.path.dirname(__file__) +parent_directory = os.path.join(current_directory, '..') +parent_directory = os.path.abspath(parent_directory) +sys.path.append(parent_directory) + +import autoPyTorch + +# -- General configuration ------------------------------------------------ + +# If your documentation needs a minimal Sphinx version, state it here. +# needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = ['sphinx.ext.autodoc', 'sphinx.ext.autosummary', + 'sphinx.ext.doctest', 'sphinx.ext.coverage', + 'sphinx.ext.mathjax', 'sphinx.ext.viewcode', + 'sphinx_gallery.gen_gallery', 'sphinx.ext.autosectionlabel', + # sphinx.ext.autosexctionlabel raises duplicate label warnings + # because same section headers are used multiple times throughout + # the documentation. + 'numpydoc'] + + +from sphinx_gallery.sorting import ExplicitOrder, FileNameSortKey + +# Configure the extensions +numpydoc_show_class_members = False +autosummary_generate = True + +# prefix each section label with the name of the document it is in, in order to avoid +# ambiguity when there are multiple same section labels in different documents. +autosectionlabel_prefix_document = True + +# Sphinx-gallery configuration. + +# get current branch +binder_branch = 'refactor_development' +import autoPyTorch +if "dev" in autoPyTorch.__version__: + binder_branch = "refactor_development" + +sphinx_gallery_conf = { + # path to the examples + 'examples_dirs': '../examples', + # path where to save gallery generated examples + 'gallery_dirs': 'examples', + #TODO: fix back/forward references for the examples. 
+ #'doc_module': ('autoPyTorch'), + #'reference_url': { + # 'autoPyTorch': None + #}, + 'backreferences_dir': None, + 'filename_pattern': 'example.*.py$', + 'ignore_pattern': r'custom_metrics\.py|__init__\.py', + 'binder': { + # Required keys + 'org': 'automl', + 'repo': 'Auto-PyTorch', + 'branch': binder_branch, + 'binderhub_url': 'https://mybinder.org', + 'dependencies': ['../.binder/apt.txt', '../.binder/requirements.txt'], + #'filepath_prefix': '' # A prefix to prepend to any filepaths in Binder links. + # Jupyter notebooks for Binder will be copied to this directory (relative to built documentation root). + 'notebooks_dir': 'notebooks/', + 'use_jupyter_lab': True, # Whether Binder links should start Jupyter Lab instead of the Jupyter Notebook interface. + }, +} + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix of source filenames. +source_suffix = '.rst' + +# The encoding of source files. +# source_encoding = 'utf-8-sig' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = u'AutoPyTorch' +copyright = u'2014-2019, Machine Learning Professorship Freiburg' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = autoPyTorch.__version__ +# The full version, including alpha/beta/rc tags. +release = autoPyTorch.__version__ + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +# today = '' +# Else, today_fmt is used as the format for a strftime call. 
+# today_fmt = '%B %d, %Y' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +exclude_patterns = ['_build', '_templates', '_static'] + +# The reST default role (used for this markup: `text`) to use for all +# documents. +# default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +# add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +# add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +# show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# A list of ignored prefixes for module index sorting. +# modindex_common_prefix = [] + +# If true, keep warnings as "system message" paragraphs in the built documents. +# keep_warnings = False + +# -- Options for HTML output ---------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +html_theme = 'bootstrap' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +html_theme_options = { + # Navigation bar title. (Default: ``project`` value) + 'navbar_title': "Auto-PyTorch", + + # Tab name for entire site. (Default: "Site") + # 'navbar_site_name': "Site", + + # A list of tuples containting pages to link to. The value should + # be in the form [(name, page), ..] + 'navbar_links': [ + ('Start', 'index'), + ('Releases', 'releases'), + ('Installation', 'installation'), + ('Manual', 'manual'), + ('Examples', 'examples/index'), + ('API', 'api'), + ('Extending', 'extending'), + ], + + # Render the next and previous page links in navbar. 
(Default: true) + 'navbar_sidebarrel': False, + + # Render the current pages TOC in the navbar. (Default: true) + 'navbar_pagenav': False, + + # Tab name for the current pages TOC. (Default: "Page") + 'navbar_pagenav_name': "On this page", + + # Global TOC depth for "site" navbar tab. (Default: 1) + # Switching to -1 shows all levels. + 'globaltoc_depth': 1, + + # Include hidden TOCs in Site navbar? + # + # Note: If this is "false", you cannot have mixed ``:hidden:`` and + # non-hidden ``toctree`` directives in the same page, or else the build + # will break. + # + # Values: "true" (default) or "false" + 'globaltoc_includehidden': "false", + + # HTML navbar class (Default: "navbar") to attach to
element. + # For black navbar, do "navbar navbar-inverse" + 'navbar_class': "navbar", + + # Fix navigation bar to top of page? + # Values: "true" (default) or "false" + 'navbar_fixed_top': "true", + + # Location of link to source. + # Options are "nav" (default), "footer" or anything else to exclude. + 'source_link_position': "footer", + + # Bootswatch (http://bootswatch.com/) theme. + # + # Options are nothing with "" (default) or the name of a valid theme + # such as "amelia" or "cosmo". + 'bootswatch_theme': "cosmo", + + # Choose Bootstrap version. + # Values: "3" (default) or "2" (in quotes) + 'bootstrap_version': "3", +} + +# Add any paths that contain custom themes here, relative to this directory. +html_theme_path = sphinx_bootstrap_theme.get_html_theme_path() + +# The name for this set of Sphinx documents. If None, it defaults to +# " v documentation". +# html_title = None + +# A shorter title for the navigation bar. Default is the same as html_title. +# html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +# html_logo = None + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +# html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# Add any extra paths that contain custom files (such as robots.txt or +# .htaccess) here, relative to this directory. These files are copied +# directly to the root of the documentation. +# html_extra_path = [] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. 
+# html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +# html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +html_sidebars = {'**': ['localtoc.html']} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +# html_additional_pages = {} + +# If false, no module index is generated. +# html_domain_indices = True + +# If false, no index is generated. +# html_use_index = True + +# If true, the index is split into individual pages for each letter. +# html_split_index = False + +# If true, links to the reST sources are added to the pages. +# html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +# html_show_sphinx = True + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. +# html_show_copyright = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +# html_use_opensearch = '' + +# This is the file name suffix for HTML files (e.g. ".xhtml"). +# html_file_suffix = None + +# Output file base name for HTML help builder. +htmlhelp_basename = 'AutoPyTorchdoc' + +# -- Options for LaTeX output --------------------------------------------- + +latex_elements = { + # The paper size ('letterpaper' or 'a4paper'). + # 'papersize': 'letterpaper', + # The font size ('10pt', '11pt' or '12pt'). + # 'pointsize': '10pt', + # Additional stuff for the LaTeX preamble. + # 'preamble': '', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). 
+latex_documents = [('index', 'AutoPyTorch.tex', u'AutoPyTorch Documentation', + u'AutoML', + 'manual'), ] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +# latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +# latex_use_parts = False + +# If true, show page references after internal links. +# latex_show_pagerefs = False + +# If true, show URL addresses after external links. +# latex_show_urls = False + +# Documents to append as an appendix to all manuals. +# latex_appendices = [] + +# If false, no module index is generated. +# latex_domain_indices = True + +# -- Options for manual page output --------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [('index', 'AutoPyTorch', u'AutoPyTorch Documentation', + [u'AutoML'], 1)] + +# If true, show URL addresses after external links. +# man_show_urls = False + +# -- Options for Texinfo output ------------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [('index', 'AutoPyTorch', u'AutoPytorch Documentation', + u'AutoML', + 'AutoPyTorch', 'One line description of project.', + 'Miscellaneous'), ] + +# Documents to append as an appendix to all manuals. +# texinfo_appendices = [] +# If false, no module index is generated. +# texinfo_domain_indices = True +# How to display URL addresses: 'footnote', 'no', or 'inline'. +# texinfo_show_urls = 'footnote' +# If true, do not generate a @detailmenu in the "Top" node's menu. +# texinfo_no_detailmenu = False + +# This value selects what content will be inserted into the main body of an +# autoclass directive. The possible values are: +# "class" +# Only the class’ docstring is inserted. 
This is the default. +# You can still document __init__ as a separate method using automethod or +# the members option to autoclass. +# "both" +# Both the class’ and the __init__ method’s docstring are concatenated and +# inserted. +# "init" +# Only the __init__ method’s docstring is inserted. +autoclass_content = 'both' diff --git a/docs/extending.rst b/docs/extending.rst new file mode 100644 index 000000000..753c494c3 --- /dev/null +++ b/docs/extending.rst @@ -0,0 +1,7 @@ +:orphan: + +.. _extending: + +====================== +Extending Auto-PyTorch +====================== diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 000000000..90eb02def --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,78 @@ +************ +Auto-PyTorch +************ + +.. role:: bash(code) + :language: bash + +.. role:: python(code) + :language: python + +*Auto-PyTorch* is an automated machine learning toolkit based on PyTorch: + + >>> import autoPyTorch + >>> cls = autoPyTorch.api.tabular_classification.TabularClassificationTask() + >>> cls.search(X_train, y_train) + >>> predictions = cls.predict(X_test) + +*Auto-PyTorch* frees a machine learning user from algorithm selection and +hyperparameter tuning. It leverages recent advances in *Bayesian +optimization*, *meta-learning* and *ensemble construction*. +Learn more about *Auto-PyTorch* by reading our paper +`Auto-PyTorch Tabular: Multi-Fidelity MetaLearning for Efficient and Robust AutoDL <https://arxiv.org/abs/2006.13799>`_ +. + +Example +******* + +Manual +****** + +* :ref:`installation` +* :ref:`manual` +* :ref:`api` +* :ref:`extending` + + +License +******* +*Auto-PyTorch* is licensed the same way as *scikit-learn*, +namely the 3-clause BSD license. 
+ +Citing Auto-PyTorch +******************* + +If you use *Auto-PyTorch* in a scientific publication, we would appreciate a +reference to the following paper: + + + `Auto-PyTorch Tabular: Multi-Fidelity MetaLearning for Efficient and Robust AutoDL + <https://arxiv.org/abs/2006.13799>`_, + + Bibtex entry:: + + @article{zimmer2020auto, + title={Auto-pytorch tabular: Multi-fidelity metalearning for efficient and robust autodl}, + author={Zimmer, Lucas and Lindauer, Marius and Hutter, Frank}, + journal={arXiv preprint arXiv:2006.13799}, + year={2020} + } + +Contributing +************ + +We appreciate all contributions to *Auto-PyTorch*, from bug reports and +documentation to new features. If you want to contribute to the code, you can +pick an issue from the `issue tracker <https://github.com/automl/Auto-PyTorch/issues>`_ +which is marked with `Needs contributor`. + +.. note:: + + To avoid spending time on duplicate work or features that are unlikely to + get merged, it is highly advised that you contact the developers + by opening a `github issue <https://github.com/automl/Auto-PyTorch/issues>`_ before starting to work. + +When developing new features, please create a new branch from the development +branch. When submitting a pull request, make sure that all tests are +still passing. diff --git a/docs/installation.rst b/docs/installation.rst new file mode 100644 index 000000000..eac2d5e5f --- /dev/null +++ b/docs/installation.rst @@ -0,0 +1,32 @@ +:orphan: + +.. _installation: + +============ +Installation +============ + +System requirements +=================== + +Auto-PyTorch has the following system requirements: + +* Linux operating system (for example Ubuntu) `(get Linux here) `_, +* Python (>=3.6) `(get Python here) `_. +* C++ compiler (with C++11 support) `(get GCC here) `_ and +* SWIG (version 3.0.* is required; >=4.0.0 is not supported) `(get SWIG here) `_. + +Installing Auto-PyTorch +======================= + +.. 
code:: bash + + conda create -n autopytorch python=3.8 + conda activate autopytorch + conda install gxx_linux-64 gcc_linux-64 swig + cat requirements.txt | xargs -n 1 -L 1 pip install + python setup.py install + +Docker Image +========================= + TODO diff --git a/docs/manual.rst b/docs/manual.rst new file mode 100644 index 000000000..1ddcbcdce --- /dev/null +++ b/docs/manual.rst @@ -0,0 +1,9 @@ +:orphan: + +.. _manual: + +====== +Manual +====== + +TODO diff --git a/docs/releases.rst b/docs/releases.rst new file mode 100644 index 000000000..991bcb6a0 --- /dev/null +++ b/docs/releases.rst @@ -0,0 +1,17 @@ +:orphan: + +.. _releases: + +.. + The following command allows retrieving all committers since a specified + commit. From http://stackoverflow.com/questions/6482436/list-of-authors-in-git-since-a-given-commit + git log 2e29eba.. --format="%aN <%aE>" --reverse | perl -e 'my %dedupe; while () { print unless $dedupe{$_}++}' + + +======== +Releases +======== + +Version 0.0.3 +============== +TODO diff --git a/examples/README.txt b/examples/README.txt new file mode 100644 index 000000000..0df54e962 --- /dev/null +++ b/examples/README.txt @@ -0,0 +1,7 @@ +.. _examples: + +======== +Examples +======== + +Practical examples for using *Auto-PyTorch*. diff --git a/examples/example_ensemble_classification.py b/examples/example_ensemble_classification.py deleted file mode 100644 index b3b5cd9a0..000000000 --- a/examples/example_ensemble_classification.py +++ /dev/null @@ -1,213 +0,0 @@ - -""" -====================== -Ensemble from random search ---------------------------- - -This is a temporal example to make sure that ensemble works. -It also sets how SMAC should create the output information, -so that the ensemble builder works.
- -We will remove this file, once SMAC + ensemble builder work -====================== -""" -import copy -import tempfile -import time -import typing - -import dask -import dask.distributed - -import numpy as np - -import sklearn.datasets -import sklearn.model_selection -from sklearn.metrics import accuracy_score - -from autoPyTorch.constants import MULTICLASS, TABULAR_CLASSIFICATION -from autoPyTorch.datasets.tabular_dataset import TabularDataset -from autoPyTorch.ensemble.ensemble_builder import EnsembleBuilderManager -from autoPyTorch.pipeline.components.training.metrics.metrics import accuracy -from autoPyTorch.pipeline.tabular_classification import TabularClassificationPipeline -from autoPyTorch.utils.backend import Backend, create -from autoPyTorch.utils.pipeline import get_dataset_requirements - - -def get_data_to_train(backend: Backend) -> typing.Tuple[typing.Dict[str, typing.Any]]: - """ - This function returns a fit dictionary that within itself, contains all - the information to fit a pipeline - """ - - # Get the training data for tabular classification - # Move to Australian to showcase numerical vs categorical - X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True) - X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( - X, - y, - random_state=1, - test_size=0.2, - ) - - train_indices, val_indices = sklearn.model_selection.train_test_split( - list(range(X_train.shape[0])), - random_state=1, - test_size=0.25, - ) - - # Create a datamanager for this toy problem - datamanager = TabularDataset( - X=X_train, Y=y_train, - X_test=X_test, Y_test=y_test, - ) - backend.save_datamanager(datamanager) - - info = {'task_type': datamanager.task_type, - 'output_type': datamanager.output_type, - 'issparse': datamanager.issparse, - 'numerical_columns': datamanager.numerical_columns, - 'categorical_columns': datamanager.categorical_columns} - dataset_properties = 
datamanager.get_dataset_properties(get_dataset_requirements(info)) - - # Fit the pipeline - fit_dictionary = { - 'X_train': X_train, - 'y_train': y_train, - 'train_indices': train_indices, - 'val_indices': val_indices, - 'X_test': X_test, - 'y_test': y_test, - 'dataset_properties': dataset_properties, - # Training configuration - 'job_id': 'example_ensemble_1', - 'working_dir': './tmp/example_ensemble_1', # Hopefully generated by backend - 'device': 'cpu', - 'runtime': 100, - 'torch_num_threads': 1, - 'early_stopping': 20, - 'use_tensorboard_logger': True, - 'use_pynisher': False, - 'memory_limit': 4096, - 'metrics_during_training': True, - 'seed': 0, - 'budget_type': 'epochs', - 'epochs': 10.0, - 'split_id': 0, - 'backend': backend, - } - - return fit_dictionary - - -def random_search_and_save(fit_dictionary: typing.Dict[str, typing.Any], backend: Backend, - num_models: int) -> None: - """ - A function to generate randomly fitted pipelines. - It inefficiently pass the data in the fit dictionary, as there is no datamanager yet. - - It uses the backend to save the models and predictions for the ensemble selection - """ - - # Ensemble selection will evaluate performance on the OOF predictions. 
Store the OOF - # Ground truth - datamanager = backend.load_datamanager() - X_train, y_train = datamanager.train_tensors - X_test, y_test = (None, None) - if datamanager.test_tensors is not None: - X_test, y_test = datamanager.test_tensors - targets = np.take(y_train, fit_dictionary['val_indices'], axis=0) - backend.save_targets_ensemble(targets) - - for idx in range(num_models): - pipeline = TabularClassificationPipeline( - dataset_properties=fit_dictionary['dataset_properties']) - - # Sample a random configuration - pipeline_cs = pipeline.get_hyperparameter_search_space() - config = pipeline_cs.sample_configuration() - pipeline.set_hyperparameters(config) - - # Fit the sample configuration - pipeline.fit(fit_dictionary) - - # Predict using the fit model - ensemble_predictions = pipeline.predict( - X_train.iloc[fit_dictionary['val_indices']] - ) - test_predictions = pipeline.predict(X_test) - - backend.save_numrun_to_dir( - seed=fit_dictionary['seed'], - idx=idx, - budget=fit_dictionary['epochs'], - model=pipeline, - cv_model=None, - ensemble_predictions=ensemble_predictions, - valid_predictions=None, - test_predictions=test_predictions, - ) - - score = accuracy_score(y_test, np.argmax(test_predictions, axis=1)) - print(f"Fitted a pipeline {idx} with score = {score}") - - return - - -if __name__ == "__main__": - - # Build a repository with random fitted models - backend = create(temporary_directory='./tmp/autoPyTorch_ensemble_test_tmp', - output_directory='./tmp/autoPyTorch_ensemble_test_out', - delete_tmp_folder_after_terminate=False) - - # Create the directory structure - backend._make_internals_directory() - - # Get data to train - fit_dictionary = get_data_to_train(backend) - - # Create some random models for the ensemble - random_search_and_save(fit_dictionary, backend, num_models=1) - - # Build a ensemble from the above components - # Use dak client here to make sure this is proper working, - # as with smac we will have to use a client - 
dask.config.set({'distributed.worker.daemon': False}) - dask_client = dask.distributed.Client( - dask.distributed.LocalCluster( - n_workers=2, - processes=True, - threads_per_worker=1, - # We use the temporal directory to save the - # dask workers, because deleting workers - # more time than deleting backend directories - # This prevent an error saying that the worker - # file was deleted, so the client could not close - # the worker properly - local_directory=tempfile.gettempdir(), - ) - ) - manager = EnsembleBuilderManager( - start_time=time.time(), - time_left_for_ensembles=100, - backend=copy.deepcopy(backend), - dataset_name=fit_dictionary['job_id'], - output_type=MULTICLASS, - task_type=TABULAR_CLASSIFICATION, - metrics=[accuracy], - opt_metric='accuracy', - ensemble_size=50, - ensemble_nbest=50, - max_models_on_disc=50, - seed=fit_dictionary['seed'], - max_iterations=1, - read_at_most=np.inf, - ensemble_memory_limit=fit_dictionary['memory_limit'], - random_state=fit_dictionary['seed'], - precision=32, - ) - manager.build_ensemble(dask_client) - future = manager.futures.pop() - dask.distributed.wait([future]) # wait for the ensemble process to finish - print(f"Ensemble build it: {future.result()}") diff --git a/examples/example_smac_intensify.py b/examples/example_smac_intensify.py deleted file mode 100644 index b92c90968..000000000 --- a/examples/example_smac_intensify.py +++ /dev/null @@ -1,151 +0,0 @@ -import multiprocessing -import tempfile -import time -import typing - -import dask -import dask.distributed - -import sklearn.datasets -import sklearn.model_selection - -from autoPyTorch.datasets.resampling_strategy import CrossValTypes -from autoPyTorch.datasets.tabular_dataset import TabularDataset -from autoPyTorch.optimizer.smbo import AutoMLSMBO -from autoPyTorch.pipeline.components.training.metrics.utils import get_metrics -from autoPyTorch.utils.backend import create -from autoPyTorch.utils.logging_ import setup_logger, start_log_server -from 
autoPyTorch.utils.pipeline import get_configuration_space -from autoPyTorch.utils.stopwatch import StopWatch - - -def _start_logger(name, logging_config, backend): - logger_name = 'AutoML :%s' % (name) - setup_logger( - filename='%s.log' % str(logger_name), - logging_config=logging_config, - output_dir=backend.temporary_directory, - ) - - # As Auto-sklearn works with distributed process, - # we implement a logger server that can receive tcp - # pickled messages. They are unpickled and processed locally - # under the above logging configuration setting - # We need to specify the logger_name so that received records - # are treated under the logger_name ROOT logger setting - context = multiprocessing.get_context('spawn') - stop_logging_server = context.Event() - port = context.Value('l') # be safe by using a long - port.value = -1 - - logging_server = context.Process( - target=start_log_server, - kwargs=dict( - host='localhost', - logname=logger_name, - event=stop_logging_server, - port=port, - filename='%s.log' % str(logger_name), - logging_config=logging_config, - output_dir=backend.temporary_directory, - ), - ) - - logging_server.start() - - while True: - with port.get_lock(): - if port.value == -1: - time.sleep(0.01) - else: - break - - return int(port.value), stop_logging_server - - -def get_data_to_train() -> typing.Tuple[typing.Any, typing.Any, typing.Any, typing.Any]: - """ - This function returns a fit dictionary that within itself, contains all - the information to fit a pipeline - """ - - # Get the training data for tabular classification - # Move to Australian to showcase numerical vs categorical - X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True) - X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( - X, - y, - random_state=1, - test_size=0.2, - ) - - return X_train, X_test, y_train, y_test - - -if __name__ == "__main__": - # Get data to train - X_train, X_test, y_train, y_test = 
get_data_to_train() - - # Build a repository with random fitted models - backend = create(temporary_directory='./tmp/autoPyTorch_smac_test_tmp', - output_directory='./tmp/autoPyTorch_smac_test_out', - delete_tmp_folder_after_terminate=False) - # Create the directory structure - backend._make_internals_directory() - - # Create a datamanager for this toy problem - datamanager = TabularDataset( - X=X_train, Y=y_train, - X_test=X_test, Y_test=y_test, - resampling_strategy=CrossValTypes.k_fold_cross_validation) - backend.save_datamanager(datamanager) - - # Build a ensemble from the above components - # Use dak client here to make sure this is proper working, - # as with smac we will have to use a client - dask.config.set({'distributed.worker.daemon': False}) - dask_client = dask.distributed.Client( - dask.distributed.LocalCluster( - n_workers=2, - processes=True, - threads_per_worker=1, - # We use the temporal directory to save the - # dask workers, because deleting workers - # more time than deleting backend directories - # This prevent an error saying that the worker - # file was deleted, so the client could not close - # the worker properly - local_directory=tempfile.gettempdir(), - ) - ) - port, stop_logging_server = _start_logger("trial_australian", logging_config=None, backend=backend) - - info = {'task_type': datamanager.task_type, - 'output_type': datamanager.output_type, - 'categorical_columns': datamanager.categorical_columns, - 'numerical_columns': datamanager.numerical_columns} - config_space = get_configuration_space(info) - # Make the optimizer - smbo = AutoMLSMBO( - config_space=config_space, - dataset_name='Australian', - backend=backend, - total_walltime_limit=120, - dask_client=dask_client, - func_eval_time_limit=60, - memory_limit=4096, - metric=get_metrics(dataset_properties=dict({'task_type': datamanager.task_type, - 'output_type': datamanager.output_type}))[0], - watcher=StopWatch(), - n_jobs=2, - ensemble_callback=None, - logger_port=port - ) - - 
# Then run the optimization - run_history, trajectory, budget = smbo.run_smbo() - - for k, v in run_history.data.items(): - print(f"{k}->{v}") - if not stop_logging_server.is_set(): - stop_logging_server.set() diff --git a/examples/example_tabular_classification.py b/examples/example_tabular_classification.py index d87b29e8b..cd6063cd4 100644 --- a/examples/example_tabular_classification.py +++ b/examples/example_tabular_classification.py @@ -2,6 +2,9 @@ ====================== Tabular Classification ====================== + +The following example shows how to fit a sample classification model +with AutoPyTorch """ import typing import warnings @@ -59,17 +62,31 @@ def get_search_space_updates(): if __name__ == '__main__': - # Get data to train + ############################################################################ + # Data Loading + # ============ X_train, X_test, y_train, y_test = get_data_to_train() - - # Create a datamanager for this toy problem datamanager = TabularDataset( X=X_train, Y=y_train, X_test=X_test, Y_test=y_test) - api = TabularClassificationTask(delete_tmp_folder_after_terminate=False, - search_space_updates=get_search_space_updates()) - api.search(dataset=datamanager, optimize_metric='accuracy', total_walltime_limit=500, func_eval_time_limit=150) + ############################################################################ + # Build and fit a classifier + # ========================== + api = TabularClassificationTask( + delete_tmp_folder_after_terminate=False, + search_space_updates=get_search_space_updates() + ) + api.search( + dataset=datamanager, + optimize_metric='accuracy', + total_walltime_limit=500, + func_eval_time_limit=150 + ) + + ############################################################################ + # Print the final ensemble performance + # ==================================== print(api.run_history, api.trajectory) y_pred = api.predict(X_test) score = api.score(y_pred, y_test) diff --git 
a/examples/traditional_pipeline.py b/examples/traditional_pipeline.py deleted file mode 100644 index 39b5db17e..000000000 --- a/examples/traditional_pipeline.py +++ /dev/null @@ -1,86 +0,0 @@ -""" -====================== -Tabular Classification -====================== -""" -import typing - -import sklearn.datasets -import sklearn.model_selection - -from autoPyTorch.datasets.tabular_dataset import TabularDataset -from autoPyTorch.pipeline.traditional_tabular_classification import TraditionalTabularClassificationPipeline -from autoPyTorch.utils.backend import create -from autoPyTorch.utils.pipeline import get_dataset_requirements - - -# Get the training data for tabular classification -def get_data_to_train() -> typing.Tuple[typing.Any, typing.Any, typing.Any, typing.Any]: - """ - This function returns a fit dictionary that within itself, contains all - the information to fit a pipeline - """ - - # Get the training data for tabular classification - # Move to Australian to showcase numerical vs categorical - X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True) - X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( - X, - y, - random_state=1, - ) - - return X_train, X_test, y_train, y_test - - -if __name__ == '__main__': - # Get data to train - X_train, X_test, y_train, y_test = get_data_to_train() - - # Create a datamanager for this toy problem - datamanager = TabularDataset( - X=X_train, Y=y_train, - X_test=X_test, Y_test=y_test) - - backend = create(temporary_directory='./tmp/example_trad_clf_1_tmp', - output_directory='./tmp/example_trad_clf_1_out', - delete_tmp_folder_after_terminate=False) - backend.save_datamanager(datamanager) - info = {'task_type': datamanager.task_type, - 'output_type': datamanager.output_type, - 'issparse': datamanager.issparse, - 'numerical_columns': datamanager.numerical_columns, - 'categorical_columns': datamanager.categorical_columns} - dataset_requirements = 
get_dataset_requirements(info=info) - dataset_properties = datamanager.get_dataset_properties(dataset_requirements) - pipeline = TraditionalTabularClassificationPipeline(dataset_properties=dataset_properties) - - split_id = 0 - X = dict({'dataset_properties': dataset_properties, - 'backend': backend, - 'X_train': datamanager.train_tensors[0], - 'y_train': datamanager.train_tensors[1], - 'X_test': datamanager.test_tensors[0] if datamanager.test_tensors is not None else None, - 'y_test': datamanager.test_tensors[1] if datamanager.test_tensors is not None else None, - 'train_indices': datamanager.splits[split_id][0], - 'val_indices': datamanager.splits[split_id][1], - 'split_id': split_id, - 'job_id': 0 - }) - - # Configuration space - pipeline_cs = pipeline.get_hyperparameter_search_space() - print("Pipeline CS:\n", '_' * 40, f"\n{pipeline_cs}") - config = pipeline_cs.sample_configuration() - print("Pipeline Random Config:\n", '_' * 40, f"\n{config}") - pipeline.set_hyperparameters(config) - - # Fit the pipeline - print("Fitting the pipeline...") - pipeline.fit(X) - - # Showcase some components of the pipeline - print(pipeline) - - predictions = pipeline.predict(X_test.to_numpy()) - print(predictions) From c200550bece90d5e71caf3032051bcd5006ef4f3 Mon Sep 17 00:00:00 2001 From: chico Date: Tue, 2 Feb 2021 16:36:40 +0100 Subject: [PATCH 2/3] Default to ubuntu-18.04 --- .github/workflows/docs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 9ae8a98ed..a7367c2bd 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -3,7 +3,7 @@ on: [pull_request, push] jobs: build-and-deploy: - runs-on: ubuntu-latest + runs-on: ubuntu-18.04 steps: - uses: actions/checkout@v2 - name: Setup Python From 086ffc72faa6d01c4b7ca9377b8839184e4ff395 Mon Sep 17 00:00:00 2001 From: chico Date: Tue, 2 Feb 2021 19:19:20 +0100 Subject: [PATCH 3/3] Comment enhancements --- .binder/postBuild | 4 ++-- 
docs/_templates/layout.html | 2 +- docs/index.rst | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.binder/postBuild b/.binder/postBuild index c7f300460..629fbb72b 100644 --- a/.binder/postBuild +++ b/.binder/postBuild @@ -8,8 +8,8 @@ python -m pip install .[docs,examples] # under BSD3 license, copyright the scikit-learn contributors # This script is called in a binder context. When this script is called, we are -# inside a git checkout of the automl/auto-sklearn repo. This script -# generates notebooks from the auto-sklearn python examples. +# inside a git checkout of the automl/Auto-PyTorch repo. This script +# generates notebooks from the Auto-PyTorch python examples. if [[ ! -f /.dockerenv ]]; then echo "This script was written for repo2docker and is supposed to run inside a docker container." diff --git a/docs/_templates/layout.html b/docs/_templates/layout.html index d0cc5e034..5ecc76d64 100644 --- a/docs/_templates/layout.html +++ b/docs/_templates/layout.html @@ -6,7 +6,7 @@ {# Add github banner (from: https://github.com/blog/273-github-ribbons). #} {% block header %} {{ super() }} -