Skip to content

Commit f26948d

Browse files
author
Jacob Beck
committed
remove archive blocks
1 parent 3cac2d3 commit f26948d

File tree

7 files changed

+164
-343
lines changed

7 files changed

+164
-343
lines changed

core/dbt/config/runtime.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ def __init__(self, project_name, version, project_root, source_paths,
6464
self.validate()
6565

6666
@classmethod
67-
def from_parts(cls, project, profile, args):
67+
def from_parts(cls, project, profile, args, allow_archive_blocks=False):
6868
"""Instantiate a RuntimeConfig from its components.
6969
7070
:param profile Profile: A parsed dbt Profile.
@@ -77,6 +77,12 @@ def from_parts(cls, project, profile, args):
7777
.DEFAULTS['quote_policy']
7878
)
7979
quoting.update(project.quoting)
80+
if project.archive and not allow_archive_blocks:
81+
# if the user has an `archive` section and archive blocks are not explicitly allowed, raise an error
82+
raise DbtProjectError(
83+
'Invalid project configuration: "archive" is not allowed'
84+
)
85+
8086
return cls(
8187
project_name=project.project_name,
8288
version=project.version,
@@ -163,12 +169,14 @@ def validate(self):
163169
self.validate_version()
164170

165171
@classmethod
166-
def from_args(cls, args):
172+
def from_args(cls, args, allow_archive_blocks=False):
167173
"""Given arguments, read in dbt_project.yml from the current directory,
168174
read in packages.yml if it exists, and use them to find the profile to
169175
load.
170176
171177
:param args argparse.Namespace: The arguments as parsed from the cli.
178+
:param allow_archive_blocks bool: If True, ignore archive blocks in
179+
configs. This flag exists to enable archive migration.
172180
:raises DbtProjectError: If the project is invalid or missing.
173181
:raises DbtProfileError: If the profile is invalid or missing.
174182
:raises ValidationException: If the cli variables are invalid.
@@ -185,5 +193,6 @@ def from_args(cls, args):
185193
return cls.from_parts(
186194
project=project,
187195
profile=profile,
188-
args=args
196+
args=args,
197+
allow_archive_blocks=allow_archive_blocks
189198
)

core/dbt/contracts/project.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from dbt.logger import GLOBAL_LOGGER as logger # noqa
33
from dbt.utils import deep_merge
44

5-
# TODO: add description fields.
5+
66
ARCHIVE_TABLE_CONFIG_CONTRACT = {
77
'type': 'object',
88
'additionalProperties': False,

core/dbt/loader.py

Lines changed: 2 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@
1010
from dbt.utils import timestring
1111

1212
from dbt.parser import MacroParser, ModelParser, SeedParser, AnalysisParser, \
13-
DocumentationParser, DataTestParser, HookParser, ArchiveParser, \
14-
SchemaParser, ParserUtils, ArchiveBlockParser
13+
DocumentationParser, DataTestParser, HookParser, SchemaParser, \
14+
ParserUtils, ArchiveBlockParser
1515

1616
from dbt.contracts.project import ProjectList
1717

@@ -63,18 +63,6 @@ def _load_macros(self, internal_manifest=None):
6363
resource_type=NodeType.Macro,
6464
))
6565

66-
def _load_archives_from_project(self):
67-
archive_parser = ArchiveParser(self.root_project, self.all_projects,
68-
self.macro_manifest)
69-
for key, node in archive_parser.load_and_parse().items():
70-
# we have another archive parser, so we have to check for
71-
# collisions
72-
existing = self.nodes.get(key)
73-
if existing:
74-
dbt.exceptions.raise_duplicate_resource_name(existing, node)
75-
else:
76-
self.nodes[key] = node
77-
7866
def _load_seeds(self):
7967
parser = SeedParser(self.root_project, self.all_projects,
8068
self.macro_manifest)
@@ -98,7 +86,6 @@ def _load_nodes(self):
9886
self.macro_manifest)
9987
self.nodes.update(hook_parser.load_and_parse())
10088

101-
self._load_archives_from_project()
10289
self._load_seeds()
10390

10491
def _load_docs(self):

core/dbt/parser/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11

22
from .analysis import AnalysisParser
3-
from .archives import ArchiveParser
43
from .archives import ArchiveBlockParser
54
from .data_test import DataTestParser
65
from .docs import DocumentationParser
@@ -14,7 +13,6 @@
1413

1514
__all__ = [
1615
'AnalysisParser',
17-
'ArchiveParser',
1816
'ArchiveBlockParser',
1917
'DataTestParser',
2018
'DocumentationParser',

core/dbt/parser/archives.py

Lines changed: 1 addition & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,11 @@
1-
from dbt.contracts.graph.unparsed import UnparsedNode
1+
22
from dbt.contracts.graph.parsed import ParsedArchiveNode
33
from dbt.node_types import NodeType
4-
from dbt.parser.base import MacrosKnownParser
54
from dbt.parser.base_sql import BaseSqlParser, SQLParseResult
6-
from dbt.adapters.factory import get_adapter
75
import dbt.clients.jinja
86
import dbt.exceptions
97
import dbt.utils
108

11-
import os
12-
139

1410
def set_archive_attributes(node):
1511
config_keys = {
@@ -24,93 +20,6 @@ def set_archive_attributes(node):
2420
return node
2521

2622

27-
class ArchiveParser(MacrosKnownParser):
28-
@classmethod
29-
def parse_archives_from_project(cls, config):
30-
archives = []
31-
archive_configs = config.archive
32-
33-
for archive_config in archive_configs:
34-
tables = archive_config.get('tables')
35-
36-
if tables is None:
37-
continue
38-
39-
for table in tables:
40-
cfg = table.copy()
41-
source_database = archive_config.get(
42-
'source_database',
43-
config.credentials.database
44-
)
45-
cfg['target_database'] = archive_config.get(
46-
'target_database',
47-
config.credentials.database
48-
)
49-
50-
source_schema = archive_config['source_schema']
51-
cfg['target_schema'] = archive_config.get('target_schema')
52-
# project-defined archives always use the 'timestamp' strategy.
53-
cfg['strategy'] = 'timestamp'
54-
55-
fake_path = [cfg['target_database'], cfg['target_schema'],
56-
cfg['target_table']]
57-
58-
relation = get_adapter(config).Relation.create(
59-
database=source_database,
60-
schema=source_schema,
61-
identifier=table['source_table'],
62-
type='table'
63-
)
64-
65-
raw_sql = '{{ config(materialized="archive") }}' + \
66-
'select * from {!s}'.format(relation)
67-
68-
archives.append({
69-
'name': table.get('target_table'),
70-
'root_path': config.project_root,
71-
'resource_type': NodeType.Archive,
72-
'path': os.path.join('archive', *fake_path),
73-
'original_file_path': 'dbt_project.yml',
74-
'package_name': config.project_name,
75-
'config': cfg,
76-
'raw_sql': raw_sql
77-
})
78-
79-
return archives
80-
81-
def load_and_parse(self):
82-
"""Load and parse archives in a list of projects. Returns a dict
83-
that maps unique ids onto ParsedNodes"""
84-
85-
archives = []
86-
to_return = {}
87-
88-
for name, project in self.all_projects.items():
89-
archives = archives + self.parse_archives_from_project(project)
90-
91-
# We're going to have a similar issue with parsed nodes, if we want to
92-
# make parse_node return those.
93-
for a in archives:
94-
# archives have a config, but that would make for an invalid
95-
# UnparsedNode, so remove it and pass it along to parse_node as an
96-
# argument.
97-
archive_config = a.pop('config')
98-
archive = UnparsedNode(**a)
99-
node_path = self.get_path(archive.resource_type,
100-
archive.package_name,
101-
archive.name)
102-
103-
parsed_node = self.parse_node(
104-
archive,
105-
node_path,
106-
self.all_projects.get(archive.package_name),
107-
archive_config=archive_config)
108-
109-
to_return[node_path] = set_archive_attributes(parsed_node)
110-
111-
return to_return
112-
113-
11423
class ArchiveBlockParser(BaseSqlParser):
11524
def parse_archives_from_file(self, file_node, tags=None):
11625
# the file node has a 'raw_sql' field that contains the jinja data with

0 commit comments

Comments
 (0)