diff --git a/requirements-dev.txt b/requirements-dev.txt
index cf27e63..fcc9b3c 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -1,3 +1,6 @@
 pytest>=7.4.0
 pytest-cov>=4.1.0
 pytest-mock>=3.11.0
+selenium>=4.10.0
+webdriver-manager>=4.0.0
+numpy>=1.24.0
diff --git a/requirements.txt b/requirements.txt
index 4cca127..1688ed0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
 requests>=2.31.0
 beautifulsoup4>=4.12.0
 lxml>=4.9.0
+Pillow>=10.0.0
diff --git a/tests/test_cli.py b/tests/test_cli.py
index c0151a9..c7a1dc8 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -1,66 +1,686 @@
 """Tests for CLI."""
 
+import json
 import pytest
 import sys
-from unittest.mock import patch, Mock
-from wayback_diff.cli import format_output
+from unittest.mock import patch, Mock, MagicMock
+from pathlib import Path
+import tempfile
+import os
 
+from wayback_diff.cli import format_output, main
+
+
+class TestFormatOutput:
+    """Test cases for format_output function."""
+
+    def _make_summary(self, total=5, added=2, removed=1, modified=2,
+                      high=1, medium=2, low=2):
+        return {
+            'total_changes': total,
+            'added': added,
+            'removed': removed,
+            'modified': modified,
+            'high_significance': high,
+            'medium_significance': medium,
+            'low_significance': low,
+        }
+
+    def _make_change(self, change_type='modified', old_text='Old', new_text='New',
+                     significance='high'):
+        return {
+            'type': change_type,
+            'old_text': old_text,
+            'new_text': new_text,
+            'significance': significance,
+        }
 
-class TestCLI:
-    """Test cases for CLI."""
-    
     def test_format_output_text(self):
         """Test text output formatting."""
-        summary = {
-            'total_changes': 5,
-            'added': 2,
-            'removed': 1,
-            'modified': 2,
-            'high_significance': 1,
-            'medium_significance': 2,
-            'low_significance': 2,
-        }
-        
-        changes = [
-            {
-                'type': 'modified',
-                'old_text': 'Old',
-                'new_text': 'New',
-                'significance': 'high'
-            }
-        ]
-        
+        summary = self._make_summary()
+        changes = [self._make_change()]
+
         output = format_output(changes, summary, 'text')
-        
+
         assert 'WAYBACK DIFF SUMMARY' in output
         assert 'Total changes: 5' in output
+        assert 'Added: 2' in output
+        assert 'Removed: 1' in output
+        assert 'Modified: 2' in output
+        assert 'High: 1' in output
+        assert 'Medium: 2' in output
+        assert 'Low: 2' in output
         assert 'HIGH SIGNIFICANCE CHANGES' in output
-    
+
     def test_format_output_json(self):
         """Test JSON output formatting."""
-        summary = {
-            'total_changes': 1,
-            'added': 0,
-            'removed': 0,
-            'modified': 1,
-            'high_significance': 1,
-            'medium_significance': 0,
-            'low_significance': 0,
-        }
-        
-        changes = [
-            {
-                'type': 'modified',
-                'old_text': 'Old',
-                'new_text': 'New',
-                'significance': 'high'
-            }
-        ]
-        
+        summary = self._make_summary(total=1, added=0, removed=0, modified=1,
+                                     high=1, medium=0, low=0)
+        changes = [self._make_change()]
+
         output = format_output(changes, summary, 'json')
-        
-        import json
+
         data = json.loads(output)
         assert 'summary' in data
         assert 'changes' in data
         assert data['summary']['total_changes'] == 1
+
+    def test_format_output_unified_returns_empty(self):
+        """Test unified format returns empty string."""
+        summary = self._make_summary()
+        changes = [self._make_change()]
+
+        output = format_output(changes, summary, 'unified')
+        assert output == ""
+
+    def test_format_output_text_no_changes(self):
+        """Test text output with no changes."""
+        summary = self._make_summary(total=0, added=0, removed=0, modified=0,
+                                     high=0, medium=0, low=0)
+        output = format_output([], summary, 'text')
+        assert 'Total changes: 0' in output
+        assert 'HIGH SIGNIFICANCE CHANGES' not in output
+        assert 'MEDIUM SIGNIFICANCE CHANGES' not in output
+
+    def test_format_output_text_medium_changes(self):
+        """Test text output with medium significance changes."""
+        summary = self._make_summary(total=3, added=0, removed=0, modified=3,
+                                     high=0, medium=3, low=0)
+        changes = [self._make_change(significance='medium') for _ in range(3)]
+        output = format_output(changes, summary, 'text')
+        assert 'MEDIUM SIGNIFICANCE CHANGES' in output
+
+    def test_format_output_text_more_than_10_medium(self):
+        """Test text output with more than 10 medium changes (truncation)."""
+        summary = self._make_summary(total=15, added=0, removed=0, modified=15,
+                                     high=0, medium=15, low=0)
+        changes = [self._make_change(significance='medium') for _ in range(15)]
+        output = format_output(changes, summary, 'text')
+        assert 'MEDIUM SIGNIFICANCE CHANGES' in output
+        assert '... and 5 more medium significance changes' in output
+
+    def test_format_output_text_more_than_20_high(self):
+        """Test text output with more than 20 high changes (truncation)."""
+        summary = self._make_summary(total=25, added=0, removed=0, modified=25,
+                                     high=25, medium=0, low=0)
+        changes = [self._make_change(significance='high') for _ in range(25)]
+        output = format_output(changes, summary, 'text')
+        assert 'HIGH SIGNIFICANCE CHANGES' in output
+        assert '... and 5 more high significance changes' in output
+
+    def test_format_output_text_low_changes_small_count(self):
+        """Test text output with a small number of low significance changes."""
+        summary = self._make_summary(total=3, added=0, removed=0, modified=3,
+                                     high=0, medium=0, low=3)
+        changes = [self._make_change(significance='low') for _ in range(3)]
+        output = format_output(changes, summary, 'text')
+        assert 'LOW SIGNIFICANCE CHANGES' in output
+        assert '3 low significance changes' in output
+
+    def test_format_output_text_low_changes_large_count(self):
+        """Test text output with more than 50 low significance changes (hidden)."""
+        summary = self._make_summary(total=55, added=0, removed=0, modified=55,
+                                     high=0, medium=0, low=55)
+        changes = [self._make_change(significance='low') for _ in range(55)]
+        output = format_output(changes, summary, 'text')
+        # With more than 50 low-significance changes, the LOW section is omitted entirely
+        assert 'LOW SIGNIFICANCE CHANGES' not in output
+
+    def test_format_output_text_added_change(self):
+        """Test text output with added change (no old_text)."""
+        summary = self._make_summary(total=1, added=1, removed=0, modified=0,
+                                     high=1, medium=0, low=0)
+        changes = [self._make_change(change_type='added', old_text='', new_text='New Content')]
+        output = format_output(changes, summary, 'text')
+        assert 'NEW:' in output
+
+    def test_format_output_text_removed_change(self):
+        """Test text output with removed change (no new_text)."""
+        summary = self._make_summary(total=1, added=0, removed=1, modified=0,
+                                     high=1, medium=0, low=0)
+        changes = [self._make_change(change_type='removed', old_text='Old Content', new_text='')]
+        output = format_output(changes, summary, 'text')
+        assert 'OLD:' in output
+
+    def test_format_output_json_unicode(self):
+        """Test JSON output round-trips change text (sample text is Spanish but ASCII-only)."""
+        summary = self._make_summary(total=1, added=0, removed=0, modified=1,
+                                     high=1, medium=0, low=0)
+        changes = [self._make_change(old_text='Texto viejo', new_text='Texto nuevo')]
+        output = format_output(changes, summary, 'json')
+        data = json.loads(output)
+        assert 'viejo' in data['changes'][0]['old_text']
+
+
+class TestMain:
+    """Test cases for CLI main function."""
+
+    @patch('wayback_diff.cli.WebFetcher')
+    def test_main_basic_comparison(self, mock_fetcher_cls):
+        """Test basic URL comparison flow."""
+        mock_fetcher = Mock()
+        mock_fetcher_cls.return_value = mock_fetcher
+        mock_fetcher.fetch.side_effect = [
+            (b'<html><body><h1>Old</h1></body></html>', 'text/html', {'status_code': 200}),
+            (b'<html><body><h1>New</h1></body></html>', 'text/html', {'status_code': 200}),
+        ]
+        mock_fetcher.is_html.return_value = True
+
+        with patch('sys.argv', ['wayback-diff', 'https://example.com/old', 'https://example.com/new']):
+            with pytest.raises(SystemExit) as exc_info:
+                main()
+            # Should exit with 1 or 2 (changes detected)
+            assert exc_info.value.code in (1, 2)
+
+    @patch('wayback_diff.cli.WebFetcher')
+    def test_main_identical_pages(self, mock_fetcher_cls):
+        """Test comparison of identical pages exits with 0."""
+        mock_fetcher = Mock()
+        mock_fetcher_cls.return_value = mock_fetcher
+        content = b'<html><body><p>Same content</p></body></html>'
+        mock_fetcher.fetch.side_effect = [
+            (content, 'text/html', {'status_code': 200}),
+            (content, 'text/html', {'status_code': 200}),
+        ]
+        mock_fetcher.is_html.return_value = True
+
+        with patch('sys.argv', ['wayback-diff', 'https://example.com/a', 'https://example.com/b']):
+            with pytest.raises(SystemExit) as exc_info:
+                main()
+            assert exc_info.value.code == 0
+
+    @patch('wayback_diff.cli.WebFetcher')
+    def test_main_fetch_failure_url1(self, mock_fetcher_cls):
+        """Test exit code 1 when URL1 fetch fails."""
+        mock_fetcher = Mock()
+        mock_fetcher_cls.return_value = mock_fetcher
+        mock_fetcher.fetch.return_value = (None, None, {'error': 'Connection failed'})
+
+        with patch('sys.argv', ['wayback-diff', 'https://example.com/bad', 'https://example.com/good']):
+            with pytest.raises(SystemExit) as exc_info:
+                main()
+            assert exc_info.value.code == 1
+
+    @patch('wayback_diff.cli.WebFetcher')
+    def test_main_fetch_failure_url2(self, mock_fetcher_cls):
+        """Test exit code 1 when URL2 fetch fails."""
+        mock_fetcher = Mock()
+        mock_fetcher_cls.return_value = mock_fetcher
+        mock_fetcher.fetch.side_effect = [
+            (b'<html>OK</html>', 'text/html', {'status_code': 200}),
+            (None, None, {'error': 'Timeout'}),
+        ]
+        mock_fetcher.is_html.return_value = True
+
+        with patch('sys.argv', ['wayback-diff', 'https://example.com/a', 'https://example.com/bad']):
+            with pytest.raises(SystemExit) as exc_info:
+                main()
+            assert exc_info.value.code == 1
+
+    @patch('wayback_diff.cli.WebFetcher')
+    def test_main_fetch_failure_url1_no_error_key(self, mock_fetcher_cls):
+        """Test exit code 1 when URL1 fetch fails with no error key in metadata."""
+        mock_fetcher = Mock()
+        mock_fetcher_cls.return_value = mock_fetcher
+        mock_fetcher.fetch.return_value = (None, None, {})
+
+        with patch('sys.argv', ['wayback-diff', 'https://example.com/bad', 'https://example.com/good']):
+            with pytest.raises(SystemExit) as exc_info:
+                main()
+            assert exc_info.value.code == 1
+
+    @patch('wayback_diff.cli.WebFetcher')
+    def test_main_json_format(self, mock_fetcher_cls):
+        """Test --format json flag."""
+        mock_fetcher = Mock()
+        mock_fetcher_cls.return_value = mock_fetcher
+        content = b'<html><body><p>Same</p></body></html>'
+        mock_fetcher.fetch.side_effect = [
+            (content, 'text/html', {'status_code': 200}),
+            (content, 'text/html', {'status_code': 200}),
+        ]
+        mock_fetcher.is_html.return_value = True
+
+        with patch('sys.argv', ['wayback-diff', 'https://a.com', 'https://b.com', '--format', 'json']):
+            with pytest.raises(SystemExit) as exc_info:
+                main()
+            assert exc_info.value.code == 0
+
+    @patch('wayback_diff.cli.WebFetcher')
+    def test_main_unified_format(self, mock_fetcher_cls):
+        """Test --format unified flag."""
+        mock_fetcher = Mock()
+        mock_fetcher_cls.return_value = mock_fetcher
+        mock_fetcher.fetch.side_effect = [
+            (b'<html>Old</html>', 'text/html', {'status_code': 200}),
+            (b'<html>New</html>', 'text/html', {'status_code': 200}),
+        ]
+        mock_fetcher.is_html.return_value = True
+
+        with patch('sys.argv', ['wayback-diff', 'https://a.com', 'https://b.com', '--format', 'unified']):
+            with pytest.raises(SystemExit) as exc_info:
+                main()
+            # unified diff with changes exits non-zero
+            assert exc_info.value.code in (1, 2)
+
+    @patch('wayback_diff.cli.WebFetcher')
+    def test_main_output_to_file(self, mock_fetcher_cls):
+        """Test --output flag writes to file."""
+        mock_fetcher = Mock()
+        mock_fetcher_cls.return_value = mock_fetcher
+        content = b'<html><body>Same</body></html>'
+        mock_fetcher.fetch.side_effect = [
+            (content, 'text/html', {'status_code': 200}),
+            (content, 'text/html', {'status_code': 200}),
+        ]
+        mock_fetcher.is_html.return_value = True
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            outfile = os.path.join(tmpdir, 'output.txt')
+            with patch('sys.argv', ['wayback-diff', 'https://a.com', 'https://b.com', '-o', outfile]):
+                with pytest.raises(SystemExit):
+                    main()
+            assert os.path.exists(outfile)
+
+    @patch('wayback_diff.cli.WebFetcher')
+    def test_main_verbose(self, mock_fetcher_cls):
+        """Test --verbose flag."""
+        mock_fetcher = Mock()
+        mock_fetcher_cls.return_value = mock_fetcher
+        content = b'<html>Same</html>'
+        mock_fetcher.fetch.side_effect = [
+            (content, 'text/html', {'status_code': 200}),
+            (content, 'text/html', {'status_code': 200}),
+        ]
+        mock_fetcher.is_html.return_value = True
+
+        with patch('sys.argv', ['wayback-diff', 'https://a.com', 'https://b.com', '--verbose']):
+            with pytest.raises(SystemExit):
+                main()
+
+    @patch('wayback_diff.cli.WebFetcher')
+    def test_main_non_html_warning(self, mock_fetcher_cls):
+        """Test warning when content is not HTML."""
+        mock_fetcher = Mock()
+        mock_fetcher_cls.return_value = mock_fetcher
+        content = b'{"key": "value"}'
+        mock_fetcher.fetch.side_effect = [
+            (content, 'application/json', {'status_code': 200}),
+            (content, 'application/json', {'status_code': 200}),
+        ]
+        mock_fetcher.is_html.return_value = False
+
+        with patch('sys.argv', ['wayback-diff', 'https://a.com/api', 'https://b.com/api']):
+            with pytest.raises(SystemExit):
+                main()
+
+    @patch('wayback_diff.cli.WaybackCleaner')
+    @patch('wayback_diff.cli.WebFetcher')
+    def test_main_wayback_url_cleaning(self, mock_fetcher_cls, mock_cleaner_cls):
+        """Test Wayback URL auto-cleaning."""
+        mock_fetcher = Mock()
+        mock_fetcher_cls.return_value = mock_fetcher
+        content = b'<html>Content</html>'
+        mock_fetcher.fetch.side_effect = [
+            (content, 'text/html', {'status_code': 200}),
+            (content, 'text/html', {'status_code': 200}),
+        ]
+        mock_fetcher.is_html.return_value = True
+        mock_cleaner_cls.is_wayback_url.side_effect = [True, False]
+        mock_cleaner_cls.clean_wayback_html.return_value = content
+
+        wb_url = 'https://web.archive.org/web/20230101/https://example.com/'
+        with patch('sys.argv', ['wayback-diff', wb_url, 'https://example.com/', '--verbose']):
+            with pytest.raises(SystemExit):
+                main()
+        mock_cleaner_cls.clean_wayback_html.assert_called_once()
+
+    @patch('wayback_diff.cli.WaybackCleaner')
+    @patch('wayback_diff.cli.WebFetcher')
+    def test_main_no_clean_wayback_flag(self, mock_fetcher_cls, mock_cleaner_cls):
+        """Test --no-clean-wayback flag skips cleaning."""
+        mock_fetcher = Mock()
+        mock_fetcher_cls.return_value = mock_fetcher
+        content = b'<html>Content</html>'
+        mock_fetcher.fetch.side_effect = [
+            (content, 'text/html', {'status_code': 200}),
+            (content, 'text/html', {'status_code': 200}),
+        ]
+        mock_fetcher.is_html.return_value = True
+
+        wb_url = 'https://web.archive.org/web/20230101/https://example.com/'
+        with patch('sys.argv', ['wayback-diff', wb_url, 'https://example.com/', '--no-clean-wayback']):
+            with pytest.raises(SystemExit):
+                main()
+        mock_cleaner_cls.clean_wayback_html.assert_not_called()
+
+    @patch('wayback_diff.cli.MarkdownReportGenerator')
+    @patch('wayback_diff.cli.WebFetcher')
+    def test_main_markdown_report(self, mock_fetcher_cls, mock_report_cls):
+        """Test --markdown flag generates report."""
+        mock_fetcher = Mock()
+        mock_fetcher_cls.return_value = mock_fetcher
+        content = b'<html>Same</html>'
+        mock_fetcher.fetch.side_effect = [
+            (content, 'text/html', {'status_code': 200}),
+            (content, 'text/html', {'status_code': 200}),
+        ]
+        mock_fetcher.is_html.return_value = True
+
+        mock_gen = Mock()
+        mock_report_cls.return_value = mock_gen
+        mock_gen.generate_comparison_report.return_value = "# Report"
+        mock_gen.save_report.return_value = "/tmp/report.md"
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            with patch('sys.argv', ['wayback-diff', 'https://a.com', 'https://b.com',
+                                    '--markdown', '--report-dir', tmpdir]):
+                with pytest.raises(SystemExit):
+                    main()
+            mock_gen.generate_comparison_report.assert_called_once()
+            mock_gen.save_report.assert_called_once()
+
+    @patch('wayback_diff.cli.VISUAL_COMPARISON_AVAILABLE', False)
+    @patch('wayback_diff.cli.WebFetcher')
+    def test_main_visual_not_available(self, mock_fetcher_cls):
+        """Test --visual flag when dependencies not available."""
+        mock_fetcher = Mock()
+        mock_fetcher_cls.return_value = mock_fetcher
+        content = b'<html>Content</html>'
+        mock_fetcher.fetch.side_effect = [
+            (content, 'text/html', {'status_code': 200}),
+            (content, 'text/html', {'status_code': 200}),
+        ]
+        mock_fetcher.is_html.return_value = True
+
+        with patch('sys.argv', ['wayback-diff', 'https://a.com', 'https://b.com', '--visual']):
+            with pytest.raises(SystemExit) as exc_info:
+                main()
+            assert exc_info.value.code == 1
+
+    @patch('wayback_diff.cli.VISUAL_COMPARISON_AVAILABLE', True)
+    @patch('wayback_diff.cli.VisualComparison')
+    @patch('wayback_diff.cli.MarkdownReportGenerator')
+    @patch('wayback_diff.cli.WebFetcher')
+    def test_main_visual_comparison_success(self, mock_fetcher_cls, mock_report_cls,
+                                            mock_visual_cls):
+        """Test --visual flag with successful comparison."""
+        mock_fetcher = Mock()
+        mock_fetcher_cls.return_value = mock_fetcher
+        content = b'<html>Same</html>'
+        mock_fetcher.fetch.side_effect = [
+            (content, 'text/html', {'status_code': 200}),
+            (content, 'text/html', {'status_code': 200}),
+        ]
+        mock_fetcher.is_html.return_value = True
+
+        mock_visual = Mock()
+        mock_visual_cls.return_value = mock_visual
+        mock_visual.compare_urls.return_value = {
+            'chrome': {
+                'difference_ratio': 0.02,
+                'different_pixels': 100,
+                'screenshot1': '/tmp/s1.png',
+                'screenshot2': '/tmp/s2.png',
+                'comparison': '/tmp/comp.png',
+            }
+        }
+
+        mock_gen = Mock()
+        mock_report_cls.return_value = mock_gen
+        mock_gen.generate_comparison_report.return_value = "# Report"
+        mock_gen.save_report.return_value = "/tmp/report.md"
+
+        with patch('sys.argv', ['wayback-diff', 'https://a.com', 'https://b.com', '--visual']):
+            with pytest.raises(SystemExit):
+                main()
+
+    @patch('wayback_diff.cli.VISUAL_COMPARISON_AVAILABLE', True)
+    @patch('wayback_diff.cli.VisualComparison')
+    @patch('wayback_diff.cli.MarkdownReportGenerator')
+    @patch('wayback_diff.cli.WebFetcher')
+    def test_main_visual_comparison_with_error_result(self, mock_fetcher_cls,
+                                                       mock_report_cls, mock_visual_cls):
+        """Test --visual flag when browser returns error."""
+        mock_fetcher = Mock()
+        mock_fetcher_cls.return_value = mock_fetcher
+        content = b'<html>Same</html>'
+        mock_fetcher.fetch.side_effect = [
+            (content, 'text/html', {'status_code': 200}),
+            (content, 'text/html', {'status_code': 200}),
+        ]
+        mock_fetcher.is_html.return_value = True
+
+        mock_visual = Mock()
+        mock_visual_cls.return_value = mock_visual
+        mock_visual.compare_urls.return_value = {
+            'chrome': {'error': 'No chrome found'}
+        }
+
+        mock_gen = Mock()
+        mock_report_cls.return_value = mock_gen
+        mock_gen.generate_comparison_report.return_value = "# Report"
+        mock_gen.save_report.return_value = "/tmp/report.md"
+
+        with patch('sys.argv', ['wayback-diff', 'https://a.com', 'https://b.com', '--visual']):
+            with pytest.raises(SystemExit):
+                main()
+
+    @patch('wayback_diff.cli.VISUAL_COMPARISON_AVAILABLE', True)
+    @patch('wayback_diff.cli.VisualComparison')
+    @patch('wayback_diff.cli.WebFetcher')
+    def test_main_visual_comparison_exception(self, mock_fetcher_cls, mock_visual_cls):
+        """Test --visual flag when visual comparison raises exception."""
+        mock_fetcher = Mock()
+        mock_fetcher_cls.return_value = mock_fetcher
+        content = b'<html>Same</html>'
+        mock_fetcher.fetch.side_effect = [
+            (content, 'text/html', {'status_code': 200}),
+            (content, 'text/html', {'status_code': 200}),
+        ]
+        mock_fetcher.is_html.return_value = True
+
+        mock_visual_cls.side_effect = Exception("Browser crash")
+
+        with patch('sys.argv', ['wayback-diff', 'https://a.com', 'https://b.com',
+                                '--visual', '--verbose']):
+            with pytest.raises(SystemExit):
+                main()
+
+    @patch('wayback_diff.cli.VISUAL_COMPARISON_AVAILABLE', True)
+    @patch('wayback_diff.cli.VisualComparison')
+    @patch('wayback_diff.cli.WebFetcher')
+    def test_main_visual_import_error(self, mock_fetcher_cls, mock_visual_cls):
+        """Test --visual flag when ImportError occurs."""
+        mock_fetcher = Mock()
+        mock_fetcher_cls.return_value = mock_fetcher
+        content = b'<html>Same</html>'
+        mock_fetcher.fetch.side_effect = [
+            (content, 'text/html', {'status_code': 200}),
+            (content, 'text/html', {'status_code': 200}),
+        ]
+        mock_fetcher.is_html.return_value = True
+
+        mock_visual_cls.side_effect = ImportError("No selenium")
+
+        with patch('sys.argv', ['wayback-diff', 'https://a.com', 'https://b.com', '--visual']):
+            with pytest.raises(SystemExit):
+                main()
+
+    @patch('wayback_diff.cli.LinkTraverser')
+    @patch('wayback_diff.cli.WebFetcher')
+    def test_main_traverse_mode(self, mock_fetcher_cls, mock_traverser_cls):
+        """Test --traverse flag."""
+        mock_fetcher = Mock()
+        mock_fetcher_cls.return_value = mock_fetcher
+        content = b'<html>Content</html>'
+        mock_fetcher.fetch.side_effect = [
+            (content, 'text/html', {'status_code': 200}),
+            (content, 'text/html', {'status_code': 200}),
+        ]
+        mock_fetcher.is_html.return_value = True
+
+        mock_traverser = Mock()
+        mock_traverser_cls.return_value = mock_traverser
+        mock_traverser.traverse_and_compare.return_value = []
+        mock_traverser.generate_report.return_value = "Traversal report"
+
+        with patch('sys.argv', ['wayback-diff', 'https://a.com', 'https://b.com', '--traverse']):
+            with pytest.raises(SystemExit) as exc_info:
+                main()
+            assert exc_info.value.code == 0
+
+    @patch('wayback_diff.cli.LinkTraverser')
+    @patch('wayback_diff.cli.WebFetcher')
+    def test_main_traverse_with_high_diffs(self, mock_fetcher_cls, mock_traverser_cls):
+        """Test --traverse with high significance differences exits with 2."""
+        mock_fetcher = Mock()
+        mock_fetcher_cls.return_value = mock_fetcher
+        content = b'<html>Content</html>'
+        mock_fetcher.fetch.side_effect = [
+            (content, 'text/html', {'status_code': 200}),
+            (content, 'text/html', {'status_code': 200}),
+        ]
+        mock_fetcher.is_html.return_value = True
+
+        mock_traverser = Mock()
+        mock_traverser_cls.return_value = mock_traverser
+        mock_traverser.traverse_and_compare.return_value = [
+            {'status': 'compared', 'high_significance': 5}
+        ]
+        mock_traverser.generate_report.return_value = "Report"
+
+        with patch('sys.argv', ['wayback-diff', 'https://a.com', 'https://b.com', '--traverse']):
+            with pytest.raises(SystemExit) as exc_info:
+                main()
+            assert exc_info.value.code == 2
+
+    @patch('wayback_diff.cli.LinkTraverser')
+    @patch('wayback_diff.cli.WebFetcher')
+    def test_main_traverse_compared_no_high(self, mock_fetcher_cls, mock_traverser_cls):
+        """Test --traverse with compared pages but no high significance exits with 1."""
+        mock_fetcher = Mock()
+        mock_fetcher_cls.return_value = mock_fetcher
+        content = b'<html>Content</html>'
+        mock_fetcher.fetch.side_effect = [
+            (content, 'text/html', {'status_code': 200}),
+            (content, 'text/html', {'status_code': 200}),
+        ]
+        mock_fetcher.is_html.return_value = True
+
+        mock_traverser = Mock()
+        mock_traverser_cls.return_value = mock_traverser
+        mock_traverser.traverse_and_compare.return_value = [
+            {'status': 'compared', 'high_significance': 0}
+        ]
+        mock_traverser.generate_report.return_value = "Report"
+
+        with patch('sys.argv', ['wayback-diff', 'https://a.com', 'https://b.com', '--traverse']):
+            with pytest.raises(SystemExit) as exc_info:
+                main()
+            assert exc_info.value.code == 1
+
+    @patch('wayback_diff.cli.MarkdownReportGenerator')
+    @patch('wayback_diff.cli.LinkTraverser')
+    @patch('wayback_diff.cli.WebFetcher')
+    def test_main_traverse_with_markdown(self, mock_fetcher_cls, mock_traverser_cls,
+                                         mock_report_cls):
+        """Test --traverse --markdown generates report."""
+        mock_fetcher = Mock()
+        mock_fetcher_cls.return_value = mock_fetcher
+        content = b'<html>Content</html>'
+        mock_fetcher.fetch.side_effect = [
+            (content, 'text/html', {'status_code': 200}),
+            (content, 'text/html', {'status_code': 200}),
+        ]
+        mock_fetcher.is_html.return_value = True
+
+        mock_traverser = Mock()
+        mock_traverser_cls.return_value = mock_traverser
+        mock_traverser.traverse_and_compare.return_value = []
+        mock_traverser.generate_report.return_value = "Report"
+
+        mock_gen = Mock()
+        mock_report_cls.return_value = mock_gen
+        mock_gen.generate_comparison_report.return_value = "# Report"
+        mock_gen.save_report.return_value = "/tmp/report.md"
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            with patch('sys.argv', ['wayback-diff', 'https://a.com', 'https://b.com',
+                                    '--traverse', '--markdown', '--report-dir', tmpdir]):
+                with pytest.raises(SystemExit):
+                    main()
+            mock_gen.save_report.assert_called_once()
+
+    @patch('wayback_diff.cli.LinkTraverser')
+    @patch('wayback_diff.cli.WebFetcher')
+    def test_main_traverse_output_to_file(self, mock_fetcher_cls, mock_traverser_cls):
+        """Test --traverse -o flag writes report to file."""
+        mock_fetcher = Mock()
+        mock_fetcher_cls.return_value = mock_fetcher
+        content = b'<html>Content</html>'
+        mock_fetcher.fetch.side_effect = [
+            (content, 'text/html', {'status_code': 200}),
+            (content, 'text/html', {'status_code': 200}),
+        ]
+        mock_fetcher.is_html.return_value = True
+
+        mock_traverser = Mock()
+        mock_traverser_cls.return_value = mock_traverser
+        mock_traverser.traverse_and_compare.return_value = []
+        mock_traverser.generate_report.return_value = "Traversal report text"
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            outfile = os.path.join(tmpdir, 'out.txt')
+            with patch('sys.argv', ['wayback-diff', 'https://a.com', 'https://b.com',
+                                    '--traverse', '-o', outfile, '--verbose']):
+                with pytest.raises(SystemExit):
+                    main()
+            assert os.path.exists(outfile)
+            with open(outfile) as f:
+                assert 'Traversal report text' in f.read()
+
+    @patch('wayback_diff.cli.DiffEngine')
+    @patch('wayback_diff.cli.WebFetcher')
+    def test_main_high_significance_exits_2(self, mock_fetcher_cls, mock_engine_cls):
+        """Test exit code 2 for high significance changes."""
+        mock_fetcher = Mock()
+        mock_fetcher_cls.return_value = mock_fetcher
+        mock_fetcher.fetch.side_effect = [
+            (b'<html>Old</html>', 'text/html', {'status_code': 200}),
+            (b'<html>New</html>', 'text/html', {'status_code': 200}),
+        ]
+        mock_fetcher.is_html.return_value = True
+
+        mock_engine = Mock()
+        mock_engine_cls.return_value = mock_engine
+        mock_engine.extract_meaningful_changes.return_value = [
+            {'type': 'modified', 'significance': 'high', 'old_text': 'Old', 'new_text': 'New'}
+        ]
+        mock_engine.get_summary.return_value = {
+            'total_changes': 1, 'added': 0, 'removed': 0, 'modified': 1,
+            'high_significance': 1, 'medium_significance': 0, 'low_significance': 0,
+        }
+
+        with patch('sys.argv', ['wayback-diff', 'https://a.com', 'https://b.com']):
+            with pytest.raises(SystemExit) as exc_info:
+                main()
+            assert exc_info.value.code == 2
+
+    @patch('wayback_diff.cli.WebFetcher')
+    def test_main_browsers_auto(self, mock_fetcher_cls):
+        """Test --browsers flag parsing with an explicit browser list."""
+        mock_fetcher = Mock()
+        mock_fetcher_cls.return_value = mock_fetcher
+        content = b'<html>Same</html>'
+        mock_fetcher.fetch.side_effect = [
+            (content, 'text/html', {'status_code': 200}),
+            (content, 'text/html', {'status_code': 200}),
+        ]
+        mock_fetcher.is_html.return_value = True
+
+        # Just verify parsing works (no --visual so browsers arg is stored but not used)
+        with patch('sys.argv', ['wayback-diff', 'https://a.com', 'https://b.com',
+                                '--browsers', 'chrome', 'firefox']):
+            with pytest.raises(SystemExit):
+                main()
diff --git a/tests/test_diff_engine.py b/tests/test_diff_engine.py
index 1e14432..fae8552 100644
--- a/tests/test_diff_engine.py
+++ b/tests/test_diff_engine.py
@@ -1,92 +1,458 @@
 """Tests for diff engine."""
 
 import pytest
-from wayback_diff.diff_engine import DiffEngine
+from wayback_diff.diff_engine import DiffEngine, HTMLStructureParser
+
+
+class TestHTMLStructureParser:
+    """Test cases for HTMLStructureParser."""
+
+    def test_parse_basic_html(self):
+        """Test parsing basic HTML structure."""
+        parser = HTMLStructureParser()
+        parser.feed("<html><body><h1>Title</h1><p>Text</p></body></html>")
+
+        assert len(parser.structure) > 0
+        tags = [s['tag'] for s in parser.structure if s['type'] == 'start']
+        assert 'h1' in tags
+        assert 'p' in tags
+
+    def test_parse_text_content(self):
+        """Test extracting text content."""
+        parser = HTMLStructureParser()
+        parser.feed("<div><p>Hello World</p></div>")
+
+        assert 'Hello World' in parser.text_content
+
+    def test_parse_ignores_whitespace_text(self):
+        """Test that whitespace-only text is ignored."""
+        parser = HTMLStructureParser()
+        parser.feed("<div>   </div><p>Real text</p>")
+
+        assert 'Real text' in parser.text_content
+        assert '   ' not in parser.text_content
+
+    def test_parse_attributes(self):
+        """Test parsing element attributes."""
+        parser = HTMLStructureParser()
+        parser.feed('<a href="https://example.com">Link</a>')
+
+        start_tags = [s for s in parser.structure if s['type'] == 'start' and s['tag'] == 'a']
+        assert len(start_tags) == 1
+        assert start_tags[0]['attrs']['href'] == 'https://example.com'
+
+    def test_parse_depth_tracking(self):
+        """Test depth tracking."""
+        parser = HTMLStructureParser()
+        parser.feed("<div><section><p>Deep</p></section></div>")
+
+        start_tags = [s for s in parser.structure if s['type'] == 'start']
+        depths = [s['depth'] for s in start_tags]
+        assert len(depths) >= 2
+        # Deeper elements should have higher depth
+        assert max(depths) >= 2
+
+    def test_parse_important_tags(self):
+        """Test that important tags are captured."""
+        important_tags = ['div', 'section', 'article', 'header', 'footer',
+                          'nav', 'main', 'aside', 'h1', 'h2', 'h3', 'h4',
+                          'h5', 'h6', 'p', 'a', 'img', 'script', 'style',
+                          'link', 'meta', 'title']
+        for tag in important_tags:
+            parser = HTMLStructureParser()
+            if tag in ('img', 'link', 'meta'):
+                parser.feed(f'<{tag} src="x">')
+            else:
+                parser.feed(f'<{tag}>content</{tag}>')
+            start_tags = [s['tag'] for s in parser.structure if s['type'] == 'start']
+            assert tag in start_tags, f"Tag {tag} should be captured"
+
+    def test_parse_non_important_tags_ignored(self):
+        """Test that non-important tags are not captured in structure."""
+        parser = HTMLStructureParser()
+        parser.feed("<span>text</span><b>bold</b><em>emphasis</em>")
+
+        start_tags = [s['tag'] for s in parser.structure if s['type'] == 'start']
+        assert 'span' not in start_tags
+        assert 'b' not in start_tags
+        assert 'em' not in start_tags
+
+    def test_parse_end_tags(self):
+        """Test end tag handling."""
+        parser = HTMLStructureParser()
+        parser.feed("<h1>Title</h1>")
+
+        end_tags = [s for s in parser.structure if s['type'] == 'end']
+        assert len(end_tags) >= 1
+        assert end_tags[0]['tag'] == 'h1'
+
+    def test_parse_nested_text(self):
+        """Test that text from multiple sibling elements is collected."""
+        parser = HTMLStructureParser()
+        parser.feed("<div>First</div><p>Second</p>")
+
+        assert 'First' in parser.text_content
+        assert 'Second' in parser.text_content
 
 
 class TestDiffEngine:
     """Test cases for DiffEngine."""
-    
-    def test_normalize_content(self):
-        """Test content normalization."""
+
+    def test_init_defaults(self):
+        """Test default initialization."""
+        engine = DiffEngine()
+        assert engine.ignore_whitespace is True
+        assert engine.ignore_case is False
+
+    def test_init_custom(self):
+        """Test custom initialization."""
+        engine = DiffEngine(ignore_whitespace=False, ignore_case=True)
+        assert engine.ignore_whitespace is False
+        assert engine.ignore_case is True
+
+    def test_normalize_content_whitespace(self):
+        """Test content normalization with whitespace."""
         engine = DiffEngine(ignore_whitespace=True)
-        
+
         content1 = b"<div>  Test  </div>"
         content2 = b"<div>Test</div>"
-        
+
         norm1 = engine.normalize_content(content1)
         norm2 = engine.normalize_content(content2)
-        
-        # After normalization, whitespace should be similar
+
         assert b'<div>' in norm1
         assert b'Test' in norm1
-    
+
+    def test_normalize_content_no_whitespace(self):
+        """Test normalization with whitespace handling disabled."""
+        engine = DiffEngine(ignore_whitespace=False)
+        content = b"<div>  Test  </div>"
+        normalized = engine.normalize_content(content)
+        assert b'  Test  ' in normalized
+
+    def test_normalize_content_case(self):
+        """Test case normalization."""
+        engine = DiffEngine(ignore_case=True)
+        content = b"<DIV>Test</DIV>"
+        normalized = engine.normalize_content(content)
+        assert b'<div>' in normalized
+        assert b'test' in normalized
+
+    def test_normalize_content_tag_whitespace(self):
+        """Test whitespace between tags is normalized."""
+        engine = DiffEngine(ignore_whitespace=True)
+        content = b"<div>  \n  </div>  <p>Text</p>"
+        normalized = engine.normalize_content(content)
+        assert b'><' in normalized or b'> <' in normalized
+
     def test_extract_meaningful_changes(self):
         """Test change extraction."""
         engine = DiffEngine()
-        
+
         old_content = b"<html><body><h1>Old Title</h1><p>Content</p></body></html>"
         new_content = b"<html><body><h1>New Title</h1><p>Content</p></body></html>"
-        
+
         changes = engine.extract_meaningful_changes(old_content, new_content)
-        
+
         assert len(changes) > 0
-        # Should detect the title change - the diff engine extracts the changed parts
-        # which may be just "Old" and "New" rather than full "Old Title" and "New Title"
         all_text = ' '.join([c.get('old_text', '') + c.get('new_text', '') for c in changes])
         all_context = ' '.join([c.get('old_context', '') + c.get('new_context', '') for c in changes])
-        # Check if the change or context contains the title text
-        assert ('Old' in all_text and 'New' in all_text) or 'Title' in all_context or any('Title' in c.get('old_context', '') or 'Title' in c.get('new_context', '') for c in changes)
-    
+        assert ('Old' in all_text and 'New' in all_text) or 'Title' in all_context
+
+    def test_extract_meaningful_changes_identical(self):
+        """Test no changes for identical content."""
+        engine = DiffEngine()
+        content = b"<html><body><p>Same content</p></body></html>"
+        changes = engine.extract_meaningful_changes(content, content)
+        assert len(changes) == 0
+
+    def test_extract_meaningful_changes_added(self):
+        """Test detecting added content."""
+        engine = DiffEngine()
+        old = b"<html><body></body></html>"
+        new = b"<html><body><p>New paragraph</p></body></html>"
+        changes = engine.extract_meaningful_changes(old, new)
+        assert len(changes) > 0
+        types = [c['type'] for c in changes]
+        assert 'added' in types or 'modified' in types
+
+    def test_extract_meaningful_changes_removed(self):
+        """Test detecting removed content."""
+        engine = DiffEngine()
+        old = b"<html><body><p>Old paragraph</p></body></html>"
+        new = b"<html><body></body></html>"
+        changes = engine.extract_meaningful_changes(old, new)
+        assert len(changes) > 0
+        types = [c['type'] for c in changes]
+        assert 'removed' in types or 'modified' in types
+
+    def test_extract_meaningful_changes_large_content(self):
+        """Test autojunk for large content (> 100000 bytes)."""
+        engine = DiffEngine()
+        old = b"<html><body>" + b"<p>Content paragraph</p>\n" * 10000 + b"</body></html>"
+        new = b"<html><body>" + b"<p>Content paragraph</p>\n" * 9999 + b"<p>Different</p></body></html>"
+        changes = engine.extract_meaningful_changes(old, new)
+        # Should complete without error
+        assert isinstance(changes, list)
+
+    def test_extract_meaningful_changes_has_positions(self):
+        """Test that changes have position information."""
+        engine = DiffEngine()
+        old = b"<html>Old</html>"
+        new = b"<html>New</html>"
+        changes = engine.extract_meaningful_changes(old, new)
+        assert len(changes) > 0
+        for change in changes:
+            assert 'old_position' in change
+            assert 'new_position' in change
+            assert 'old_context' in change
+            assert 'new_context' in change
+
+    def test_extract_meaningful_changes_has_significance(self):
+        """Test that changes have significance levels."""
+        engine = DiffEngine()
+        old = b"<html><head><title>Old</title></head></html>"
+        new = b"<html><head><title>New</title></head></html>"
+        changes = engine.extract_meaningful_changes(old, new)
+        for change in changes:
+            assert change['significance'] in ('high', 'medium', 'low')
+
     def test_get_summary(self):
         """Test summary generation."""
         engine = DiffEngine()
-        
+
         old_content = b"<html><body><h1>Old</h1></body></html>"
         new_content = b"<html><body><h1>New</h1><p>Added</p></body></html>"
-        
+
         changes = engine.extract_meaningful_changes(old_content, new_content)
         summary = engine.get_summary(changes)
-        
+
         assert 'total_changes' in summary
         assert 'added' in summary
         assert 'removed' in summary
         assert 'modified' in summary
+        assert 'high_significance' in summary
+        assert 'medium_significance' in summary
+        assert 'low_significance' in summary
         assert summary['total_changes'] > 0
-    
-    def test_assess_significance(self):
-        """Test significance assessment."""
+
+    def test_get_summary_empty(self):
+        """Test summary for empty changes list."""
+        engine = DiffEngine()
+        summary = engine.get_summary([])
+        assert summary['total_changes'] == 0
+        assert summary['added'] == 0
+        assert summary['removed'] == 0
+        assert summary['modified'] == 0
+
+    def test_assess_significance_high_title(self):
+        """Test high significance for title changes."""
         engine = DiffEngine()
-        
-        # High significance - title change
-        high_change = engine._assess_significance(
+        result = engine._assess_significance(
             b"<title>Old</title>",
             b"<title>New</title>"
         )
-        assert high_change == 'high'
-        
-        # Medium significance - div change
-        medium_change = engine._assess_significance(
+        assert result == 'high'
+
+    def test_assess_significance_high_heading(self):
+        """Test high significance for heading changes."""
+        engine = DiffEngine()
+        for i in range(1, 7):
+            result = engine._assess_significance(
+                f"<h{i}>Old</h{i}>".encode(),
+                f"<h{i}>New</h{i}>".encode()
+            )
+            assert result == 'high', f"h{i} should be high significance"
+
+    def test_assess_significance_high_meta(self):
+        """Test high significance for meta tag changes."""
+        engine = DiffEngine()
+        result = engine._assess_significance(
+            b'<meta name="description" content="old">',
+            b'<meta name="description" content="new">'
+        )
+        assert result == 'high'
+
+    def test_assess_significance_high_script(self):
+        """Test high significance for script changes."""
+        engine = DiffEngine()
+        result = engine._assess_significance(
+            b'<script src="old.js"></script>',
+            b'<script src="new.js"></script>'
+        )
+        assert result == 'high'
+
+    def test_assess_significance_high_stylesheet(self):
+        """Test high significance for stylesheet changes."""
+        engine = DiffEngine()
+        result = engine._assess_significance(
+            b'<link rel="stylesheet" href="old.css">',
+            b'<link rel="stylesheet" href="new.css">'
+        )
+        assert result == 'high'
+
+    def test_assess_significance_high_body(self):
+        """Test high significance for body tag changes."""
+        engine = DiffEngine()
+        result = engine._assess_significance(b'<body class="old">', b'<body class="new">')
+        assert result == 'high'
+
+    def test_assess_significance_high_main(self):
+        """Test high significance for main/article/section changes."""
+        engine = DiffEngine()
+        for tag in ['main', 'article', 'section']:
+            result = engine._assess_significance(
+                f'<{tag}>old</{tag}>'.encode(),
+                f'<{tag}>new</{tag}>'.encode()
+            )
+            assert result == 'high', f"{tag} should be high significance"
+
+    def test_assess_significance_medium_class(self):
+        """Test medium significance for class changes."""
+        engine = DiffEngine()
+        result = engine._assess_significance(
+            b'class="old-style"',
+            b'class="new-style"'
+        )
+        assert result == 'medium'
+
+    def test_assess_significance_medium_div(self):
+        """Test medium significance for div changes."""
+        engine = DiffEngine()
+        result = engine._assess_significance(
             b"<div>Old</div>",
             b"<div>New</div>"
         )
-        assert medium_change == 'medium'
-        
-        # Low significance - whitespace
-        low_change = engine._assess_significance(
-            b"  ",
-            b" "
+        assert result == 'medium'
+
+    def test_assess_significance_medium_style(self):
+        """Test medium significance for style attribute changes."""
+        engine = DiffEngine()
+        result = engine._assess_significance(
+            b'style="color: red"',
+            b'style="color: blue"'
+        )
+        assert result == 'medium'
+
+    def test_assess_significance_medium_span(self):
+        """Test medium significance for span changes."""
+        engine = DiffEngine()
+        result = engine._assess_significance(
+            b'<span>old</span>',
+            b'<span>new</span>'
         )
-        assert low_change == 'low'
-    
+        assert result == 'medium'
+
+    def test_assess_significance_medium_id(self):
+        """Test medium significance for id changes."""
+        engine = DiffEngine()
+        result = engine._assess_significance(b'id="old"', b'id="new"')
+        assert result == 'medium'
+
+    def test_assess_significance_low(self):
+        """Test low significance for whitespace/minor changes."""
+        engine = DiffEngine()
+        result = engine._assess_significance(b"  ", b" ")
+        assert result == 'low'
+
+    def test_assess_significance_low_plain_text(self):
+        """Test low significance for plain text without structural markers."""
+        engine = DiffEngine()
+        result = engine._assess_significance(b"old text", b"new text")
+        assert result == 'low'
+
     def test_generate_unified_diff(self):
         """Test unified diff generation."""
         engine = DiffEngine()
-        
+
         old_content = b"Line 1\nLine 2\nLine 3"
         new_content = b"Line 1\nLine 2 Modified\nLine 3"
-        
+
         diff = engine.generate_unified_diff(old_content, new_content, "old.txt", "new.txt")
-        
+
         assert len(diff) > 0
         assert any('Line 2' in line for line in diff)
+
+    def test_generate_unified_diff_identical(self):
+        """Test unified diff for identical content."""
+        engine = DiffEngine()
+        content = b"Line 1\nLine 2\nLine 3"
+        diff = engine.generate_unified_diff(content, content)
+        assert len(diff) == 0
+
+    def test_generate_unified_diff_custom_context(self):
+        """Test unified diff with custom context lines."""
+        engine = DiffEngine()
+        old = b"A\nB\nC\nD\nE\nF\nG"
+        new = b"A\nB\nC\nX\nE\nF\nG"
+        diff = engine.generate_unified_diff(old, new, n=1)
+        assert len(diff) > 0
+
+    def test_generate_unified_diff_labels(self):
+        """Test unified diff labels."""
+        engine = DiffEngine()
+        old = b"old line"
+        new = b"new line"
+        diff = engine.generate_unified_diff(old, new, old_label="file1.html", new_label="file2.html")
+        joined = '\n'.join(diff)
+        assert 'file1.html' in joined
+        assert 'file2.html' in joined
+
+    def test_compare_structures(self):
+        """Test HTML structure comparison returns expected keys."""
+        engine = DiffEngine()
+        old = b"<html><body><h1>Title</h1><p>Text</p></body></html>"
+        new = b"<html><body><h2>Title</h2><p>Text</p><div>New</div></body></html>"
+
+        result = engine.compare_structures(old, new)
+
+        assert 'structural_changes' in result
+        assert 'old_structure' in result
+        assert 'new_structure' in result
+        assert 'similarity' in result
+        assert 0.0 <= result['similarity'] <= 1.0
+        # Different structures should have some changes
+        assert len(result['structural_changes']) > 0
+
+    def test_compare_structures_identical(self):
+        """Test structure comparison for identical HTML."""
+        engine = DiffEngine()
+        html = b"<html><body><h1>Title</h1></body></html>"
+        result = engine.compare_structures(html, html)
+        assert result['similarity'] == 1.0
+        assert len(result['structural_changes']) == 0
+
+    def test_compare_structures_unicode(self):
+        """Test structure comparison with non-ASCII (multi-byte UTF-8) content."""
+        engine = DiffEngine()
+        old = '<html><body><p>Texto en espa\u00f1ol</p></body></html>'.encode('utf-8')
+        new = '<html><body><p>Texto en ingl\u00e9s</p></body></html>'.encode('utf-8')
+        result = engine.compare_structures(old, new)
+        assert 'similarity' in result
+        # Same structure, different text content (text is not in structure)
+        assert result['similarity'] == 1.0
+
+    def test_compare_structures_empty(self):
+        """Test structure comparison with empty content."""
+        engine = DiffEngine()
+        result = engine.compare_structures(b"", b"")
+        # Empty content produces no structure elements, so SequenceMatcher
+        # works fine on empty lists
+        assert 'similarity' in result
+        assert isinstance(result['structural_changes'], list)
+
+    def test_compare_structures_no_important_tags(self):
+        """Test structure comparison with no important tags (empty structures)."""
+        engine = DiffEngine()
+        # span/b/em are not captured, so structures are empty lists
+        old = b"<span>text1</span>"
+        new = b"<span>text2</span>"
+        result = engine.compare_structures(old, new)
+        assert result['similarity'] == 1.0
+        assert len(result['structural_changes']) == 0
+
+    def test_compare_structures_returns_dict(self):
+        """Test compare_structures always returns a dict."""
+        engine = DiffEngine()
+        result = engine.compare_structures(b"<p>a</p>", b"<p>b</p>")
+        assert isinstance(result, dict)
diff --git a/tests/test_fetcher.py b/tests/test_fetcher.py
index 4210e44..e5197fa 100644
--- a/tests/test_fetcher.py
+++ b/tests/test_fetcher.py
@@ -1,21 +1,43 @@
 """Tests for web fetcher."""
 
 import pytest
-from unittest.mock import Mock, patch
+from unittest.mock import Mock, patch, PropertyMock
+import requests
 from wayback_diff.fetcher import WebFetcher
 
 
 class TestWebFetcher:
     """Test cases for WebFetcher."""
-    
+
+    def test_init_defaults(self):
+        """Test default initialization."""
+        fetcher = WebFetcher()
+        assert fetcher.timeout == 30
+        assert fetcher.max_retries == 3
+
+    def test_init_custom(self):
+        """Test custom initialization."""
+        fetcher = WebFetcher(timeout=10, max_retries=5)
+        assert fetcher.timeout == 10
+        assert fetcher.max_retries == 5
+
+    def test_default_headers_set(self):
+        """Test that default headers are set on session."""
+        fetcher = WebFetcher()
+        assert 'User-Agent' in fetcher.session.headers
+
     def test_is_html(self):
         """Test HTML content type detection."""
         fetcher = WebFetcher()
-        assert fetcher.is_html("text/html")
-        assert fetcher.is_html("text/html; charset=utf-8")
-        assert not fetcher.is_html("application/json")
-        assert not fetcher.is_html(None)
-    
+        assert fetcher.is_html("text/html") is True
+        assert fetcher.is_html("text/html; charset=utf-8") is True
+        assert fetcher.is_html("TEXT/HTML") is True
+        assert fetcher.is_html("application/json") is False
+        assert fetcher.is_html("application/xml") is False
+        assert fetcher.is_html("image/png") is False
+        assert fetcher.is_html(None) is False
+        assert fetcher.is_html("") is False
+
     @patch('wayback_diff.fetcher.requests.Session.get')
     def test_fetch_success(self, mock_get):
         """Test successful fetch."""
@@ -25,14 +47,46 @@ def test_fetch_success(self, mock_get):
         mock_response.headers = {'Content-Type': 'text/html; charset=utf-8'}
         mock_response.encoding = 'utf-8'
         mock_get.return_value = mock_response
-        
+
         fetcher = WebFetcher()
         content, content_type, metadata = fetcher.fetch("https://example.com")
-        
+
         assert content == b'<html><body>Test</body></html>'
         assert content_type == 'text/html; charset=utf-8'
         assert metadata['status_code'] == 200
-    
+
+    @patch('wayback_diff.fetcher.requests.Session.get')
+    def test_fetch_success_adds_charset(self, mock_get):
+        """Test that charset is added when missing from content type."""
+        mock_response = Mock()
+        mock_response.status_code = 200
+        mock_response.content = b'<html>Test</html>'
+        mock_response.headers = {'Content-Type': 'text/html'}
+        mock_response.encoding = 'utf-8'
+        mock_get.return_value = mock_response
+
+        fetcher = WebFetcher()
+        content, content_type, metadata = fetcher.fetch("https://example.com")
+
+        assert 'charset=utf-8' in content_type
+
+    @patch('wayback_diff.fetcher.requests.Session.get')
+    def test_fetch_binary_content(self, mock_get):
+        """Test fetching binary content that cannot decode as utf-8."""
+        mock_response = Mock()
+        mock_response.status_code = 200
+        # Create content that fails strict utf-8 decode
+        mock_response.content = bytes([0xFF, 0xFE, 0x00, 0x01, 0x80, 0x81])
+        mock_response.headers = {'Content-Type': 'application/octet-stream'}
+        mock_response.encoding = None
+        mock_get.return_value = mock_response
+
+        fetcher = WebFetcher()
+        content, content_type, metadata = fetcher.fetch("https://example.com/file.bin")
+
+        assert content is not None
+        assert 'application/octet-stream' in content_type
+
     @patch('wayback_diff.fetcher.requests.Session.get')
     def test_fetch_404(self, mock_get):
         """Test 404 response."""
@@ -40,17 +94,160 @@ def test_fetch_404(self, mock_get):
         mock_response.status_code = 404
         mock_response.headers = {}
         mock_get.return_value = mock_response
-        
+
         fetcher = WebFetcher()
         content, content_type, metadata = fetcher.fetch("https://example.com/notfound")
-        
+
         assert content is None
         assert metadata['status_code'] == 404
-    
-    def test_url_normalization(self):
-        """Test URL normalization."""
-        fetcher = WebFetcher()
-        
-        # Test adding https://
-        content, _, _ = fetcher.fetch("example.com")
-        # Should not raise an error (will fail in actual request, but URL is normalized)
+
+    @patch('wayback_diff.fetcher.requests.Session.get')
+    def test_fetch_500(self, mock_get):
+        """Test 500 response."""
+        mock_response = Mock()
+        mock_response.status_code = 500
+        mock_response.headers = {'Server': 'nginx'}
+        mock_get.return_value = mock_response
+
+        fetcher = WebFetcher()
+        content, content_type, metadata = fetcher.fetch("https://example.com/error")
+
+        assert content is None
+        assert metadata['status_code'] == 500
+
+    @patch('wayback_diff.fetcher.requests.Session.get')
+    def test_fetch_timeout_retries(self, mock_get):
+        """Test timeout with retries."""
+        mock_get.side_effect = requests.exceptions.Timeout("Connection timed out")
+
+        fetcher = WebFetcher(timeout=1, max_retries=2)
+        content, content_type, metadata = fetcher.fetch("https://example.com")
+
+        assert content is None
+        assert mock_get.call_count == 2
+
+    @patch('wayback_diff.fetcher.requests.Session.get')
+    def test_fetch_request_exception_retries(self, mock_get):
+        """Test request exception with retries."""
+        mock_get.side_effect = requests.exceptions.ConnectionError("Connection refused")
+
+        fetcher = WebFetcher(timeout=1, max_retries=3)
+        content, content_type, metadata = fetcher.fetch("https://example.com")
+
+        assert content is None
+        assert mock_get.call_count == 3
+        assert 'error' in metadata
+
+    @patch('wayback_diff.fetcher.requests.Session.get')
+    def test_fetch_timeout_then_success(self, mock_get):
+        """Test timeout on first attempt then success."""
+        mock_response = Mock()
+        mock_response.status_code = 200
+        mock_response.content = b'<html>OK</html>'
+        mock_response.headers = {'Content-Type': 'text/html'}
+        mock_response.encoding = 'utf-8'
+
+        mock_get.side_effect = [
+            requests.exceptions.Timeout("timeout"),
+            mock_response,
+        ]
+
+        fetcher = WebFetcher(timeout=1, max_retries=3)
+        content, content_type, metadata = fetcher.fetch("https://example.com")
+
+        assert content == b'<html>OK</html>'
+        assert mock_get.call_count == 2
+
+    def test_url_auto_https_prefix(self):
+        """Test URL normalization adds https://."""
+        fetcher = WebFetcher()
+        # Patch session.get so no real request is made; the test then inspects
+        # the URL passed as the first positional argument to the patched call.
+        with patch.object(fetcher.session, 'get') as mock_get:
+            mock_response = Mock()
+            mock_response.status_code = 200
+            mock_response.content = b'<html>OK</html>'
+            mock_response.headers = {'Content-Type': 'text/html'}
+            mock_response.encoding = 'utf-8'
+            mock_get.return_value = mock_response
+
+            fetcher.fetch("example.com")
+            # Should have added https:// prefix
+            call_url = mock_get.call_args[0][0]
+            assert call_url.startswith("https://")
+
+    @patch('wayback_diff.fetcher.requests.Session.get')
+    def test_fetch_metadata_includes_headers(self, mock_get):
+        """Test that metadata includes response headers."""
+        mock_response = Mock()
+        mock_response.status_code = 200
+        mock_response.content = b'<html>OK</html>'
+        mock_response.headers = {
+            'Content-Type': 'text/html',
+            'X-Custom': 'value'
+        }
+        mock_response.encoding = 'utf-8'
+        mock_get.return_value = mock_response
+
+        fetcher = WebFetcher()
+        _, _, metadata = fetcher.fetch("https://example.com")
+
+        assert metadata['headers']['X-Custom'] == 'value'
+
+    @patch('wayback_diff.fetcher.requests.Session.get')
+    def test_fetch_metadata_includes_encoding(self, mock_get):
+        """Test that metadata includes encoding."""
+        mock_response = Mock()
+        mock_response.status_code = 200
+        mock_response.content = b'<html>OK</html>'
+        mock_response.headers = {'Content-Type': 'text/html; charset=iso-8859-1'}
+        mock_response.encoding = 'iso-8859-1'
+        mock_get.return_value = mock_response
+
+        fetcher = WebFetcher()
+        _, _, metadata = fetcher.fetch("https://example.com")
+
+        assert metadata['encoding'] == 'iso-8859-1'
+
+    @patch('wayback_diff.fetcher.requests.Session.get')
+    def test_fetch_no_encoding(self, mock_get):
+        """Test fetch when response has no encoding."""
+        mock_response = Mock()
+        mock_response.status_code = 200
+        mock_response.content = b'<html>OK</html>'
+        mock_response.headers = {'Content-Type': 'text/html'}
+        mock_response.encoding = None
+        mock_get.return_value = mock_response
+
+        fetcher = WebFetcher()
+        _, _, metadata = fetcher.fetch("https://example.com")
+
+        assert metadata['encoding'] is None
+
+    @patch('wayback_diff.fetcher.requests.Session.get')
+    def test_fetch_private_ip_allowed(self, mock_get):
+        """Test that private IP addresses pass through (logged but allowed)."""
+        mock_response = Mock()
+        mock_response.status_code = 200
+        mock_response.content = b'<html>OK</html>'
+        mock_response.headers = {'Content-Type': 'text/html'}
+        mock_response.encoding = 'utf-8'
+        mock_get.return_value = mock_response
+
+        fetcher = WebFetcher()
+        content, _, _ = fetcher.fetch("https://192.168.1.1/page")
+        assert content is not None
+
+    @patch('wayback_diff.fetcher.requests.Session.get')
+    def test_fetch_localhost_allowed(self, mock_get):
+        """Test that localhost passes through."""
+        mock_response = Mock()
+        mock_response.status_code = 200
+        mock_response.content = b'<html>OK</html>'
+        mock_response.headers = {'Content-Type': 'text/html'}
+        mock_response.encoding = 'utf-8'
+        mock_get.return_value = mock_response
+
+        fetcher = WebFetcher()
+        content, _, _ = fetcher.fetch("https://localhost:8080/page")
+        assert content is not None
diff --git a/tests/test_link_traverser.py b/tests/test_link_traverser.py
new file mode 100644
index 0000000..4ca412b
--- /dev/null
+++ b/tests/test_link_traverser.py
@@ -0,0 +1,657 @@
+"""Tests for link traverser."""
+
+import pytest
+from unittest.mock import patch, Mock, MagicMock
+from wayback_diff.link_traverser import LinkTraverser
+
+
+class TestLinkTraverserInit:
+    """Constructor behaviour of LinkTraverser."""
+
+    def test_init_basic(self):
+        """Two positional URLs are stored and the default limits apply."""
+        crawler = LinkTraverser("https://example.com", "https://example.org")
+        assert crawler.base_url1 == "https://example.com"
+        assert crawler.base_url2 == "https://example.org"
+        assert crawler.max_depth == 2
+        assert crawler.max_pages == 50
+        assert crawler.same_domain_only is True
+
+    def test_init_custom(self):
+        """Keyword overrides replace the default limits and domain flag."""
+        overrides = {'max_depth': 5, 'max_pages': 100, 'same_domain_only': False}
+        crawler = LinkTraverser("https://a.com", "https://b.com", **overrides)
+
+        # Each override must come back unchanged from the instance.
+        assert crawler.max_depth == 5
+        assert crawler.max_pages == 100
+        assert crawler.same_domain_only is False
+
+    def test_init_domain_extraction(self):
+        """Domains are derived from the URLs; a leading www. is dropped."""
+        crawler = LinkTraverser("https://www.example.com/path", "https://example.org/path")
+        assert crawler.domain1 == "example.com"
+        assert crawler.domain2 == "example.org"
+
+    def test_init_empty_results(self):
+        """A fresh traverser has no results and an empty visited collection."""
+        crawler = LinkTraverser("https://a.com", "https://b.com")
+        assert len(crawler.visited) == 0
+        assert crawler.results == []
+
+
+class TestNormalizeUrl:
+    """Behaviour of LinkTraverser._normalize_url."""
+
+    def test_normalize_absolute_url(self):
+        """An absolute URL loses the trailing slash of a non-root path."""
+        crawler = LinkTraverser("https://a.com", "https://b.com")
+        normalized = crawler._normalize_url("https://example.com/page/")
+        assert normalized == "https://example.com/page"
+
+    def test_normalize_removes_trailing_slash(self):
+        """A non-root path must not end with a slash after normalization."""
+        crawler = LinkTraverser("https://a.com", "https://b.com")
+        normalized = crawler._normalize_url("https://example.com/path/")
+        assert normalized.endswith("/") is False
+
+    def test_normalize_root_keeps_slash(self):
+        """The bare root URL keeps its single trailing slash."""
+        crawler = LinkTraverser("https://a.com", "https://b.com")
+        normalized = crawler._normalize_url("https://example.com/")
+        assert normalized.endswith("/") is True
+
+    def test_normalize_relative_url(self):
+        """A root-relative path is resolved against the supplied base URL."""
+        crawler = LinkTraverser("https://a.com", "https://b.com")
+        normalized = crawler._normalize_url("/page", "https://example.com/")
+        assert normalized == "https://example.com/page"
+
+    def test_normalize_relative_url_no_base(self):
+        """Without a base URL a relative path is returned unchanged."""
+        crawler = LinkTraverser("https://a.com", "https://b.com")
+        normalized = crawler._normalize_url("/page")
+        assert normalized == "/page"
+
+    def test_normalize_sorts_query_params(self):
+        """Query parameters appear in sorted order in the normalized URL."""
+        crawler = LinkTraverser("https://a.com", "https://b.com")
+        normalized = crawler._normalize_url("https://example.com/page?z=1&a=2")
+        assert "a=2&z=1" in normalized
+
+    def test_normalize_lowercases_netloc(self):
+        """An upper-case host name is lower-cased by normalization."""
+        crawler = LinkTraverser("https://a.com", "https://b.com")
+        normalized = crawler._normalize_url("https://EXAMPLE.COM/page")
+        assert "example.com" in normalized
+
+
+class TestIsSameDomain:
+    """Behaviour of LinkTraverser._is_same_domain."""
+
+    def test_same_domain(self):
+        """A URL on the expected host is accepted."""
+        check = LinkTraverser("https://a.com", "https://b.com")._is_same_domain
+        assert check("https://example.com/page", "example.com") is True
+
+    def test_different_domain(self):
+        """A URL on another host is rejected."""
+        check = LinkTraverser("https://a.com", "https://b.com")._is_same_domain
+        assert check("https://other.com/page", "example.com") is False
+
+    def test_empty_domain(self):
+        """A relative URL, which carries no host at all, is accepted."""
+        check = LinkTraverser("https://a.com", "https://b.com")._is_same_domain
+        assert check("/relative/path", "example.com") is True
+
+    def test_www_prefix(self):
+        """A www. prefix on the URL host does not prevent a match."""
+        check = LinkTraverser("https://a.com", "https://b.com")._is_same_domain
+        assert check("https://www.example.com/page", "example.com") is True
+
+
+class TestExtractLinks:
+    """Behaviour of LinkTraverser._extract_links on small HTML snippets."""
+
+    def test_extract_basic_links(self):
+        """Two absolute same-site anchors produce at least one extracted link."""
+        crawler = LinkTraverser("https://example.com", "https://other.com")
+        page = b'<html><body><a href="https://example.com/page1">Link 1</a><a href="https://example.com/page2">Link 2</a></body></html>'
+        found = crawler._extract_links(page, "https://example.com/")
+        assert len(found) >= 1
+
+    def test_extract_links_skips_anchors(self):
+        """A fragment-only href contributes no link."""
+        crawler = LinkTraverser("https://example.com", "https://b.com")
+        page = b'<html><body><a href="#section">Anchor</a></body></html>'
+        found = crawler._extract_links(page, "https://example.com/")
+        assert len(found) == 0
+
+    def test_extract_links_skips_javascript(self):
+        """A javascript: href contributes no link."""
+        crawler = LinkTraverser("https://example.com", "https://b.com")
+        page = b'<html><body><a href="javascript:void(0)">JS</a></body></html>'
+        found = crawler._extract_links(page, "https://example.com/")
+        assert len(found) == 0
+
+    def test_extract_links_skips_mailto(self):
+        """A mailto: href contributes no link."""
+        crawler = LinkTraverser("https://example.com", "https://b.com")
+        page = b'<html><body><a href="mailto:test@example.com">Email</a></body></html>'
+        found = crawler._extract_links(page, "https://example.com/")
+        assert len(found) == 0
+
+    def test_extract_links_skips_tel(self):
+        """A tel: href contributes no link."""
+        crawler = LinkTraverser("https://example.com", "https://b.com")
+        page = b'<html><body><a href="tel:+1234567890">Call</a></body></html>'
+        found = crawler._extract_links(page, "https://example.com/")
+        assert len(found) == 0
+
+    def test_extract_links_skips_sms(self):
+        """An sms: href contributes no link."""
+        crawler = LinkTraverser("https://example.com", "https://b.com")
+        page = b'<html><body><a href="sms:+1234567890">SMS</a></body></html>'
+        found = crawler._extract_links(page, "https://example.com/")
+        assert len(found) == 0
+
+    def test_extract_links_deduplicates(self):
+        """The same href appearing twice never yields repeated entries."""
+        crawler = LinkTraverser("https://example.com", "https://b.com")
+        page = b'<html><body><a href="https://example.com/page">A</a><a href="https://example.com/page">B</a></body></html>'
+        found = crawler._extract_links(page, "https://example.com/")
+        # Collapsing the result into a set must not shrink it.
+        assert len(set(found)) == len(found)
+
+    def test_extract_links_same_domain_only(self):
+        """With same_domain_only=True no link mentioning other.com is returned."""
+        crawler = LinkTraverser("https://example.com", "https://b.com",
+                                same_domain_only=True)
+        page = b'<html><body><a href="https://example.com/page">Same</a><a href="https://other.com/page">Other</a></body></html>'
+        found = crawler._extract_links(page, "https://example.com/")
+        # Any surviving foreign-host link is a failure.
+        offsite = [link for link in found if 'other.com' in link]
+        assert not offsite
+
+    def test_extract_links_handles_error(self):
+        """Input that is not well-formed HTML still produces a list."""
+        crawler = LinkTraverser("https://example.com", "https://b.com")
+        # Malformed markup must not raise.
+        found = crawler._extract_links(b'not html at all <<<>>>', "https://example.com/")
+        assert isinstance(found, list)
+
+    def test_extract_links_empty_href(self):
+        """An anchor with an empty href still produces a list."""
+        crawler = LinkTraverser("https://example.com", "https://b.com")
+        page = b'<html><body><a href="">Empty</a></body></html>'
+        found = crawler._extract_links(page, "https://example.com/")
+        assert isinstance(found, list)
+
+    def test_extract_links_wayback_url_extraction(self):
+        """Links found on an archived page should mention the example.com host."""
+        crawler = LinkTraverser(
+            "https://web.archive.org/web/20230101/https://example.com",
+            "https://example.com",
+            same_domain_only=False
+        )
+        page = b'<html><body><a href="/web/20230101/https://example.com/page">Link</a></body></html>'
+        found = crawler._extract_links(page, "https://web.archive.org/web/20230101/https://example.com/")
+        # The host check applies only when something was extracted.
+        # NOTE(review): an empty result therefore passes vacuously.
+        assert not found or any('example.com' in link for link in found)
+
+    def test_extract_links_skips_email_at_sign(self):
+        """An href containing @ without a mailto: scheme contributes no link."""
+        crawler = LinkTraverser("https://example.com", "https://b.com")
+        page = b'<html><body><a href="user@example.com">User</a></body></html>'
+        found = crawler._extract_links(page, "https://example.com/")
+        assert len(found) == 0
+
+    def test_extract_links_relative(self):
+        """A root-relative href, if extracted, is reported with the example.com host."""
+        crawler = LinkTraverser("https://example.com", "https://b.com",
+                                same_domain_only=True)
+        page = b'<html><body><a href="/about">About</a></body></html>'
+        found = crawler._extract_links(page, "https://example.com/")
+        # NOTE(review): nothing is asserted when no link comes back.
+        assert not found or any('example.com' in link for link in found)
+
+
+class TestGetMatchingUrl:
+    """Behaviour of LinkTraverser._get_matching_url."""
+
+    def test_basic_matching(self):
+        """A page under the first base maps to the same path on the second."""
+        crawler = LinkTraverser("https://old.example.com", "https://new.example.com")
+        counterpart = crawler._get_matching_url("https://old.example.com/page")
+        assert counterpart == "https://new.example.com/page"
+
+    def test_matching_with_query(self):
+        """The query string survives the mapping to the second site."""
+        crawler = LinkTraverser("https://old.com", "https://new.com")
+        counterpart = crawler._get_matching_url("https://old.com/page?q=test")
+        assert "?q=test" in counterpart
+
+    def test_matching_wayback_url(self):
+        """An archived URL under base 1 maps to a URL that names example.com."""
+        crawler = LinkTraverser(
+            "https://web.archive.org/web/20230101/https://example.com",
+            "https://example.com"
+        )
+        archived = "https://web.archive.org/web/20230101/https://example.com/page"
+        counterpart = crawler._get_matching_url(archived)
+
+        assert counterpart is not None
+        assert "example.com" in counterpart
+
+
+class TestComparePage:
+    """Test compare_page on instances created via __new__ with mocked collaborators."""
+
+    @patch.object(LinkTraverser, '__init__', lambda self, *a, **kw: None)
+    def test_compare_page_success(self):
+        """Both fetches succeed: status is 'compared' and both link lists exist."""
+        traverser = LinkTraverser.__new__(LinkTraverser)
+        traverser.fetcher = Mock()
+        traverser.diff_engine = Mock()
+        traverser.same_domain_only = True
+
+        traverser.fetcher.fetch.side_effect = [
+            (b'<html>Page1</html>', 'text/html', {}),
+            (b'<html>Page2</html>', 'text/html', {}),
+        ]
+        traverser.diff_engine.extract_meaningful_changes.return_value = []
+        traverser.diff_engine.get_summary.return_value = {
+            'total_changes': 0, 'high_significance': 0
+        }
+
+        result = traverser.compare_page("https://a.com", "https://b.com")
+        assert result['status'] == 'compared'
+        assert 'links1' in result
+        assert 'links2' in result
+
+    @patch.object(LinkTraverser, '__init__', lambda self, *a, **kw: None)
+    def test_compare_page_fetch_failure(self):
+        """The first fetch returns no content: the result status is 'error'."""
+        traverser = LinkTraverser.__new__(LinkTraverser)
+        traverser.fetcher = Mock()
+        traverser.diff_engine = Mock()
+
+        traverser.fetcher.fetch.side_effect = [
+            (None, None, {}),
+            (b'<html>OK</html>', 'text/html', {}),
+        ]
+
+        result = traverser.compare_page("https://a.com/bad", "https://b.com/good")
+        assert result['status'] == 'error'
+
+    @patch.object(LinkTraverser, '__init__', lambda self, *a, **kw: None)
+    def test_compare_page_both_fail(self):
+        """Every fetch returns no content: the result status is 'error'."""
+        traverser = LinkTraverser.__new__(LinkTraverser)
+        traverser.fetcher = Mock()
+        traverser.diff_engine = Mock()
+
+        traverser.fetcher.fetch.return_value = (None, None, {})
+
+        result = traverser.compare_page("https://a.com/bad", "https://b.com/bad")
+        assert result['status'] == 'error'
+
+    @patch.object(LinkTraverser, '__init__', lambda self, *a, **kw: None)
+    def test_compare_page_cleans_wayback(self):
+        """clean_wayback_html is called exactly once when one URL is a Wayback URL."""
+        traverser = LinkTraverser.__new__(LinkTraverser)
+        traverser.fetcher = Mock()
+        traverser.diff_engine = Mock()
+        traverser.same_domain_only = True
+
+        traverser.fetcher.fetch.side_effect = [
+            (b'<html>WB content</html>', 'text/html', {}),
+            (b'<html>Clean content</html>', 'text/html', {}),
+        ]
+        traverser.diff_engine.extract_meaningful_changes.return_value = []
+        traverser.diff_engine.get_summary.return_value = {
+            'total_changes': 0, 'high_significance': 0
+        }
+
+        wb_url = "https://web.archive.org/web/20230101/https://example.com"
+        with patch('wayback_diff.link_traverser.WaybackCleaner') as mock_cleaner:
+            mock_cleaner.is_wayback_url.side_effect = [True, False]
+            mock_cleaner.clean_wayback_html.return_value = b'<html>Cleaned</html>'
+            # Only the cleaner call matters here; the returned dict is not inspected.
+            traverser.compare_page(wb_url, "https://example.com")
+            mock_cleaner.clean_wayback_html.assert_called_once()
+
+
+class TestTraverseAndCompare:
+    """Test traverse_and_compare with compare_page replaced by a mock."""
+
+    @patch.object(LinkTraverser, 'compare_page')
+    def test_traverse_single_page(self, mock_compare):
+        """With max_depth=0 and no links, exactly one result is produced."""
+        traverser = LinkTraverser("https://a.com", "https://b.com", max_depth=0, max_pages=10)
+
+        mock_compare.return_value = {
+            'url1': 'https://a.com',
+            'url2': 'https://b.com',
+            'status': 'compared',
+            'summary': {'total_changes': 0, 'high_significance': 0},
+            'changes_count': 0,
+            'high_significance': 0,
+            'links1': [],
+            'links2': [],
+        }
+
+        results = traverser.traverse_and_compare()
+        assert len(results) == 1
+        assert results[0]['status'] == 'compared'
+
+    @patch.object(LinkTraverser, 'compare_page')
+    def test_traverse_max_pages_limit(self, mock_compare):
+        """With max_pages=2 the traversal returns at most two results."""
+        traverser = LinkTraverser("https://a.com", "https://b.com",
+                                  max_depth=5, max_pages=2)
+
+        # Every compared page advertises ten links, far more than max_pages allows.
+        def mock_compare_fn(url1, url2):
+            """Return a successful comparison that links to ten further pages."""
+            links = [f"https://a.com/page{i}" for i in range(10)]
+            return {
+                'url1': url1,
+                'url2': url2,
+                'status': 'compared',
+                'summary': {'total_changes': 0, 'high_significance': 0},
+                'changes_count': 0,
+                'high_significance': 0,
+                'links1': links,
+                'links2': [],
+            }
+
+        mock_compare.side_effect = mock_compare_fn
+
+        results = traverser.traverse_and_compare()
+        assert len(results) <= 2
+
+    @patch.object(LinkTraverser, 'compare_page')
+    def test_traverse_skips_visited(self, mock_compare):
+        """A link pointing back at the start page does not trigger a second compare."""
+        traverser = LinkTraverser("https://a.com", "https://b.com",
+                                  max_depth=2, max_pages=10)
+
+        mock_compare.return_value = {
+            'url1': 'https://a.com',
+            'url2': 'https://b.com',
+            'status': 'compared',
+            'summary': {'total_changes': 0, 'high_significance': 0},
+            'changes_count': 0,
+            'high_significance': 0,
+            'links1': ['https://a.com/'],  # Link back to base
+            'links2': [],
+        }
+
+        traverser.traverse_and_compare()
+        # Should only compare once (the initial page)
+        assert mock_compare.call_count == 1
+
+    @patch.object(LinkTraverser, 'compare_page')
+    def test_traverse_error_results_no_links(self, mock_compare):
+        """An error result, which has no link lists, ends with a single result."""
+        traverser = LinkTraverser("https://a.com", "https://b.com",
+                                  max_depth=2, max_pages=10)
+
+        mock_compare.return_value = {
+            'url1': 'https://a.com',
+            'url2': 'https://b.com',
+            'status': 'error',
+            'error': 'Fetch failed',
+        }
+
+        results = traverser.traverse_and_compare()
+        assert len(results) == 1
+        assert results[0]['status'] == 'error'
+
+
+class TestExtractLinksWaybackBranches:
+    """Link extraction when the first base URL is a web.archive.org URL."""
+
+    def test_extract_links_wayback_relative_path(self):
+        """An archive-relative href without an embedded URL still yields a list."""
+        crawler = LinkTraverser(
+            "https://web.archive.org/web/20230101/https://example.com",
+            "https://example.com",
+            same_domain_only=False
+        )
+        # The href carries only a path fragment after the timestamp, no full URL.
+        page = b'<html><body><a href="/web/20230101/somepage">Link</a></body></html>'
+        archive_base = "https://web.archive.org/web/20230101/https://example.com/"
+        found = crawler._extract_links(page, archive_base)
+
+        # Only the return type is checked, not the extracted contents.
+        assert isinstance(found, list)
+
+    def test_extract_links_wayback_no_base_match(self):
+        """A base URL lacking the /web/<timestamp>/ shape still yields a list."""
+        crawler = LinkTraverser(
+            "https://web.archive.org/web/20230101/https://example.com",
+            "https://example.com",
+            same_domain_only=False
+        )
+        page = b'<html><body><a href="/web/20230101/relative">Link</a></body></html>'
+        # This base deliberately does not follow the usual archive URL layout.
+        found = crawler._extract_links(page, "https://web.archive.org/weird/path")
+        assert isinstance(found, list)
+
+    def test_extract_links_wayback_domain_filtering(self):
+        """With same_domain_only=True no link mentioning other.com is returned."""
+        crawler = LinkTraverser(
+            "https://web.archive.org/web/20230101/https://example.com",
+            "https://example.com",
+            same_domain_only=True
+        )
+        page = b'''<html><body>
+            <a href="https://example.com/page">Same</a>
+            <a href="https://other.com/page">Other</a>
+        </body></html>'''
+        archive_base = "https://web.archive.org/web/20230101/https://example.com/"
+        found = crawler._extract_links(page, archive_base)
+        # Any surviving foreign-host link is a failure.
+        offsite = [link for link in found if 'other.com' in link]
+        assert not offsite
+
+    def test_extract_links_non_wayback_cross_domain_filtered(self):
+        """On a plain site, a link to external.com is filtered out."""
+        crawler = LinkTraverser(
+            "https://example.com",
+            "https://example.org",
+            same_domain_only=True
+        )
+        page = b'<html><body><a href="https://external.com/page">Ext</a></body></html>'
+        found = crawler._extract_links(page, "https://example.com/")
+        offsite = [link for link in found if 'external.com' in link]
+        assert not offsite
+
+
+class TestTraverseAndCompareAdvanced:
+    """Traversal scenarios involving followed links, depth limits and bad links."""
+
+    @patch.object(LinkTraverser, 'compare_page')
+    @patch.object(LinkTraverser, '_get_matching_url')
+    def test_traverse_follows_links(self, mock_match, mock_compare):
+        """A link reported by the first comparison is compared as a second page."""
+        crawler = LinkTraverser("https://a.com", "https://b.com",
+                                max_depth=1, max_pages=5)
+
+        mock_match.return_value = "https://b.com/page1"
+
+        seen = [0]
+        def compare_side_effect(url1, url2):
+            seen[0] += 1
+            if seen[0] != 1:
+                return {
+                    'url1': url1,
+                    'url2': url2,
+                    'status': 'compared',
+                    'summary': {'total_changes': 0, 'high_significance': 0},
+                    'changes_count': 0,
+                    'high_significance': 0,
+                    'links1': [],
+                    'links2': [],
+                }
+            return {
+                'url1': url1,
+                'url2': url2,
+                'status': 'compared',
+                'summary': {'total_changes': 0, 'high_significance': 0},
+                'changes_count': 0,
+                'high_significance': 0,
+                'links1': ['https://a.com/page1'],
+                'links2': [],
+            }
+
+        mock_compare.side_effect = compare_side_effect
+
+        pages = crawler.traverse_and_compare()
+        assert len(pages) == 2
+
+    @patch.object(LinkTraverser, 'compare_page')
+    def test_traverse_depth_limit(self, mock_compare):
+        """With max_depth=0 the links reported by the start page are not followed."""
+        crawler = LinkTraverser("https://a.com", "https://b.com",
+                                max_depth=0, max_pages=100)
+
+        mock_compare.return_value = dict(
+            url1='https://a.com',
+            url2='https://b.com',
+            status='compared',
+            summary={'total_changes': 0, 'high_significance': 0},
+            changes_count=0,
+            high_significance=0,
+            links1=['https://a.com/deep1', 'https://a.com/deep2'],
+            links2=[],
+        )
+
+        pages = crawler.traverse_and_compare()
+        # Depth zero means the initial page is the only one compared.
+        assert len(pages) == 1
+
+    @patch.object(LinkTraverser, 'compare_page')
+    def test_traverse_link_processing_error(self, mock_compare):
+        """Malformed, empty and None link entries do not abort the traversal."""
+        crawler = LinkTraverser("https://a.com", "https://b.com",
+                                max_depth=1, max_pages=10)
+
+        mock_compare.return_value = dict(
+            url1='https://a.com',
+            url2='https://b.com',
+            status='compared',
+            summary={'total_changes': 0, 'high_significance': 0},
+            changes_count=0,
+            high_significance=0,
+            links1=['not-a-valid-url', '', None],
+            links2=[],
+        )
+
+        # The call must return normally despite the unusable link entries.
+        pages = crawler.traverse_and_compare()
+        assert len(pages) >= 1
+
+
+class TestGetMatchingUrlAdvanced:
+    """Further _get_matching_url cases: archived URLs with queries, deep paths."""
+
+    def test_matching_wayback_url_no_original(self):
+        """An archived old.com URL with a query string maps to a new.com URL."""
+        crawler = LinkTraverser(
+            "https://web.archive.org/web/20230101/https://old.com",
+            "https://new.com"
+        )
+        archived = "https://web.archive.org/web/20230101/https://old.com/page?q=1"
+        counterpart = crawler._get_matching_url(archived)
+
+        assert counterpart is not None
+        assert "new.com" in counterpart
+
+    def test_matching_non_wayback_with_path(self):
+        """A multi-segment path is carried over unchanged to the second site."""
+        crawler = LinkTraverser("https://old.com", "https://new.com")
+        counterpart = crawler._get_matching_url("https://old.com/deep/path/page.html")
+        assert counterpart == "https://new.com/deep/path/page.html"
+
+
+class TestGenerateReport:
+    """Text produced by LinkTraverser.generate_report for preset results."""
+
+    def test_generate_report_empty(self):
+        """With no results the report has its title and a zero page count."""
+        crawler = LinkTraverser("https://a.com", "https://b.com")
+        text = crawler.generate_report()
+        assert 'LINK TRAVERSAL COMPARISON REPORT' in text
+        assert 'Pages compared: 0' in text
+
+    def test_generate_report_with_results(self):
+        """One compared page with high-significance changes is listed in the report."""
+        crawler = LinkTraverser("https://a.com", "https://b.com")
+        page = {
+            'url1': 'https://a.com/',
+            'url2': 'https://b.com/',
+            'status': 'compared',
+            'summary': {
+                'total_changes': 5,
+                'high_significance': 2,
+                'medium_significance': 2,
+                'low_significance': 1,
+            },
+            'changes_count': 5,
+            'high_significance': 2,
+        }
+        crawler.results = [page]
+
+        text = crawler.generate_report()
+        assert 'Pages compared: 1' in text
+        assert 'Successfully compared: 1' in text
+        assert 'HIGH SIGNIFICANCE DIFFERENCES' in text
+        assert 'https://a.com/' in text
+
+    def test_generate_report_with_errors(self):
+        """A single error result is counted and its message is printed."""
+        crawler = LinkTraverser("https://a.com", "https://b.com")
+        failure = {
+            'url1': 'https://a.com/bad',
+            'url2': 'https://b.com/bad',
+            'status': 'error',
+            'error': 'Connection refused',
+        }
+        crawler.results = [failure]
+
+        text = crawler.generate_report()
+        assert 'Errors: 1' in text
+        assert 'Connection refused' in text
+
+    def test_generate_report_mixed_results(self):
+        """One compared page plus one error are counted separately and in total."""
+        crawler = LinkTraverser("https://a.com", "https://b.com")
+        unchanged = {
+            'url1': 'https://a.com/',
+            'url2': 'https://b.com/',
+            'status': 'compared',
+            'summary': {
+                'total_changes': 0,
+                'high_significance': 0,
+                'medium_significance': 0,
+                'low_significance': 0,
+            },
+            'changes_count': 0,
+            'high_significance': 0,
+        }
+        broken = {
+            'url1': 'https://a.com/broken',
+            'url2': 'https://b.com/broken',
+            'status': 'error',
+            'error': 'Timeout',
+        }
+        crawler.results = [unchanged, broken]
+
+        text = crawler.generate_report()
+        assert 'Pages compared: 2' in text
+        assert 'Successfully compared: 1' in text
+        assert 'Errors: 1' in text
diff --git a/tests/test_report_generator.py b/tests/test_report_generator.py
new file mode 100644
index 0000000..8ba98a6
--- /dev/null
+++ b/tests/test_report_generator.py
@@ -0,0 +1,444 @@
+"""Tests for markdown report generator."""
+
+import os
+import pytest
+import tempfile
+import shutil
+from pathlib import Path
+from unittest.mock import patch
+from wayback_diff.report_generator import MarkdownReportGenerator
+
+
+class TestMarkdownReportGeneratorInit:
+    """Test report generator initialization."""
+
+    def test_init_creates_directory(self):
+        """Test that output directory is created."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            output_dir = os.path.join(tmpdir, "reports")
+            gen = MarkdownReportGenerator(output_dir=output_dir)
+            assert os.path.isdir(output_dir)
+
+    def test_init_default_dir(self):
+        """Test default output directory."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            old_cwd = os.getcwd()
+            try:
+                os.chdir(tmpdir)
+                gen = MarkdownReportGenerator()
+                assert gen.output_dir == Path("./reports")
+            finally:
+                os.chdir(old_cwd)
+                # Clean up if created
+                reports_dir = os.path.join(tmpdir, "reports")
+                if os.path.exists(reports_dir):
+                    shutil.rmtree(reports_dir)
+
+
+class TestGenerateComparisonReport:
+    """Test report generation."""
+
+    def _make_summary(self, total=5, added=2, removed=1, modified=2,
+                      high=1, medium=2, low=2):
+        return {
+            'total_changes': total,
+            'added': added,
+            'removed': removed,
+            'modified': modified,
+            'high_significance': high,
+            'medium_significance': medium,
+            'low_significance': low,
+        }
+
+    def _make_change(self, change_type='modified', old_text='Old content',
+                     new_text='New content', significance='high'):
+        return {
+            'type': change_type,
+            'old_text': old_text,
+            'new_text': new_text,
+            'significance': significance,
+        }
+
+    def test_basic_report(self):
+        """Test basic report generation."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            gen = MarkdownReportGenerator(output_dir=tmpdir)
+            report = gen.generate_comparison_report(
+                "https://old.example.com",
+                "https://new.example.com",
+                [],
+                self._make_summary(total=0, added=0, removed=0, modified=0,
+                                   high=0, medium=0, low=0)
+            )
+
+            assert '# Website Comparison Report' in report
+            assert 'https://old.example.com' in report
+            assert 'https://new.example.com' in report
+            assert '**Total Changes:** 0' in report
+
+    def test_report_with_high_changes(self):
+        """Test report with high significance changes."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            gen = MarkdownReportGenerator(output_dir=tmpdir)
+            changes = [self._make_change(significance='high')]
+            report = gen.generate_comparison_report(
+                "https://a.com", "https://b.com",
+                changes, self._make_summary()
+            )
+
+            assert '## High Significance Changes' in report
+            assert 'MODIFIED' in report
+            assert 'Old content' in report
+            assert 'New content' in report
+
+    def test_report_with_medium_changes(self):
+        """Test report with medium significance changes."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            gen = MarkdownReportGenerator(output_dir=tmpdir)
+            changes = [self._make_change(significance='medium') for _ in range(3)]
+            report = gen.generate_comparison_report(
+                "https://a.com", "https://b.com",
+                changes, self._make_summary(high=0, medium=3, low=0)
+            )
+
+            assert '## Medium Significance Changes' in report
+            assert '**Total:** 3 changes' in report
+
+    def test_report_medium_changes_truncation(self):
+        """Test medium changes are truncated after 10."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            gen = MarkdownReportGenerator(output_dir=tmpdir)
+            changes = [self._make_change(significance='medium') for _ in range(15)]
+            report = gen.generate_comparison_report(
+                "https://a.com", "https://b.com",
+                changes, self._make_summary(total=15, high=0, medium=15, low=0)
+            )
+
+            assert '... and 5 more medium significance changes' in report
+
+    def test_report_high_changes_truncation(self):
+        """Test high changes are truncated after 50."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            gen = MarkdownReportGenerator(output_dir=tmpdir)
+            changes = [self._make_change(significance='high') for _ in range(55)]
+            report = gen.generate_comparison_report(
+                "https://a.com", "https://b.com",
+                changes, self._make_summary(total=55, high=55, medium=0, low=0)
+            )
+
+            assert '... and 5 more high significance changes' in report
+
+    def test_report_added_change(self):
+        """Test report with added change (no old_text)."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            gen = MarkdownReportGenerator(output_dir=tmpdir)
+            changes = [self._make_change(change_type='added', old_text='', new_text='New')]
+            report = gen.generate_comparison_report(
+                "https://a.com", "https://b.com",
+                changes, self._make_summary(total=1, added=1, removed=0, modified=0)
+            )
+
+            assert 'ADDED' in report
+            assert '**Added/New:**' in report
+
+    def test_report_removed_change(self):
+        """Test report with removed change (no new_text)."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            gen = MarkdownReportGenerator(output_dir=tmpdir)
+            changes = [self._make_change(change_type='removed', old_text='Gone', new_text='')]
+            report = gen.generate_comparison_report(
+                "https://a.com", "https://b.com",
+                changes, self._make_summary(total=1, added=0, removed=1, modified=0)
+            )
+
+            assert 'REMOVED' in report
+            assert '**Removed/Changed:**' in report
+
+    def test_report_long_text_truncation(self):
+        """Test that long text is truncated in report."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            gen = MarkdownReportGenerator(output_dir=tmpdir)
+            long_text = 'A' * 500
+            changes = [self._make_change(old_text=long_text, new_text=long_text)]
+            report = gen.generate_comparison_report(
+                "https://a.com", "https://b.com",
+                changes, self._make_summary()
+            )
+
+            assert '...' in report
+
+    def test_report_recommendations_high_significance(self):
+        """Test recommendations when high significance changes exist."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            gen = MarkdownReportGenerator(output_dir=tmpdir)
+            report = gen.generate_comparison_report(
+                "https://a.com", "https://b.com",
+                [], self._make_summary(high=5)
+            )
+
+            assert 'Action Required' in report
+
+    def test_report_recommendations_minimal_changes(self):
+        """Test recommendations when changes are minimal."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            gen = MarkdownReportGenerator(output_dir=tmpdir)
+            report = gen.generate_comparison_report(
+                "https://a.com", "https://b.com",
+                [], self._make_summary(total=2, high=0, medium=2, low=0)
+            )
+
+            assert 'Migration Status' in report
+
+    def test_report_with_visual_results(self):
+        """Test report with visual comparison results."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            gen = MarkdownReportGenerator(output_dir=tmpdir)
+
+            visual_results = {
+                'chrome': {
+                    'difference_ratio': 0.02,
+                    'different_pixels': 500,
+                    'total_pixels': 100000,
+                    'screenshot1': '',
+                    'screenshot2': '',
+                    'comparison': '',
+                }
+            }
+
+            report = gen.generate_comparison_report(
+                "https://a.com", "https://b.com",
+                [], self._make_summary(total=0, high=0, medium=0, low=0),
+                visual_results=visual_results
+            )
+
+            assert '## Visual Comparison' in report
+            assert 'CHROME' in report
+            assert '2.00%' in report
+            assert 'Minimal differences' in report
+
+    def test_report_visual_high_difference(self):
+        """Test report with high visual difference."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            gen = MarkdownReportGenerator(output_dir=tmpdir)
+
+            visual_results = {
+                'firefox': {
+                    'difference_ratio': 0.15,
+                    'different_pixels': 15000,
+                    'total_pixels': 100000,
+                    'screenshot1': '',
+                    'screenshot2': '',
+                    'comparison': '',
+                }
+            }
+
+            report = gen.generate_comparison_report(
+                "https://a.com", "https://b.com",
+                [], self._make_summary(total=0, high=0, medium=0, low=0),
+                visual_results=visual_results
+            )
+
+            assert 'Significant differences detected' in report
+
+    def test_report_visual_with_error(self):
+        """Test report with visual comparison error."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            gen = MarkdownReportGenerator(output_dir=tmpdir)
+
+            visual_results = {
+                'chrome': {
+                    'error': 'Browser not found',
+                }
+            }
+
+            report = gen.generate_comparison_report(
+                "https://a.com", "https://b.com",
+                [], self._make_summary(total=0, high=0, medium=0, low=0),
+                visual_results=visual_results
+            )
+
+            assert 'Error' in report
+            assert 'Browser not found' in report
+
+    def test_report_visual_recommendations(self):
+        """Test visual-specific recommendations."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            gen = MarkdownReportGenerator(output_dir=tmpdir)
+
+            visual_results = {
+                'chrome': {
+                    'difference_ratio': 0.20,
+                    'different_pixels': 20000,
+                    'total_pixels': 100000,
+                    'screenshot1': '',
+                    'screenshot2': '',
+                    'comparison': '',
+                }
+            }
+
+            report = gen.generate_comparison_report(
+                "https://a.com", "https://b.com",
+                [], self._make_summary(total=0, high=0, medium=0, low=0),
+                visual_results=visual_results
+            )
+
+            assert 'Visual Differences' in report
+            assert 'chrome' in report.lower()
+
+    def test_report_visual_with_screenshots(self):
+        """Test report with actual screenshot files."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            gen = MarkdownReportGenerator(output_dir=tmpdir)
+
+            # Create fake screenshot files
+            screenshots_dir = os.path.join(tmpdir, "ss")
+            os.makedirs(screenshots_dir, exist_ok=True)
+            s1 = os.path.join(screenshots_dir, "s1.png")
+            s2 = os.path.join(screenshots_dir, "s2.png")
+            comp = os.path.join(screenshots_dir, "comp.png")
+            for f in [s1, s2, comp]:
+                with open(f, 'wb') as fh:
+                    fh.write(b'\x89PNG\r\n')  # PNG header bytes
+
+            visual_results = {
+                'chrome': {
+                    'difference_ratio': 0.05,
+                    'different_pixels': 5000,
+                    'total_pixels': 100000,
+                    'screenshot1': s1,
+                    'screenshot2': s2,
+                    'comparison': comp,
+                }
+            }
+
+            report = gen.generate_comparison_report(
+                "https://a.com", "https://b.com",
+                [], self._make_summary(total=0, high=0, medium=0, low=0),
+                visual_results=visual_results
+            )
+
+            assert '![' in report  # Image references
+            assert 'screenshots/' in report
+
+    def test_report_with_traversal_results(self):
+        """Test report with traversal results."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            gen = MarkdownReportGenerator(output_dir=tmpdir)
+
+            traversal_results = [
+                {
+                    'url1': 'https://a.com/',
+                    'url2': 'https://b.com/',
+                    'status': 'compared',
+                    'high_significance': 3,
+                    'changes_count': 10,
+                },
+                {
+                    'url1': 'https://a.com/page2',
+                    'url2': 'https://b.com/page2',
+                    'status': 'error',
+                    'error': 'Timeout',
+                }
+            ]
+
+            report = gen.generate_comparison_report(
+                "https://a.com", "https://b.com",
+                [], self._make_summary(total=0, high=0, medium=0, low=0),
+                traversal_results=traversal_results
+            )
+
+            assert '## Site-Wide Comparison' in report
+            assert '**Pages Compared:** 1' in report
+            assert '**Pages with Errors:** 1' in report
+            assert 'High Significance Differences' in report
+
+    def test_report_traversal_long_urls(self):
+        """Test traversal report truncates long URLs."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            gen = MarkdownReportGenerator(output_dir=tmpdir)
+
+            long_url = 'https://example.com/' + 'a' * 100
+            traversal_results = [
+                {
+                    'url1': long_url,
+                    'url2': long_url,
+                    'status': 'compared',
+                    'high_significance': 0,
+                    'changes_count': 0,
+                }
+            ]
+
+            report = gen.generate_comparison_report(
+                "https://a.com", "https://b.com",
+                [], self._make_summary(total=0, high=0, medium=0, low=0),
+                traversal_results=traversal_results
+            )
+
+            assert '...' in report  # URL should be truncated
+
+    def test_report_footer(self):
+        """Test report footer."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            gen = MarkdownReportGenerator(output_dir=tmpdir)
+            report = gen.generate_comparison_report(
+                "https://a.com", "https://b.com",
+                [], self._make_summary(total=0, high=0, medium=0, low=0)
+            )
+
+            assert 'Report generated by Wayback-Diff' in report
+
+    def test_report_timestamp(self):
+        """Test report contains generation timestamp."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            gen = MarkdownReportGenerator(output_dir=tmpdir)
+            report = gen.generate_comparison_report(
+                "https://a.com", "https://b.com",
+                [], self._make_summary(total=0, high=0, medium=0, low=0)
+            )
+
+            assert '**Generated:**' in report
+
+
+class TestSaveReport:
+    """Test report saving."""
+
+    def test_save_report_auto_filename(self):
+        """Test that an auto-named report lands in a comparison_report_*.md file with its content intact."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            gen = MarkdownReportGenerator(output_dir=tmpdir)
+            path = gen.save_report("# Test Report")
+
+            assert os.path.exists(path)
+            assert path.endswith('.md')
+            assert 'comparison_report_' in os.path.basename(path)  # check the file name, not the temp dir
+
+            with open(path, encoding='utf-8') as f:  # explicit encoding: do not depend on the locale default
+                assert f.read() == '# Test Report'
+
+    def test_save_report_custom_filename(self):
+        """Test saving report with custom filename."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            gen = MarkdownReportGenerator(output_dir=tmpdir)
+            path = gen.save_report("# Custom", filename="custom.md")
+
+            assert os.path.exists(path)
+            assert path.endswith('custom.md')
+
+    def test_save_report_returns_path(self):
+        """Test that save_report returns the file path."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            gen = MarkdownReportGenerator(output_dir=tmpdir)
+            path = gen.save_report("# Report")
+
+            assert isinstance(path, str)
+            assert tmpdir in path
+
+    def test_save_report_unicode(self):
+        """Test that non-ASCII report content survives a save/read round trip as UTF-8."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            gen = MarkdownReportGenerator(output_dir=tmpdir)
+            path = gen.save_report("# Informe con acentos y tildes: áéíóú ñ")
+
+            with open(path, encoding='utf-8') as f:
+                content = f.read()
+                assert 'áéíóú ñ' in content  # the accented characters are the actual subject of this test
diff --git a/tests/test_visual_comparison.py b/tests/test_visual_comparison.py
new file mode 100644
index 0000000..27015f6
--- /dev/null
+++ b/tests/test_visual_comparison.py
@@ -0,0 +1,873 @@
+"""Tests for visual comparison module."""
+
+import os
+import io
+import pytest
+import tempfile
+from unittest.mock import patch, Mock, MagicMock, PropertyMock
+from pathlib import Path
+from PIL import Image
+
+from wayback_diff.visual_comparison import (
+    VisualComparison,
+    SELENIUM_AVAILABLE,
+    WEBDRIVER_MANAGER_AVAILABLE,
+)
+
+
+class TestVisualComparisonInit:
+    """Test VisualComparison initialization."""
+
+    def test_init_defaults(self):
+        """Test default initialization."""
+        vc = VisualComparison()
+        assert vc.browser_name == 'chrome'
+        assert vc.headless is True
+        assert vc.viewport_width == 1920
+        assert vc.viewport_height == 1080
+        assert vc.wait_time == 3
+        assert vc.driver is None
+
+    def test_init_custom(self):
+        """Test custom initialization."""
+        vc = VisualComparison(
+            browser='firefox',
+            headless=False,
+            viewport_width=1280,
+            viewport_height=720,
+            wait_time=5
+        )
+        assert vc.browser_name == 'firefox'
+        assert vc.headless is False
+        assert vc.viewport_width == 1280
+        assert vc.viewport_height == 720
+        assert vc.wait_time == 5
+
+    def test_init_all_supported_browsers(self):
+        """Test initialization with all supported browser names."""
+        for browser in ['chrome', 'chromium', 'firefox', 'opera', 'edge']:
+            vc = VisualComparison(browser=browser)
+            assert vc.browser_name == browser
+
+    def test_init_invalid_browser(self):
+        """Test initialization with invalid browser."""
+        with pytest.raises(ValueError, match="Browser must be one of"):
+            VisualComparison(browser='safari')
+
+    def test_init_case_insensitive_browser(self):
+        """Test browser name is lowercased."""
+        vc = VisualComparison(browser='Chrome')
+        assert vc.browser_name == 'chrome'
+
+    @patch('wayback_diff.visual_comparison.SELENIUM_AVAILABLE', False)
+    def test_init_no_selenium(self):
+        """Test initialization when selenium is not available."""
+        with pytest.raises(ImportError, match="Selenium is required"):
+            VisualComparison()
+
+    def test_supported_browsers_list(self):
+        """Test supported browsers class variable."""
+        assert 'chrome' in VisualComparison.SUPPORTED_BROWSERS
+        assert 'firefox' in VisualComparison.SUPPORTED_BROWSERS
+        assert 'chromium' in VisualComparison.SUPPORTED_BROWSERS
+
+    def test_default_viewport_constants(self):
+        """Test default viewport constants."""
+        assert VisualComparison.DEFAULT_VIEWPORT_WIDTH == 1920
+        assert VisualComparison.DEFAULT_VIEWPORT_HEIGHT == 1080
+
+
+class TestDetectAvailableBrowsers:
+    """Test browser detection."""
+
+    @patch('wayback_diff.visual_comparison.SELENIUM_AVAILABLE', False)
+    def test_detect_no_selenium(self):
+        """Test detection when selenium unavailable."""
+        result = VisualComparison.detect_available_browsers()
+        assert result == []
+
+    @patch('wayback_diff.visual_comparison.SELENIUM_AVAILABLE', True)
+    @patch('wayback_diff.visual_comparison.webdriver')
+    def test_detect_chrome_available(self, mock_webdriver):
+        """Test detection when Chrome is available."""
+        mock_driver = Mock()
+        mock_webdriver.Chrome.return_value = mock_driver
+
+        result = VisualComparison.detect_available_browsers()
+        assert 'chrome' in result
+
+    @patch('wayback_diff.visual_comparison.SELENIUM_AVAILABLE', True)
+    @patch('wayback_diff.visual_comparison.webdriver')
+    def test_detect_all_fail_returns_chrome(self, mock_webdriver):
+        """Test fallback to chrome when no browsers detected."""
+        mock_webdriver.Chrome.side_effect = Exception("No chrome")
+        mock_webdriver.Firefox.side_effect = Exception("No firefox")
+        mock_webdriver.Edge.side_effect = Exception("No edge")
+
+        result = VisualComparison.detect_available_browsers()
+        assert result == ['chrome']
+
+
+class TestCompareImages:
+    """Test image comparison."""
+
+    def _create_test_image(self, width=100, height=100, color=(255, 0, 0)):
+        """Create a test image."""
+        img = Image.new('RGB', (width, height), color=color)
+        return img
+
+    def _save_test_image(self, path, width=100, height=100, color=(255, 0, 0)):
+        """Save a test image to path."""
+        img = self._create_test_image(width, height, color)
+        img.save(path)
+        return path
+
+    def test_compare_identical_images(self):
+        """Test that comparing an image with itself reports zero difference and is_similar truthy."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            img_path = os.path.join(tmpdir, "img.png")
+            self._save_test_image(img_path, color=(100, 100, 100))
+
+            vc = VisualComparison()
+            result = vc.compare_images(img_path, img_path)
+
+            assert float(result['difference_ratio']) == 0.0
+            assert int(result['different_pixels']) == 0
+            assert result['is_similar']  # truthiness check; avoids `== True`
+
+    def test_compare_different_images(self):
+        """Test comparing different images."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            img1_path = os.path.join(tmpdir, "img1.png")
+            img2_path = os.path.join(tmpdir, "img2.png")
+            self._save_test_image(img1_path, color=(255, 0, 0))
+            self._save_test_image(img2_path, color=(0, 0, 255))
+
+            vc = VisualComparison()
+            result = vc.compare_images(img1_path, img2_path)
+
+            assert result['difference_ratio'] > 0
+            assert result['different_pixels'] > 0
+
+    def test_compare_images_different_sizes(self):
+        """Test comparing images of different sizes."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            img1_path = os.path.join(tmpdir, "img1.png")
+            img2_path = os.path.join(tmpdir, "img2.png")
+            self._save_test_image(img1_path, width=100, height=100)
+            self._save_test_image(img2_path, width=200, height=200)
+
+            vc = VisualComparison()
+            result = vc.compare_images(img1_path, img2_path)
+
+            assert 'difference_ratio' in result
+            assert result['total_pixels'] > 0
+
+    def test_compare_images_output_path(self):
+        """Test that comparison image is saved."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            img1_path = os.path.join(tmpdir, "img1.png")
+            img2_path = os.path.join(tmpdir, "img2.png")
+            output_path = os.path.join(tmpdir, "comparison.png")
+            self._save_test_image(img1_path, color=(255, 0, 0))
+            self._save_test_image(img2_path, color=(0, 255, 0))
+
+            vc = VisualComparison()
+            result = vc.compare_images(img1_path, img2_path, output_path=output_path)
+
+            assert os.path.exists(output_path)
+            assert result['comparison_image_path'] == output_path
+
+    def test_compare_images_no_output_path(self):
+        """Test comparison without saving."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            img1_path = os.path.join(tmpdir, "img1.png")
+            img2_path = os.path.join(tmpdir, "img2.png")
+            self._save_test_image(img1_path)
+            self._save_test_image(img2_path)
+
+            vc = VisualComparison()
+            result = vc.compare_images(img1_path, img2_path)
+
+            assert result['comparison_image_path'] is None
+
+    def test_compare_images_rgba(self):
+        """Test comparing RGBA images (converted to RGB)."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            img1_path = os.path.join(tmpdir, "img1.png")
+            img2_path = os.path.join(tmpdir, "img2.png")
+
+            img1 = Image.new('RGBA', (50, 50), (255, 0, 0, 128))
+            img1.save(img1_path)
+            img2 = Image.new('RGBA', (50, 50), (0, 0, 255, 128))
+            img2.save(img2_path)
+
+            vc = VisualComparison()
+            result = vc.compare_images(img1_path, img2_path)
+
+            assert 'difference_ratio' in result
+
+    def test_compare_images_threshold(self):
+        """Test that the threshold argument controls whether differing images count as similar."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            img1_path = os.path.join(tmpdir, "img1.png")
+            img2_path = os.path.join(tmpdir, "img2.png")
+            # Clearly different colors
+            self._save_test_image(img1_path, color=(0, 0, 0))
+            self._save_test_image(img2_path, color=(255, 255, 255))
+
+            vc = VisualComparison()
+            # With very high threshold, even big differences are "similar"
+            result_high = vc.compare_images(img1_path, img2_path, threshold=1.0)
+            assert result_high['is_similar']  # truthiness check; avoids `== True`
+
+            # With low threshold, should be different
+            result_low = vc.compare_images(img1_path, img2_path, threshold=0.1)
+            assert int(result_low['different_pixels']) > 0
+
+
+class TestCompareImagesWithoutNumpy:
+    """Test image comparison fallback without numpy."""
+
+    def _save_test_image(self, path, width=20, height=20, color=(255, 0, 0)):
+        """Save a small test image."""
+        img = Image.new('RGB', (width, height), color=color)
+        img.save(path)
+
+    def test_compare_fallback_identical(self):
+        """Test pixel-by-pixel fallback with identical images."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            img_path = os.path.join(tmpdir, "img.png")
+            self._save_test_image(img_path, width=10, height=10, color=(100, 100, 100))
+
+            vc = VisualComparison()
+
+            # Force the ImportError fallback path by making np.array raise
+            import numpy as np
+            with patch.object(np, 'array', side_effect=ImportError("no numpy")):
+                result = vc.compare_images(img_path, img_path)
+
+            assert float(result['difference_ratio']) == 0.0
+            assert int(result['different_pixels']) == 0
+
+    def test_compare_fallback_different(self):
+        """Test pixel-by-pixel fallback with different images."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            img1_path = os.path.join(tmpdir, "img1.png")
+            img2_path = os.path.join(tmpdir, "img2.png")
+            self._save_test_image(img1_path, width=10, height=10, color=(255, 0, 0))
+            self._save_test_image(img2_path, width=10, height=10, color=(0, 0, 255))
+
+            vc = VisualComparison()
+
+            import numpy as np
+            with patch.object(np, 'array', side_effect=ImportError("no numpy")):
+                result = vc.compare_images(img1_path, img2_path)
+
+            assert float(result['difference_ratio']) > 0
+            assert int(result['different_pixels']) > 0
+
+    def test_compare_fallback_with_output(self):
+        """Test pixel-by-pixel fallback saves comparison output."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            img1_path = os.path.join(tmpdir, "img1.png")
+            img2_path = os.path.join(tmpdir, "img2.png")
+            out_path = os.path.join(tmpdir, "comp.png")
+            self._save_test_image(img1_path, width=5, height=5, color=(100, 100, 100))
+            self._save_test_image(img2_path, width=5, height=5, color=(110, 110, 110))
+
+            vc = VisualComparison()
+
+            import numpy as np
+            with patch.object(np, 'array', side_effect=ImportError("no numpy")):
+                result = vc.compare_images(img1_path, img2_path, output_path=out_path)
+
+            assert os.path.exists(out_path)
+
+
+class TestCreateSideBySide:
+    """Test side-by-side comparison image creation."""
+
+    def test_create_side_by_side(self):
+        """Test creating side-by-side image."""
+        vc = VisualComparison()
+        img1 = Image.new('RGB', (100, 100), color=(255, 0, 0))
+        img2 = Image.new('RGB', (100, 100), color=(0, 255, 0))
+        diff = Image.new('RGB', (100, 100), color=(0, 0, 255))
+
+        result = vc._create_side_by_side(img1, img2, diff)
+
+        assert isinstance(result, Image.Image)
+        # Width should be 3 images + spacing
+        assert result.width == 100 * 3 + 20
+        # Height should include label
+        assert result.height == 100 + 40
+
+
+class TestQuit:
+    """Test driver cleanup."""
+
+    def test_quit_with_driver(self):
+        """Test quitting with active driver."""
+        vc = VisualComparison()
+        mock_driver = Mock()
+        vc.driver = mock_driver
+
+        vc.quit()
+
+        mock_driver.quit.assert_called_once()
+        assert vc.driver is None
+
+    def test_quit_without_driver(self):
+        """Test quitting without driver does nothing."""
+        vc = VisualComparison()
+        vc.driver = None
+        vc.quit()  # Should not raise
+
+    def test_quit_driver_exception(self):
+        """Test quitting when driver.quit raises exception."""
+        vc = VisualComparison()
+        mock_driver = Mock()
+        mock_driver.quit.side_effect = Exception("Already closed")
+        vc.driver = mock_driver
+
+        vc.quit()  # Should not raise
+        assert vc.driver is None
+
+
+class TestContextManager:
+    """Test context manager protocol."""
+
+    def test_enter(self):
+        """Test __enter__ returns self."""
+        vc = VisualComparison()
+        assert vc.__enter__() is vc
+
+    def test_exit_calls_quit(self):
+        """Test __exit__ calls quit."""
+        vc = VisualComparison()
+        mock_driver = Mock()
+        vc.driver = mock_driver
+
+        vc.__exit__(None, None, None)
+
+        mock_driver.quit.assert_called_once()
+        assert vc.driver is None
+
+    def test_context_manager_usage(self):
+        """Test using as context manager."""
+        with VisualComparison() as vc:
+            assert isinstance(vc, VisualComparison)
+        assert vc.driver is None
+
+
+class TestTakeScreenshot:
+    """Test screenshot taking."""
+
+    def test_take_screenshot_creates_driver(self):
+        """Test that take_screenshot creates driver if needed."""
+        vc = VisualComparison()
+        assert vc.driver is None
+
+        mock_driver = Mock()
+        mock_driver.get_screenshot_as_png.return_value = b'\x89PNG'
+        mock_driver.execute_script.side_effect = [
+            'complete',  # document.readyState
+            100,  # scrollWidth
+            100,  # scrollHeight
+            100,  # innerWidth
+            100,  # innerHeight
+            None,  # scrollTo
+        ]
+
+        with patch.object(vc, '_create_driver', return_value=mock_driver):
+            # Mock full page screenshot
+            with patch.object(vc, '_take_full_page_screenshot', return_value=b'\x89PNG'):
+                result = vc.take_screenshot("https://example.com")
+
+        assert result == b'\x89PNG'
+
+    def test_take_screenshot_saves_to_file(self):
+        """Test that screenshot is saved when output_path provided."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            vc = VisualComparison()
+            output_path = os.path.join(tmpdir, "screenshot.png")
+
+            mock_driver = Mock()
+            vc.driver = mock_driver
+            mock_driver.execute_script.side_effect = [
+                'complete',  # readyState
+            ]
+
+            with patch.object(vc, '_take_full_page_screenshot', return_value=b'\x89PNG\r\n\x1a\n'):
+                result = vc.take_screenshot("https://example.com", output_path=output_path)
+
+            assert os.path.exists(output_path)
+
+    def test_take_screenshot_viewport_only(self):
+        """Test viewport-only screenshot."""
+        vc = VisualComparison()
+        mock_driver = Mock()
+        vc.driver = mock_driver
+        mock_driver.get_screenshot_as_png.return_value = b'\x89PNG'
+        mock_driver.execute_script.side_effect = [
+            'complete',  # readyState
+        ]
+
+        result = vc.take_screenshot("https://example.com", full_page=False)
+        assert result == b'\x89PNG'
+        mock_driver.get_screenshot_as_png.assert_called_once()
+
+    def test_take_screenshot_wayback_url(self):
+        """Test that Wayback banner is removed for archive URLs."""
+        vc = VisualComparison()
+        mock_driver = Mock()
+        vc.driver = mock_driver
+        mock_driver.execute_script.side_effect = [
+            'complete',  # readyState
+            None,  # _remove_wayback_banner script
+        ]
+
+        with patch.object(vc, '_remove_wayback_banner'):
+            with patch.object(vc, '_take_full_page_screenshot', return_value=b'\x89PNG'):
+                result = vc.take_screenshot(
+                    "https://web.archive.org/web/20230101/https://example.com/"
+                )
+                vc._remove_wayback_banner.assert_called_once()
+
+    def test_take_screenshot_timeout_handled(self):
+        """Test that WebDriverWait timeout is handled gracefully."""
+        vc = VisualComparison()
+        mock_driver = Mock()
+        vc.driver = mock_driver
+
+        # Simulate timeout on readyState check
+        from selenium.common.exceptions import TimeoutException
+        mock_driver.execute_script.side_effect = [
+            TimeoutException("Timeout"),
+        ]
+
+        with patch.object(vc, '_take_full_page_screenshot', return_value=b'\x89PNG'):
+            with patch('wayback_diff.visual_comparison.WebDriverWait') as mock_wait:
+                mock_wait.return_value.until.side_effect = TimeoutException("Timeout")
+                result = vc.take_screenshot("https://example.com")
+                assert result == b'\x89PNG'
+
+
+class TestCompareUrls:
+    """Test multi-browser URL comparison."""
+
+    def test_compare_urls_single_browser(self):
+        """Test comparison with single browser."""
+        vc = VisualComparison()
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            # Create mock screenshots
+            mock_screenshot = b'\x89PNG\r\n'
+
+            with patch.object(vc, '_create_driver') as mock_create:
+                mock_driver = Mock()
+                mock_create.return_value = mock_driver
+
+                with patch.object(vc, 'take_screenshot') as mock_ts:
+                    mock_ts.return_value = mock_screenshot
+
+                    with patch.object(vc, 'compare_images') as mock_ci:
+                        mock_ci.return_value = {
+                            'difference_ratio': 0.01,
+                            'different_pixels': 100,
+                            'total_pixels': 100000,
+                            'is_similar': True,
+                            'comparison_image_path': None,
+                        }
+
+                        results = vc.compare_urls(
+                            "https://a.com", "https://b.com",
+                            tmpdir, browsers=['chrome']
+                        )
+
+            assert 'chrome' in results
+            assert results['chrome']['difference_ratio'] == 0.01
+
+    def test_compare_urls_auto_detect(self):
+        """Test comparison with auto-detected browsers."""
+        vc = VisualComparison()
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            with patch.object(VisualComparison, 'detect_available_browsers',
+                              return_value=['chrome']):
+                with patch.object(vc, '_create_driver') as mock_create:
+                    mock_driver = Mock()
+                    mock_create.return_value = mock_driver
+
+                    with patch.object(vc, 'take_screenshot', return_value=b'\x89PNG'):
+                        with patch.object(vc, 'compare_images', return_value={
+                            'difference_ratio': 0.0,
+                            'different_pixels': 0,
+                            'total_pixels': 100000,
+                            'is_similar': True,
+                            'comparison_image_path': None,
+                        }):
+                            results = vc.compare_urls(
+                                "https://a.com", "https://b.com", tmpdir
+                            )
+
+            assert 'chrome' in results
+
+    def test_compare_urls_browser_error(self):
+        """Test comparison when screenshot raises error."""
+        vc = VisualComparison()
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            with patch.object(vc, '_create_driver') as mock_create:
+                mock_driver = Mock()
+                mock_create.return_value = mock_driver
+
+                with patch.object(vc, 'take_screenshot',
+                                  side_effect=Exception("Screenshot failed")):
+                    results = vc.compare_urls(
+                        "https://a.com", "https://b.com",
+                        tmpdir, browsers=['chrome']
+                    )
+
+            assert 'chrome' in results
+            assert 'error' in results['chrome']
+
+    def test_compare_urls_unsupported_browser_skipped(self):
+        """Test that unsupported browsers are skipped."""
+        vc = VisualComparison()
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            results = vc.compare_urls(
+                "https://a.com", "https://b.com",
+                tmpdir, browsers=['safari']
+            )
+            assert 'safari' not in results
+
+    def test_compare_urls_creates_output_dir(self):
+        """Test that output directory is created."""
+        vc = VisualComparison()
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            output_dir = os.path.join(tmpdir, "screenshots")
+            assert not os.path.exists(output_dir)
+
+            with patch.object(vc, '_create_driver') as mock_create:
+                mock_driver = Mock()
+                mock_create.return_value = mock_driver
+
+                with patch.object(vc, 'take_screenshot', return_value=b'\x89PNG'):
+                    with patch.object(vc, 'compare_images', return_value={
+                        'difference_ratio': 0.0,
+                        'different_pixels': 0,
+                        'total_pixels': 100000,
+                        'is_similar': True,
+                        'comparison_image_path': None,
+                    }):
+                        vc.compare_urls(
+                            "https://a.com", "https://b.com",
+                            output_dir, browsers=['chrome']
+                        )
+
+            assert os.path.exists(output_dir)
+
+
+class TestRemoveWaybackBanner:
+    """Test Wayback banner removal."""
+
+    def test_remove_banner(self):
+        """Test that banner removal script is executed."""
+        vc = VisualComparison()
+        mock_driver = Mock()
+        vc.driver = mock_driver
+
+        with patch('wayback_diff.visual_comparison.time'):
+            vc._remove_wayback_banner()
+
+        mock_driver.execute_script.assert_called_once()
+        script = mock_driver.execute_script.call_args[0][0]
+        assert 'wm-ipp' in script
+
+    def test_remove_banner_error_handled(self):
+        """Test that banner removal errors are handled."""
+        vc = VisualComparison()
+        mock_driver = Mock()
+        mock_driver.execute_script.side_effect = Exception("Script error")
+        vc.driver = mock_driver
+
+        # Should not raise
+        vc._remove_wayback_banner()
+
+
+class TestCreateDriver:
+    """Test WebDriver creation."""
+
+    @patch('wayback_diff.visual_comparison.WEBDRIVER_MANAGER_AVAILABLE', False)
+    @patch('wayback_diff.visual_comparison.webdriver')
+    def test_create_chrome_driver(self, mock_webdriver):
+        """Test Chrome driver creation."""
+        vc = VisualComparison(browser='chrome')
+        mock_driver = Mock()
+        mock_webdriver.Chrome.return_value = mock_driver
+
+        result = vc._create_driver()
+        assert result == mock_driver
+
+    @patch('wayback_diff.visual_comparison.WEBDRIVER_MANAGER_AVAILABLE', False)
+    @patch('wayback_diff.visual_comparison.webdriver')
+    def test_create_firefox_driver(self, mock_webdriver):
+        """Test Firefox driver creation."""
+        vc = VisualComparison(browser='firefox')
+        mock_driver = Mock()
+        mock_webdriver.Firefox.return_value = mock_driver
+
+        result = vc._create_driver()
+        mock_driver.set_window_size.assert_called_once_with(1920, 1080)
+
+    @patch('wayback_diff.visual_comparison.WEBDRIVER_MANAGER_AVAILABLE', True)
+    @patch('wayback_diff.visual_comparison.ChromeDriverManager')
+    @patch('wayback_diff.visual_comparison.webdriver')
+    def test_create_chrome_with_manager(self, mock_webdriver, mock_manager):
+        """Test Chrome driver creation with webdriver-manager."""
+        vc = VisualComparison(browser='chrome')
+        mock_driver = Mock()
+        mock_webdriver.Chrome.return_value = mock_driver
+        mock_manager.return_value.install.return_value = '/path/to/chromedriver'
+
+        result = vc._create_driver()
+        assert result == mock_driver
+
+    @patch('wayback_diff.visual_comparison.WEBDRIVER_MANAGER_AVAILABLE', True)
+    @patch('wayback_diff.visual_comparison.ChromeDriverManager')
+    @patch('wayback_diff.visual_comparison.ChromeService')
+    @patch('wayback_diff.visual_comparison.webdriver')
+    def test_create_chrome_manager_fallback(self, mock_webdriver, mock_service,
+                                            mock_manager):
+        """Test Chrome driver falls back to system when manager fails."""
+        vc = VisualComparison(browser='chrome')
+        mock_driver = Mock()
+        # Manager install fails, triggering fallback to system chromedriver
+        mock_manager.return_value.install.side_effect = Exception("Download failed")
+        mock_service.side_effect = Exception("Service failed")
+        # First Chrome() call is the fallback (no service)
+        mock_webdriver.Chrome.return_value = mock_driver
+
+        result = vc._create_driver()
+        assert result == mock_driver
+
+    @patch('wayback_diff.visual_comparison.WEBDRIVER_MANAGER_AVAILABLE', False)
+    @patch('wayback_diff.visual_comparison.webdriver')
+    def test_create_chromium_driver(self, mock_webdriver):
+        """Test Chromium driver creation."""
+        vc = VisualComparison(browser='chromium')
+        mock_driver = Mock()
+        mock_webdriver.Chrome.return_value = mock_driver
+
+        with patch('os.path.exists', return_value=False):
+            result = vc._create_driver()
+        assert result == mock_driver
+
+    @patch('wayback_diff.visual_comparison.WEBDRIVER_MANAGER_AVAILABLE', False)
+    @patch('wayback_diff.visual_comparison.webdriver')
+    def test_create_opera_driver(self, mock_webdriver):
+        """Test Opera driver creation."""
+        vc = VisualComparison(browser='opera')
+        mock_driver = Mock()
+        mock_webdriver.Chrome.return_value = mock_driver
+
+        with patch('os.path.exists', return_value=False):
+            result = vc._create_driver()
+        assert result == mock_driver
+
+    @patch('wayback_diff.visual_comparison.WEBDRIVER_MANAGER_AVAILABLE', False)
+    @patch('wayback_diff.visual_comparison.webdriver')
+    def test_create_edge_driver(self, mock_webdriver):
+        """Test Edge driver creation."""
+        vc = VisualComparison(browser='edge')
+        mock_driver = Mock()
+        mock_webdriver.Edge.return_value = mock_driver
+
+        result = vc._create_driver()
+        assert result == mock_driver
+
+    @patch('wayback_diff.visual_comparison.WEBDRIVER_MANAGER_AVAILABLE', False)
+    @patch('wayback_diff.visual_comparison.webdriver')
+    def test_create_chrome_headless_args(self, mock_webdriver):
+        """Test Chrome headless arguments."""
+        vc = VisualComparison(browser='chrome', headless=True)
+        mock_driver = Mock()
+        mock_webdriver.Chrome.return_value = mock_driver
+
+        vc._create_driver()
+        # Verify Chrome was called (args checked via options)
+        mock_webdriver.Chrome.assert_called()
+
+    @patch('wayback_diff.visual_comparison.WEBDRIVER_MANAGER_AVAILABLE', False)
+    @patch('wayback_diff.visual_comparison.webdriver')
+    def test_create_chrome_non_headless(self, mock_webdriver):
+        """Test Chrome non-headless mode."""
+        vc = VisualComparison(browser='chrome', headless=False)
+        mock_driver = Mock()
+        mock_webdriver.Chrome.return_value = mock_driver
+
+        vc._create_driver()
+        mock_webdriver.Chrome.assert_called()
+
+
+class TestTakeFullPageScreenshot:
+    """Test full page screenshot functionality."""
+
+    def test_full_page_screenshot_basic(self):
+        """Test full page screenshot with mocked driver."""
+        vc = VisualComparison()
+        mock_driver = Mock()
+        vc.driver = mock_driver
+
+        # Create a small test PNG
+        img = Image.new('RGB', (100, 100), color=(255, 255, 255))
+        img_bytes = io.BytesIO()
+        img.save(img_bytes, format='PNG')
+        png_data = img_bytes.getvalue()
+
+        mock_driver.current_url = "https://example.com"
+        mock_driver.execute_script.side_effect = [
+            100,   # scrollWidth
+            100,   # scrollHeight
+            100,   # innerWidth
+            100,   # innerHeight
+            None,  # scrollTo
+            None,  # scrollTo(0,0) reset
+        ]
+        mock_driver.get_screenshot_as_png.return_value = png_data
+
+        result = vc._take_full_page_screenshot()
+        assert isinstance(result, bytes)
+        assert len(result) > 0
+
+    def test_full_page_screenshot_wayback_url(self):
+        """Test full page screenshot removes Wayback banner."""
+        vc = VisualComparison()
+        mock_driver = Mock()
+        vc.driver = mock_driver
+
+        img = Image.new('RGB', (50, 50), color=(200, 200, 200))
+        img_bytes = io.BytesIO()
+        img.save(img_bytes, format='PNG')
+        png_data = img_bytes.getvalue()
+
+        mock_driver.current_url = "https://web.archive.org/web/20230101/https://example.com"
+        # With _remove_wayback_banner patched out, execute_script calls are:
+        # scrollWidth, scrollHeight, innerWidth, innerHeight, scrollTo, wayback-style, scrollTo-reset
+        mock_driver.execute_script.side_effect = [
+            50,    # scrollWidth
+            50,    # scrollHeight
+            50,    # innerWidth
+            50,    # innerHeight
+            None,  # scrollTo
+            None,  # wayback style injection during scroll
+            None,  # scrollTo(0,0) reset
+        ]
+        mock_driver.get_screenshot_as_png.return_value = png_data
+
+        with patch.object(vc, '_remove_wayback_banner'):
+            result = vc._take_full_page_screenshot()
+            vc._remove_wayback_banner.assert_called_once()
+        assert isinstance(result, bytes)
+
+    def test_full_page_screenshot_multi_scroll(self):
+        """Test full page screenshot that requires scrolling."""
+        vc = VisualComparison()
+        mock_driver = Mock()
+        vc.driver = mock_driver
+
+        # Page is 200x200 but viewport is 100x100, needs 4 screenshots
+        img = Image.new('RGB', (100, 100), color=(128, 128, 128))
+        img_bytes = io.BytesIO()
+        img.save(img_bytes, format='PNG')
+        png_data = img_bytes.getvalue()
+
+        mock_driver.current_url = "https://example.com"
+        # Each scroll iteration calls execute_script for scrollTo
+        mock_driver.execute_script.side_effect = [
+            200,   # scrollWidth
+            200,   # scrollHeight
+            100,   # innerWidth
+            100,   # innerHeight
+            None,  # scrollTo(0,0)
+            None,  # scrollTo(100,0)
+            None,  # scrollTo(0,100)
+            None,  # scrollTo(100,100)
+            None,  # scrollTo(0,0) reset
+        ]
+        mock_driver.get_screenshot_as_png.return_value = png_data
+
+        result = vc._take_full_page_screenshot()
+        assert isinstance(result, bytes)
+        # Should have taken 4 viewport screenshots
+        assert mock_driver.get_screenshot_as_png.call_count == 4
+
+
+class TestEdgeDriverFallback:
+    """Test Edge driver creation with fallback paths."""
+
+    @patch('wayback_diff.visual_comparison.WEBDRIVER_MANAGER_AVAILABLE', False)
+    @patch('wayback_diff.visual_comparison.webdriver')
+    def test_create_edge_fallback_to_chrome(self, mock_webdriver):
+        """Test Edge driver falls back to Chrome-based Edge."""
+        vc = VisualComparison(browser='edge')
+        mock_driver = Mock()
+        # Edge driver fails, Chrome-based Edge succeeds
+        mock_webdriver.Edge.side_effect = Exception("No Edge driver")
+        mock_webdriver.Chrome.return_value = mock_driver
+
+        with patch('os.path.exists', return_value=False):
+            result = vc._create_driver()
+        assert result == mock_driver
+
+
+class TestFirefoxDriverManager:
+    """Test Firefox driver creation with webdriver-manager."""
+
+    @patch('wayback_diff.visual_comparison.WEBDRIVER_MANAGER_AVAILABLE', True)
+    @patch('wayback_diff.visual_comparison.GeckoDriverManager')
+    @patch('wayback_diff.visual_comparison.FirefoxService')
+    @patch('wayback_diff.visual_comparison.webdriver')
+    def test_create_firefox_with_manager(self, mock_webdriver, mock_service,
+                                          mock_manager):
+        """Test Firefox driver creation with webdriver-manager."""
+        vc = VisualComparison(browser='firefox')
+        mock_driver = Mock()
+        mock_webdriver.Firefox.return_value = mock_driver
+        mock_manager.return_value.install.return_value = '/path/to/geckodriver'
+
+        result = vc._create_driver()
+        mock_driver.set_window_size.assert_called_once()
+
+    @patch('wayback_diff.visual_comparison.WEBDRIVER_MANAGER_AVAILABLE', True)
+    @patch('wayback_diff.visual_comparison.GeckoDriverManager')
+    @patch('wayback_diff.visual_comparison.FirefoxService')
+    @patch('wayback_diff.visual_comparison.webdriver')
+    def test_create_firefox_manager_fallback(self, mock_webdriver, mock_service,
+                                              mock_manager):
+        """Test Firefox driver falls back when manager fails."""
+        vc = VisualComparison(browser='firefox')
+        mock_driver = Mock()
+        mock_manager.return_value.install.side_effect = Exception("Download failed")
+        mock_service.side_effect = Exception("Service failed")
+        mock_webdriver.Firefox.return_value = mock_driver
+
+        result = vc._create_driver()
+        assert result == mock_driver
+
+
+class TestModuleEntryPoint:
+    """Test __main__.py module entry point."""
+
+    def test_main_module_import(self):
+        """Test that __main__ module can be imported."""
+        import wayback_diff.__main__
+
+    def test_package_version(self):
+        """Test package version is set."""
+        from wayback_diff import __version__
+        assert __version__ == "1.1.0"
diff --git a/tests/test_wayback_cleaner.py b/tests/test_wayback_cleaner.py
index ac68084..bc706b2 100644
--- a/tests/test_wayback_cleaner.py
+++ b/tests/test_wayback_cleaner.py
@@ -6,23 +6,45 @@
 
 class TestWaybackCleaner:
     """Test cases for WaybackCleaner."""
-    
-    def test_is_wayback_url(self):
-        """Test Wayback URL detection."""
+
+    def test_is_wayback_url_full(self):
+        """Test Wayback URL detection with full URL."""
         assert WaybackCleaner.is_wayback_url("https://web.archive.org/web/20230101/https://example.com/")
+        assert WaybackCleaner.is_wayback_url("http://web.archive.org/web/20230101/https://example.com/")
+
+    def test_is_wayback_url_relative(self):
+        """Test Wayback URL detection with relative URL."""
         assert WaybackCleaner.is_wayback_url("/web/20230101/https://example.com/")
+
+    def test_is_wayback_url_non_wayback(self):
+        """Test non-Wayback URLs."""
         assert not WaybackCleaner.is_wayback_url("https://example.com/")
-    
-    def test_extract_timestamp(self):
-        """Test timestamp extraction."""
+        assert not WaybackCleaner.is_wayback_url("https://archive.org/")
+        assert not WaybackCleaner.is_wayback_url("")
+
+    def test_extract_timestamp_full_url(self):
+        """Test timestamp extraction from full URL."""
         url = "https://web.archive.org/web/20230101120000/https://example.com/"
         timestamp = WaybackCleaner.extract_timestamp(url)
         assert timestamp == "20230101120000"
-        
-        url2 = "/web/20230101/https://example.com/"
-        timestamp2 = WaybackCleaner.extract_timestamp(url2)
-        assert timestamp2 == "20230101"
-    
+
+    def test_extract_timestamp_short(self):
+        """Test timestamp extraction from short URL."""
+        url = "/web/20230101/https://example.com/"
+        timestamp = WaybackCleaner.extract_timestamp(url)
+        assert timestamp == "20230101"
+
+    def test_extract_timestamp_with_suffix(self):
+        """Test timestamp extraction with suffix (cs_, im_, etc.)."""
+        url = "https://web.archive.org/web/20230101cs_/https://example.com/style.css"
+        timestamp = WaybackCleaner.extract_timestamp(url)
+        assert timestamp == "20230101"
+
+    def test_extract_timestamp_no_match(self):
+        """Test timestamp extraction when no timestamp present."""
+        assert WaybackCleaner.extract_timestamp("https://example.com/") is None
+        assert WaybackCleaner.extract_timestamp("") is None
+
     def test_remove_wayback_header(self):
         """Test header removal."""
         content = b'''<!DOCTYPE html>
@@ -36,13 +58,58 @@ def test_remove_wayback_header(self):
 </head>
 <body>Content</body>
 </html>'''
-        
+
         cleaned = WaybackCleaner.remove_wayback_header(content)
         assert b'archive.org/includes/analytics.js' not in cleaned
         assert b'__wm.init' not in cleaned
         assert b'<meta charset="utf-8">' in cleaned
         assert b'Content' in cleaned
-    
+
+    def test_remove_wayback_header_bundle_playback(self):
+        """Test header removal with bundle-playback.js pattern."""
+        content = b'''<head>
+<script type="text/javascript" src="/_static/js/bundle-playback.js"></script>
+<script>some_wb_code();</script>
+<!-- End Wayback Rewrite JS Include -->
+<title>Page</title>
+</head>'''
+
+        cleaned = WaybackCleaner.remove_wayback_header(content)
+        assert b'bundle-playback.js' not in cleaned
+        assert b'<title>Page</title>' in cleaned
+
+    def test_remove_wayback_header_no_end_marker(self):
+        """Test header removal when end marker is missing but meta tag exists after scripts."""
+        content = b'''<head>
+<script src="//archive.org/includes/analytics.js"></script>
+<script>init();</script>
+<meta charset="utf-8">
+<title>Page</title>
+</head>'''
+
+        cleaned = WaybackCleaner.remove_wayback_header(content)
+        # Fallback finds <meta as the boundary; content from <meta onward is preserved
+        assert b'<meta charset="utf-8">' in cleaned
+        assert b'<title>Page</title>' in cleaned
+
+    def test_remove_wayback_header_no_header(self):
+        """Test when there is no Wayback header."""
+        content = b'<html><head><title>Clean</title></head></html>'
+        cleaned = WaybackCleaner.remove_wayback_header(content)
+        assert cleaned == content
+
+    def test_remove_wayback_header_no_end_marker_no_meta(self):
+        """Test header removal when neither end marker nor meta tag exists."""
+        content = b'''<head>
+<script src="//archive.org/includes/analytics.js"></script>
+<script>__wm.init();</script>
+<title>Page</title>
+</head>'''
+
+        cleaned = WaybackCleaner.remove_wayback_header(content)
+        # No end marker or <meta> boundary: only <title> survival is asserted here
+        assert b'<title>Page</title>' in cleaned
+
     def test_remove_wayback_footer(self):
         """Test footer removal."""
         content = b'''<body>Content</body>
@@ -51,23 +118,85 @@ def test_remove_wayback_footer(self):
      FILE ARCHIVED ON 23:59:13 Nov 20, 2021 AND RETRIEVED FROM THE
      INTERNET ARCHIVE ON 00:41:42 Dec 01, 2021.
 -->'''
-        
+
         cleaned = WaybackCleaner.remove_wayback_footer(content)
         assert b'FILE ARCHIVED ON' not in cleaned
         assert cleaned.endswith(b'</html>\n')
-    
+
+    def test_remove_wayback_footer_inline_comment(self):
+        """Test footer removal with inline comment format."""
+        content = b'''<body>Content</body>
+</html><!-- FILE ARCHIVED ON 2021-01-01 -->'''
+
+        cleaned = WaybackCleaner.remove_wayback_footer(content)
+        assert b'FILE ARCHIVED ON' not in cleaned
+
+    def test_remove_wayback_footer_no_footer(self):
+        """Test when there is no Wayback footer."""
+        content = b'<html><body>Clean</body></html>'
+        cleaned = WaybackCleaner.remove_wayback_footer(content)
+        # Content should still contain the original body
+        assert b'Clean' in cleaned
+
+    def test_remove_wayback_footer_carriage_return(self):
+        """Test footer removal with \\r\\n line endings."""
+        content = b'<body>Content</body>\r\n</html>\r\n<!--\n     FILE ARCHIVED ON 2021 -->'
+        cleaned = WaybackCleaner.remove_wayback_footer(content)
+        assert b'FILE ARCHIVED ON' not in cleaned
+
+    def test_remove_wayback_footer_standalone_comment(self):
+        """Test footer removal when comment is standalone (not right after </html>)."""
+        content = b'''<body>Content</body>
+</html>
+
+<!--
+     FILE ARCHIVED ON 23:59:13 Nov 20, 2021 AND RETRIEVED FROM THE
+     INTERNET ARCHIVE ON 00:41:42 Dec 01, 2021.
+-->'''
+
+        cleaned = WaybackCleaner.remove_wayback_footer(content)
+        cleaned_str = cleaned.decode('utf-8', errors='ignore')
+        assert 'FILE ARCHIVED ON' not in cleaned_str
+
     def test_remove_wayback_urls(self):
-        """Test URL prefix removal."""
+        """Test URL prefix removal with timestamp."""
         content = b'''<a href="http://web.archive.org/web/20230101/https://example.com/page">Link</a>
 <img src="/web/20230101im_/https://example.com/image.png">
 <link href="/web/20230101cs_/https://example.com/style.css">'''
-        
+
         cleaned = WaybackCleaner.remove_wayback_urls(content, "20230101")
         assert b'web.archive.org' not in cleaned
         assert b'/web/20230101' not in cleaned
         assert b'https://example.com/page' in cleaned
         assert b'https://example.com/image.png' in cleaned
-    
+
+    def test_remove_wayback_urls_js_prefix(self):
+        """Test URL removal with js_ prefix."""
+        content = b'<script src="/web/20230101js_/https://example.com/app.js"></script>'
+        cleaned = WaybackCleaner.remove_wayback_urls(content, "20230101")
+        assert b'/web/20230101' not in cleaned
+        assert b'https://example.com/app.js' in cleaned
+
+    def test_remove_wayback_urls_no_timestamp(self):
+        """Test URL removal without explicit timestamp (extracts from content)."""
+        content = b'''<a href="http://web.archive.org/web/20230101/https://example.com/page">Link</a>
+<img src="/web/20230101im_/https://example.com/image.png">'''
+
+        cleaned = WaybackCleaner.remove_wayback_urls(content, None)
+        assert b'web.archive.org' not in cleaned
+
+    def test_remove_wayback_urls_no_timestamp_in_content(self):
+        """Test URL removal when no timestamp can be extracted."""
+        content = b'<a href="http://web.archive.org/path/page">Link</a>'
+        cleaned = WaybackCleaner.remove_wayback_urls(content, None)
+        assert b'web.archive.org' not in cleaned
+
+    def test_remove_wayback_urls_https_archive(self):
+        """Test URL removal with https web.archive.org."""
+        content = b'<a href="https://web.archive.org/web/20230101/https://example.com/">Link</a>'
+        cleaned = WaybackCleaner.remove_wayback_urls(content, "20230101")
+        assert b'web.archive.org' not in cleaned
+
     def test_clean_wayback_html(self):
         """Test complete cleaning."""
         content = b'''<!DOCTYPE html>
@@ -82,12 +211,69 @@ def test_clean_wayback_html(self):
 </body>
 </html>
 <!-- FILE ARCHIVED ON -->'''
-        
+
         cleaned = WaybackCleaner.clean_wayback_html(content, "https://web.archive.org/web/20230101/https://example.com/")
-        # Check that wayback artifacts are removed
         assert b'archive.org/includes' not in cleaned
         assert b'web.archive.org' not in cleaned
         assert b'https://example.com/' in cleaned
-        # The cleaner should remove the footer comment
         cleaned_str = cleaned.decode('utf-8', errors='ignore')
         assert 'FILE ARCHIVED ON' not in cleaned_str
+
+    def test_clean_wayback_html_no_url(self):
+        """Test cleaning without URL (no timestamp extraction)."""
+        content = b'''<html>
+<head>
+<script src="//archive.org/includes/analytics.js"></script>
+<!-- End Wayback Rewrite JS Include -->
+<title>Test</title>
+</head>
+<body>Content</body>
+</html>'''
+
+        cleaned = WaybackCleaner.clean_wayback_html(content, None)
+        assert b'archive.org/includes' not in cleaned
+
+    def test_clean_wayback_html_empty(self):
+        """Test cleaning empty content."""
+        assert WaybackCleaner.clean_wayback_html(b'', None) == b''
+
+    def test_clean_wayback_html_none(self):
+        """Test cleaning b'' content (NOTE: None itself is never passed here)."""
+        assert WaybackCleaner.clean_wayback_html(b'', None) == b''
+
+    def test_clean_wayback_html_no_artifacts(self):
+        """Test cleaning content with no Wayback artifacts."""
+        content = b'<html><body><p>Clean content</p></body></html>'
+        cleaned = WaybackCleaner.clean_wayback_html(content, "https://example.com")
+        assert b'Clean content' in cleaned
+
+    def test_normalize_html_whitespace_self_closing(self):
+        """Test whitespace normalization in self-closing tags."""
+        html = b'<img src="test.png" />'
+        normalized = WaybackCleaner.normalize_html_whitespace(html)
+        assert b' />' not in normalized
+        assert b'/>' in normalized
+
+    def test_normalize_html_whitespace_multiple_spaces(self):
+        """Test normalization of multiple spaces."""
+        html = b'<div>  text    here  </div>'
+        normalized = WaybackCleaner.normalize_html_whitespace(html)
+        assert b'  ' not in normalized
+
+    def test_normalize_html_whitespace_tabs(self):
+        """Test normalization of tabs."""
+        html = b'<div>\ttext\there</div>'
+        normalized = WaybackCleaner.normalize_html_whitespace(html)
+        assert b'\t' not in normalized
+
+    def test_normalize_html_whitespace_newlines(self):
+        """Test normalization of newlines with spaces."""
+        html = b'<div>  \n  text</div>'
+        normalized = WaybackCleaner.normalize_html_whitespace(html)
+        assert b'  \n  ' not in normalized
+
+    def test_extract_timestamp_jm_prefix(self):
+        """Test timestamp with jm_ prefix."""
+        url = "https://web.archive.org/web/20230101jm_/https://example.com/"
+        timestamp = WaybackCleaner.extract_timestamp(url)
+        assert timestamp == "20230101"
diff --git a/wayback_diff/diff_engine.py b/wayback_diff/diff_engine.py
index 479cac6..aba69ff 100644
--- a/wayback_diff/diff_engine.py
+++ b/wayback_diff/diff_engine.py
@@ -252,15 +252,35 @@ def compare_structures(self, old_html: bytes, new_html: bytes) -> Dict:
                 'new_structure': [],
                 'similarity': 0.0
             }
-        
+
         # Compare structures
         old_structure = old_parser.structure
         new_structure = new_parser.structure
-        
+
         # Calculate similarity
-        matcher = SequenceMatcher(None, old_structure, new_structure)
+        # SequenceMatcher hashes its items: recursively freeze dicts/lists into tuples
+        def _make_hashable(d):
+            if isinstance(d, dict):
+                pairs = sorted(d.items(), key=lambda x: str(x[0]))
+                return tuple((k, _make_hashable(v)) for k, v in pairs)
+            if isinstance(d, (list, tuple)):
+                return tuple(_make_hashable(v) for v in d)
+            return d
+
+        try:
+            old_hashable = [_make_hashable(d) for d in old_structure]
+            new_hashable = [_make_hashable(d) for d in new_structure]
+            matcher = SequenceMatcher(None, old_hashable, new_hashable)
+        except Exception:
+            # Best-effort fallback (e.g. unhashable entries): report no similarity
+            return {
+                'structural_changes': [],
+                'old_structure': old_structure,
+                'new_structure': new_structure,
+                'similarity': 0.0
+            }
         similarity = matcher.ratio()
-        
+
         # Find structural differences
         structural_changes = []
         for tag, i1, i2, j1, j2 in matcher.get_opcodes():
diff --git a/wayback_diff/visual_comparison.py b/wayback_diff/visual_comparison.py
index a25016b..9a87936 100644
--- a/wayback_diff/visual_comparison.py
+++ b/wayback_diff/visual_comparison.py
@@ -1,5 +1,7 @@
 """Visual comparison module for taking screenshots and comparing them."""
 
+from __future__ import annotations
+
 import os
 import time
 from typing import Optional, Tuple, List, Dict
diff --git a/wayback_diff/wayback_cleaner.py b/wayback_diff/wayback_cleaner.py
index 0ac1f7b..9f28664 100644
--- a/wayback_diff/wayback_cleaner.py
+++ b/wayback_diff/wayback_cleaner.py
@@ -89,10 +89,11 @@ def remove_wayback_header(content: bytes) -> bytes:
                 # This is a fallback for pages where the comment might be missing
                 next_tag = content.find(b'<meta', start_idx)
                 if next_tag > start_idx:
-                    end_idx = next_tag
+                    # Use <meta position directly as boundary (don't skip it)
+                    return content[:start_idx] + content[next_tag:]
                 else:
                     return content
-        
+
         # Remove the header section
         return content[:start_idx] + content[end_idx + len(end_marker):]