Skip to content
Next Next commit
Updates
  • Loading branch information
sydp committed Apr 30, 2024
commit 1cda90dc958e8010b0255800466cea2b224945f3
54 changes: 19 additions & 35 deletions dfindexeddb/indexeddb/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from dfindexeddb.indexeddb.chromium import v8
from dfindexeddb.indexeddb.safari import record as safari_record


_VALID_PRINTABLE_CHARACTERS = (
' abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789' +
'!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~.')
Expand Down Expand Up @@ -76,42 +77,25 @@ def _Output(structure, output):
def DbCommand(args):
"""The CLI for processing a directory as IndexedDB."""
if args.format in ('chrome', 'chromium'):
if args.use_manifest:
for db_record in leveldb_record.LevelDBRecord.FromManifest(args.source):
record = db_record.record
try:
idb_record = chromium_record.IndexedDBRecord.FromLevelDBRecord(
db_record)
except(
errors.ParserError,
errors.DecoderError,
NotImplementedError) as err:
print((
f'Error parsing Indexeddb record {record.__class__.__name__}: '
f'{err} at offset {record.offset} in {db_record.path}'),
file=sys.stderr)
print(f'Traceback: {traceback.format_exc()}', file=sys.stderr)
continue
_Output(idb_record, output=args.output)
else:
for db_record in leveldb_record.LevelDBRecord.FromDir(args.source):
record = db_record.record
try:
idb_record = chromium_record.IndexedDBRecord.FromLevelDBRecord(
db_record)
except(
errors.ParserError,
errors.DecoderError,
NotImplementedError) as err:
print((
f'Error parsing Indexeddb record {record.__class__.__name__}: '
f'{err} at offset {record.offset} in {db_record.path}'),
file=sys.stderr)
print(f'Traceback: {traceback.format_exc()}', file=sys.stderr)
continue
_Output(idb_record, output=args.output)
for db_record in leveldb_record.FolderReader(
args.source).GetRecords(use_manifest=args.use_manifest):
record = db_record.record
try:
idb_record = chromium_record.IndexedDBRecord.FromLevelDBRecord(
db_record)
except(
errors.ParserError,
errors.DecoderError,
NotImplementedError) as err:
print((
f'Error parsing Indexeddb record {record.__class__.__name__}: '
f'{err} at offset {record.offset} in {db_record.path}'),
file=sys.stderr)
print(f'Traceback: {traceback.format_exc()}', file=sys.stderr)
continue
_Output(idb_record, output=args.output)
elif args.format == 'safari':
for db_record in safari_record.Reader(args.source).Records():
for db_record in safari_record.FileReader(args.source).Records():
_Output(db_record, output=args.output)


Expand Down
2 changes: 1 addition & 1 deletion dfindexeddb/indexeddb/safari/record.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ class IndexedDBRecord:
record_id: int


class Reader:
class FileReader:
"""A reader for Safari IndexedDB sqlite3 files.

Attributes:
Expand Down
12 changes: 4 additions & 8 deletions dfindexeddb/leveldb/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,12 +66,9 @@ def _Output(structure, output):

def DbCommand(args):
"""The CLI for processing leveldb folders."""
if args.use_manifest:
for rec in record.LevelDBRecord.FromManifest(args.source):
_Output(rec, output=args.output)
else:
for rec in record.LevelDBRecord.FromDir(args.source):
_Output(rec, output=args.output)
for leveldb_record in record.FolderReader(
args.source).GetRecords(use_manifest=args.use_manifest):
_Output(leveldb_record, output=args.output)


def LdbCommand(args):
Expand Down Expand Up @@ -257,8 +254,7 @@ def App():
'-v',
'--version_history',
action='store_true',
help='Parses the leveldb version history.'
)
help='Parses the leveldb version history.')
parser_descriptor.set_defaults(func=DescriptorCommand)

args = parser.parse_args()
Expand Down
154 changes: 102 additions & 52 deletions dfindexeddb/leveldb/record.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,45 +74,30 @@ def FromFile(
else:
print(f'Unsupported file type {file_path.as_posix()}', file=sys.stderr)

@classmethod
def FromDir(
cls,
path: pathlib.Path
) -> Generator[LevelDBRecord, Any, Any]:
"""Yields LevelDBRecords from the given directory.

Args:
path: the file path.
class FolderReader:
"""Parses a given folder as LevelDB."""

Yields:
LevelDBRecords
"""
if not path or not path.is_dir():
raise ValueError(f'{path} is not a directory')
for file_path in path.iterdir():
yield from cls.FromFile(file_path=file_path)
def __init__(self, foldername: pathlib.Path):
if not foldername or not foldername.is_dir():
raise ValueError(f'{foldername} is not a directory')
self.foldername = foldername

@classmethod
def FromManifest(
cls,
path: pathlib.Path
) -> Generator[LevelDBRecord, Any, Any]:
"""Yields LevelDBRecords from the given directory using the manifest.
def LogFiles(self) -> Generator[pathlib.Path, None, None]:
"""Returns the log filenames."""
yield from self.foldername.glob('*.log')

Args:
path: the file path.
def LdbFiles(self) -> Generator[pathlib.Path, None, None]:
"""Returns the ldb filenames."""
yield from self.foldername.glob('*.ldb')

Yields:
LevelDBRecords

Raises:
ParserError: if the CURRENT or MANIFEST-* file does not exist.
ValueError: if path is not a directory.
"""
if not path or not path.is_dir():
raise ValueError(f'{path} is not a directory')
def Manifest(self) -> Generator[pathlib.Path, None, None]:
"""Returns the Manifest filenames."""
yield from self.foldername.glob('MANIFEST-*')

current_path = path / 'CURRENT'
def GetCurrentManifest(self) -> pathlib.Path:
"""Returns the current manifest file."""
current_path = self.foldername / 'CURRENT'
if not current_path.exists():
raise errors.ParserError(f'{current_path!s} does not exist.')

Expand All @@ -122,34 +107,69 @@ def FromManifest(
raise errors.ParserError(
f'{current_path!s} does not contain the expected content')

manifest_path = path / current_manifest
manifest_path = self.foldername / current_manifest
if not manifest_path.exists():
raise errors.ParserError(f'{manifest_path!s} does not exist.')
return manifest_path

def GetLatestVersion(self) -> descriptor.LevelDBVersion:
"""Returns the latest version."""
current_manifest_path = self.GetCurrentManifest()
latest_version = descriptor.FileReader(
str(manifest_path)).GetLatestVersion()
str(current_manifest_path)).GetLatestVersion()
if not latest_version:
raise errors.ParserError(
f'Could not parse a leveldb version from {manifest_path!s}')
f'Could not parse a leveldb version from {current_manifest_path!s}')
return latest_version

def _GetRecordsByFile(
self, filename: pathlib.Path) -> Generator[LevelDBRecord, None, None]:
""""""
if filename.name.endswith('.log'):
yield from self._GetLogRecords(filename)
elif filename.name.endswith('.ldb'):
yield from self._GetLdbRecords(filename)
elif filename.name.startswith('MANIFEST'):
print(f'Ignoring descriptor file {filename.as_posix()}', file=sys.stderr)
elif filename.name in ('LOCK', 'CURRENT', 'LOG', 'LOG.old'):
print(f'Ignoring {filename.as_posix()}', file=sys.stderr)
else:
print(f'Unsupported file type {filename.as_posix()}', file=sys.stderr)

def _GetLogRecords(
    self, filename: pathlib.Path) -> Generator[LevelDBRecord, None, None]:
  """Yields a LevelDBRecord for each parsed internal key of a log file.

  Args:
    filename: the path of the log file.

  Yields:
    LevelDBRecords whose path is the file's POSIX-style path.
  """
  posix_path = filename.as_posix()
  reader = log.FileReader(posix_path)
  for internal_key in reader.GetParsedInternalKeys():
    yield LevelDBRecord(path=posix_path, record=internal_key)

def _GetLdbRecords(
    self, filename: pathlib.Path) -> Generator[LevelDBRecord, None, None]:
  """Yields a LevelDBRecord for each key/value record of an ldb file.

  Args:
    filename: the path of the ldb file.

  Yields:
    LevelDBRecords whose path is the file's POSIX-style path.
  """
  posix_path = filename.as_posix()
  reader = ldb.FileReader(posix_path)
  for key_value in reader.GetKeyValueRecords():
    yield LevelDBRecord(path=posix_path, record=key_value)

def _RecordsByManifest(self):
"""Yields LevelDBRecords using the current MANIFEST file."""
latest_version = self.GetLatestVersion()

processed_files = set()
# read log records
log_records = []
if latest_version.current_log:
current_log = path / latest_version.current_log
if current_log.exists():
for log_record in cls.FromFile(file_path=current_log):
log_records.append(log_record)
current_log_filename = self.foldername / latest_version.current_log
if current_log_filename.exists():
log_records = list(self._GetLogRecords(filename=current_log_filename))
processed_files.add(current_log_filename)
else:
print('No current log file.', file=sys.stderr)

# read records from the "young" or 0-level
young_records = []
for active_file in latest_version.active_files.get(0, {}).keys():
current_young = path / active_file
if current_young.exists():
for young_record in cls.FromFile(current_young):
young_records.append(young_record)
current_young_filename = self.foldername / active_file
if current_young_filename.exists():
young_records = list(self._GetLdbRecords(current_young_filename))
processed_files.add(current_young_filename)

# update the recovered attribute based on the sequence number and key.
active_records = {}
for record in sorted(
log_records,
Expand Down Expand Up @@ -180,11 +200,41 @@ def FromManifest(
if latest_version.active_files.keys():
for level in range(1, max(latest_version.active_files.keys()) + 1):
for filename in latest_version.active_files.get(level, []):
current_filename = path / filename
for record in cls.FromFile(file_path=current_filename):
if record.record.key in active_records:
record.recovered = True
else:
record.recovered = False
record.level = level
yield record
current_filename = self.foldername / filename
if current_filename.exists():
processed_files.add(current_filename)
for record in self._GetLdbRecords(filename=current_filename):
if record.record.key in active_records:
record.recovered = True
else:
record.recovered = False
record.level = level
yield record
else:
print(f'Could not find {current_filename}.', file=sys.stderr)

# as a final step, parse any other log/ldb files.
for log_file in self.LogFiles():
if log_file in processed_files:
continue
for record in self._GetLogRecords(filename=log_file):
record.recovered = True
yield record

for ldb_file in self.LdbFiles():
if ldb_file in processed_files:
continue
for record in self._GetLdbRecords(filename=ldb_file):
record.recovered = True
yield record

def GetRecords(
self,
use_manifest: bool = False
) -> Generator[LevelDBRecord, None, None]:
"""Yield records."""
if use_manifest:
yield from self._RecordsByManifest()
else:
for filename in self.foldername.iterdir():
yield from LevelDBRecord.FromFile(filename)
2 changes: 1 addition & 1 deletion tests/dfindexeddb/indexeddb/safari/record.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class SafariIndexedDBTest(unittest.TestCase):
"""Unit tests for Safari IndexedDB encoded sqlite3 databases."""

def setUp(self):
self.db = record.Reader(
self.db = record.FileReader(
'./test_data/indexeddb/safari/17.3.1/IndexedDB.sqlite3')

def test_nonexistent_record(self):
Expand Down