-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathimport-files-on-ceph-servers.py
More file actions
79 lines (66 loc) · 2.32 KB
/
import-files-on-ceph-servers.py
File metadata and controls
79 lines (66 loc) · 2.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import os
import sys
from idb.helpers.logging import getLogger, configure_app_log
logger = getLogger("restore")
from idb.postgres_backend.db import PostgresDB
# When Ceph chewed up a bunch of objects (ticket #2605), we figured out how to
# rebuild them by searching for the file parts on disk. We generated lists of
# files on all Ceph nodes with:
#   find /srv/ceph -ls > <text file>
# which we then import below into a postgres table in idb-api-beta for rapid
# lookups and searching.
# The table was created manually with the statements below:
#\connect idb_api_beta
#CREATE TABLE ceph_server_files (
# server VARCHAR(16) NOT NULL,
# line INTEGER,
# unk INTEGER,
# perms VARCHAR(16),
# unk2 INTEGER,
# owner_name VARCHAR(16),
# group_name VARCHAR(16),
# size BIGINT,
# day INTEGER,
# month VARCHAR(3),
# year_time VARCHAR(8),
# fullname TEXT NOT NULL,
# filename TEXT NOT NULL
#);
#
#alter table ceph_server_files owner to idigbio
#
#create index index_ceph_on_filename_with_pattern_ops
#on ceph_server_files (filename text_pattern_ops);
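# Do I need a unique index? (The INSERT below uses ON CONFLICT DO NOTHING,
# which only skips duplicates when a unique index or constraint exists.)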
#CREATE UNIQUE INDEX index_ceph_fullname
#ON ceph_server_files (fullname);
def file_list_iter(fn):
    """Yield one row of column values per usable line of a file listing.

    Each line is expected to hold 12 whitespace-separated fields: a
    ``server:`` prefix followed by the columns printed by ``find -ls``,
    the last of which is the file path. Only lines whose path contains
    "current" are yielded; lines with fewer than 12 fields (blank or
    truncated lines) are skipped instead of raising ``IndexError``.

    Args:
        fn: Path of the text listing to read.

    Yields:
        A list of 13 strings: the 12 parsed fields (server name without its
        trailing ":", path with doubled backslashes collapsed) followed by
        the basename of the path.
    """
    with open(fn, 'r') as f:
        for line in f:
            # Cap the split at 12 fields so that whitespace inside the path
            # (printed by `find -ls` as "\ ") stays in the path column instead
            # of spilling into extra fields that would no longer match the
            # 13 placeholders of the INSERT statement.
            # NOTE: for symlinks `find -ls` appends " -> target"; that suffix
            # is kept as part of the path field.
            fields = line.rstrip("\n").split(None, 11)
            if len(fields) < 12:
                continue  # blank or malformed line: nothing to import
            if "current" in fields[11]:
                fields[0] = fields[0][:-1]  # trim ":" from end of server name
                # de-escape slashes produced by `find -ls` to get the real file name
                fields[11] = fields[11].replace("\\\\", "\\")
                fields.append(os.path.basename(fields[11]))  # only fn for prefix searching
                yield fields
if __name__ == '__main__':
    # Usage: <script> <listing file>
    # Loads every row produced by file_list_iter() into ceph_server_files.
    listing_path = sys.argv[1]

    with PostgresDB() as db:
        # Thirteen placeholders: one per element of each yielded row.
        insert_sql = """INSERT INTO ceph_server_files
(server, line, unk, perms, unk2, owner_name, group_name,
size, month, day, year_time, fullname, filename)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
ON CONFLICT DO NOTHING
"""
        # The counter starts at 1 and is reset to 1 after each intermediate
        # commit, so a commit happens each time it reaches 100000.
        pending = 1
        for row in file_list_iter(listing_path):
            db.execute(insert_sql, row)
            pending += 1
            if (pending % 100000) != 0:
                continue
            db.commit()
            pending = 1

        # Flush whatever is left from the last partial batch.
        db.commit()

        # Debugging aid: uncomment to spot-check a few imported rows.
        # rows = db.fetchall("SELECT * FROM ceph_server_files LIMIT 3")
        # for f in rows:
        #     print(f)