Skip to content

Commit 3f0bf74

Browse files
committed
Implement File_size_distribution
1 parent 0883085 commit 3f0bf74

File tree

1 file changed

+131
-0
lines changed

1 file changed

+131
-0
lines changed

src/file_size_distribution.nim

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
import os, strutils, math, terminal, fusion/btreetables
2+
3+
# Bar glyphs ordered from a full cell down to a one-eighth cell.
# `drawBar` emits index 0 for every completely filled cell and indexes from
# the end (`[^partial]`, partial in 1..7) for the trailing fractional cell,
# so the array must run full block -> 7/8 -> ... -> 1/8 (U+2588..U+258F).
# The glyphs were empty strings before, which made every bar render as "".
const HistogramBlocks = ["█", "▉", "▊", "▋", "▌", "▍", "▎", "▏"]
4+
# Animation frames for the progress indicator; `progressUpdate` writes one
# frame per tick in front of the "Scanning the file system..." message.
# NOTE(review): all eight frames are empty strings here — presumably the
# spinner glyphs were lost somewhere; confirm against the intended characters.
const ProgressChars = ["", "", "", "", "", "", "", ""]
5+
6+
type Stat = object
  ## Running aggregate over a group of file sizes.
  filesSeen, totalSize: BiggestInt  ## file count and sum of their sizes
  maxSize, minSize: BiggestInt      ## largest and smallest size recorded
12+
13+
type FsStat = object
  ## Result of a file-system scan.
  stat: Stat                     ## aggregate over every scanned file
  table: CountTable[BiggestInt]  ## file size -> number of files of that size
17+
18+
func initStat(): Stat =
  ## Returns an empty `Stat`: zero counters, with `maxSize`/`minSize` set to
  ## sentinels (-1 and the largest `BiggestInt`) so that the first recorded
  ## size replaces both.
  result.filesSeen = 0
  result.totalSize = 0
  result.maxSize = -1
  result.minSize = high(BiggestInt)
25+
26+
func initFsStat(): FsStat =
  ## Returns an empty `FsStat`: a fresh `Stat` plus a count table created
  ## with an initial size of 1024.
  result.stat = initStat()
  result.table = initCountTable[BiggestInt](1024)
31+
32+
template incStatsTmpl(s: Stat; size: BiggestInt) =
  ## Folds one file of `size` into the aggregate `s`: tightens the min/max
  ## bounds, adds to the running total and bumps the file counter.
  ## `s` must be a mutable location, since the template assigns to its fields.
  if size < s.minSize:
    s.minSize = size
  if size > s.maxSize:
    s.maxSize = size
  s.totalSize = s.totalSize + size
  inc(s.filesSeen)
37+
38+
func log2Bin(size: BiggestInt): int =
  ## Index of the base-4 stratum that holds `size`: 0 for sizes <= 0,
  ## otherwise floor(log2(size) / 2) + 1, so bin b covers [4^(b-1), 4^b).
  ## Computed with integer shifts: float `log2` is -inf for 0 and rounds
  ## sizes above 2^53 into the wrong stratum.
  if size > 0:
    var rest = size
    var bits = 0
    while rest > 1:
      rest = rest shr 1
      inc bits
    result = bits div 2 + 1

func getLog2Stats(fs: FsStat): seq[Stat] =
  ## Groups the per-size counts in `fs.table` into base-4 size strata and
  ## returns one `Stat` per stratum, indexed by bin number. Bin 0 holds the
  ## empty files. The result always has at least one entry and extends up to
  ## the bin of `fs.stat.maxSize`.
  let lastBin = log2Bin(fs.stat.maxSize)
  for _ in 0..lastBin:
    result.add(initStat())
  for size, count in fs.table.pairs:
    if count > 0:
      # Fold all `count` files of this size in at once rather than looping
      # once per file; min/max only need to see the size a single time.
      let bin = log2Bin(size)
      result[bin].minSize = min(result[bin].minSize, size)
      result[bin].maxSize = max(result[bin].maxSize, size)
      result[bin].totalSize += size * BiggestInt(count)
      result[bin].filesSeen += BiggestInt(count)
53+
54+
func drawBar(num, maxNum: Natural; width: Natural): string =
  ## Renders a horizontal bar for `num` on a scale where `maxNum` spans
  ## `width` character cells: whole cells use `HistogramBlocks[0]`, and the
  ## fractional remainder is rounded to sevenths and drawn with one glyph
  ## taken from the end of `HistogramBlocks`.
  ##
  ## Returns "" when `num` is 0. Also returns "" when `maxNum` is 0, because
  ## the scale is undefined there (the division would give NaN/inf, which
  ## must not reach `toInt`).
  if num > 0 and maxNum > 0:
    let scaled = toFloat(num) / toFloat(maxNum) * toFloat(width)
    let full = toInt(floor(scaled))
    var partial = toInt(round(7.0 * (scaled - trunc(scaled))))
    # A non-zero count must stay visible even when it rounds down to nothing.
    if partial == 0 and full == 0:
      partial = 1
    result = repeat(HistogramBlocks[0], full)
    if partial > 0:
      result.add(HistogramBlocks[^partial])
65+
66+
proc progressUpdate(mainThreadBusy: ptr bool) {.thread.} =
  ## Thread body that animates a one-line progress indicator on stdout while
  ## `mainThreadBusy[]` is true, drawing one frame of `ProgressChars` every
  ## 250 ms.
  ##
  ## The flag is re-read before every frame (not once per full animation
  ## cycle), so the thread stops within one tick of the flag being cleared.
  ## The pointer must stay valid until this thread has finished.
  var frame = 0
  while mainThreadBusy[]:
    stdout.write("\r", ProgressChars[frame], " Scanning the file system...")
    flushFile(stdout)
    sleep(250)
    frame = (frame + 1) mod ProgressChars.len
72+
73+
proc walkFs(path: string): FsStat =
  ## Recursively scans `path` and returns the aggregate statistics together
  ## with a size -> file-count table. Only entries of kind `pcFile` are
  ## yielded and only `pcDir` directories are descended into; sizes are read
  ## with `getFileInfo(file, false)`.
  ##
  ## A progress indicator runs on a helper thread during the scan. Any
  ## exception raised by the walk or by `getFileInfo` propagates to the
  ## caller after the helper thread has been stopped.
  result = initFsStat()
  var mainThreadBusy = true
  var thProgress: Thread[ptr bool]
  createThread(thProgress, progressUpdate, addr mainThreadBusy)
  try:
    for file in walkDirRec(path, {pcFile}, {pcDir}):
      let fSize = getFileInfo(file, false).size
      incStatsTmpl(result.stat, fSize)
      result.table.inc(fSize)
  finally:
    # The helper thread holds a pointer to `mainThreadBusy`, which lives on
    # this stack frame: it has to be joined before returning, otherwise it
    # reads a dangling pointer and keeps writing over later output.
    mainThreadBusy = false
    joinThread(thProgress)
    stdout.write("\r")
91+
92+
when isMainModule:
  # Directory to scan: first command-line argument, else the current directory.
  let startPath =
    if paramCount() > 0: paramStr(1)
    else: getCurrentDir()

  # The start path must be a directory AND readable by the user. getFileInfo
  # raises OSError for a missing or inaccessible path; treat that the same as
  # a failed check rather than crashing with a stack trace.
  let startOk =
    try:
      let startInfo = getFileInfo(startPath)
      startInfo.kind == pcDir and fpUserRead in startInfo.permissions
    except OSError:
      false
  if not startOk:
    echo("Error reading directory ", startPath)
    quit(QuitFailure)

  let fs = walkFs(startPath)
  echo("Files scanned: ", fs.stat.filesSeen, ", total: ",
       formatSize(fs.stat.totalSize))
  let stats = fs.getLog2Stats()
  echo("Stats for files by size strata; Bars: file count.")

  # First pass: build the text part of every row and find the widest row and
  # the largest file count, which together define the bar scale.
  var statStrSeq: seq[(string, BiggestInt)]
  var maxNum: BiggestInt = 0
  var maxLineLen = 0
  for bin, s in stats.pairs:
    # An empty stratum still carries the -1 sentinel in maxSize; show 0.
    let maxSize = if s.filesSeen == 0: BiggestInt(0) else: s.maxSize
    let line =
      if bin == 0:
        format("$1: Max: $2; $3 files", [
          align(formatSize(0), 16),
          align(formatSize(maxSize), 11),
          $s.filesSeen
        ])
      else:
        # Stratum `bin` spans 4^(bin-1) up to 4^bin.
        let curStrata = toInt(2.0.pow(toFloat(bin)*2))
        let prevStrata = toInt(2.0.pow(toFloat(bin-1)*2))
        format("$1$2: Max: $3; $4 files", [
          alignLeft(formatSize(prevStrata) & "<", 8, '.'),
          align("<" & formatSize(curStrata), 8, '.'),
          align(formatSize(maxSize), 11),
          $s.filesSeen
        ])
    maxLineLen = max(maxLineLen, line.len())
    maxNum = max(maxNum, s.filesSeen)
    statStrSeq.add((line, s.filesSeen))

  # Second pass: print each row followed by its bar. On a terminal narrower
  # than the text the remaining width would be negative, so clamp it to 0.
  let maxWidth = max(0, terminalWidth() - maxLineLen - 1)
  for (line, n) in statStrSeq:
    # With no files at all there is no scale to draw against.
    let bar = if maxNum > 0: drawBar(n, maxNum, maxWidth) else: ""
    echo(alignLeft(line, maxLineLen+1, ' '), bar)

0 commit comments

Comments
 (0)