blob: 95fcd21a050df855b0ae96e9d215c43a89e3ae15 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
|
"""This is an experiment to see how often ftok() returns duplicate keys for
different filenames.
"""
import sys
import os
import sysv_ipc
# Pick the directory to scan: the sole command-line argument if one was given,
# otherwise ask interactively (an empty answer means the home directory).
if len(sys.argv) == 2:
    start_path = sys.argv[1]
else:
    # raw_input() only exists on Python 2; Python 3 renamed it to input().
    # Resolve whichever is available so the script runs on both.
    try:
        read_line = raw_input
    except NameError:
        read_line = input
    msg = "Start path? [Default = your home directory] "
    start_path = read_line(msg)
    if not start_path:
        start_path = "~"

# Expand paths that start with a tilde and then absolutize.
start_path = os.path.expanduser(start_path)
start_path = os.path.abspath(start_path)

# For every filename in the tree, generate a key and associate the filename
# with that key via a dictionary (key -> list of fully qualified filenames).
d = {}
nfilenames = 0
nfailed = 0
for path, dirnames, filenames in os.walk(start_path):
    for filename in filenames:
        # Fully qualify the path
        filename = os.path.join(path, filename)
        nfilenames += 1
        # print("Processing %s..." % filename)
        # 42 is an arbitrary project id; it just has to be the same for every
        # file so the keys are comparable. The third argument (True) is
        # sysv_ipc's silence_warning flag.
        key = sysv_ipc.ftok(filename, 42, True)
        if key == -1:
            # POSIX ftok() returns -1 when it cannot stat the path (e.g. a
            # broken symlink or a file that vanished during the walk).
            # NOTE(review): sysv_ipc.ftok appears to hand that value back
            # rather than raise -- confirm against the sysv_ipc source.
            # Lumping every such file under key -1 would report them all as
            # collisions, so count them separately instead.
            nfailed += 1
            continue
        d.setdefault(key, []).append(filename)

# Print statistics, including files with non-unique keys.
ndups = 0
for key in d:
    if len(d[key]) > 1:
        # Every filename that shares its key with another one is counted.
        ndups += len(d[key])
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("{0} {1}".format(key, d[key]))

if nfailed:
    print("Skipped {0} files for which ftok() failed.".format(nfailed))

print("Out of {0} unique filenames, I found {1} duplicate keys.".format(nfilenames, ndups))
|