-
Notifications
You must be signed in to change notification settings - Fork 13
/
file_io.py
83 lines (59 loc) · 1.93 KB
/
file_io.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#-*- coding: utf-8 -*-
import glob
import os
import random
import re
# Seed the shared ``random`` module generator at import time with a fixed
# value, so the random.sample() call in list_files() draws a repeatable
# sequence. NOTE(review): this reseeds the process-wide generator for every
# importer of this module, not just for this file.
random.seed(111)
class _FileSorter:
def __init__(self):
pass
def sort(self, list_of_strs):
list_of_strs.sort(key=self._alphanum_key)
def _tryint(self, s):
try:
return int(s)
except:
return s
def _alphanum_key(self, s):
""" Turn a string into a list of string and number chunks.
"z23a" -> ["z", 23, "a"]
"""
return [ self._tryint(c) for c in re.split('([0-9]+)', s) ]
def list_files(directory, pattern="*.*", n_files_to_sample=None, recursive_option=True, random_order=True):
    """List the paths in a directory that match a glob pattern.

    Parameters
    ----------
    directory : str
        directory to search
    pattern : str
        glob-style pattern (as understood by ``glob.glob``), joined onto each
        searched directory; this is not a regular expression.
    n_files_to_sample : int or None
        maximum number of paths to return.
        If this parameter is None, function returns every matched path.
    recursive_option : boolean
        option for searching subdirectories. If this option is True,
        function searches all subdirectories recursively.
    random_order : boolean
        only consulted when n_files_to_sample is not None. If True, the
        returned paths are drawn with ``random.sample``; if False, the first
        n_files_to_sample paths in natural sort order are returned.

    Returns
    ----------
    files : list of str
        matched paths. In natural sort order unless a random sample was
        requested. Holds fewer than n_files_to_sample entries when fewer
        paths matched.

    Raises
    ----------
    ValueError
        if n_files_to_sample is negative while random_order is True
        (raised by ``random.sample``).

    Examples
    --------
    >>> files = list_files("images", pattern="*.jpg", n_files_to_sample=10)  # doctest: +SKIP
    """
    if recursive_option:
        # os.walk yields the top directory itself first, then every subdirectory.
        dirs = [path for path, _, _ in os.walk(directory)]
    else:
        dirs = [directory]

    files = []
    for dir_ in dirs:
        files.extend(glob.glob(os.path.join(dir_, pattern)))

    # Natural sort first, so the result (and the population handed to
    # random.sample) does not depend on filesystem enumeration order.
    _FileSorter().sort(files)

    if n_files_to_sample is not None:
        if random_order:
            # random.sample raises ValueError when asked for more items than
            # exist; clamp so this branch degrades the same way as the slice below.
            files = random.sample(files, min(n_files_to_sample, len(files)))
        else:
            files = files[:n_files_to_sample]
    return files
# Script entry point: intentionally a no-op, the module is meant to be imported.
if __name__ == "__main__":
    pass