# -*- coding: utf-8 -*-
"""
Created on Wed Oct 17 17:37:00 2012
Make a database of all the tests and their results
@author: dave
"""
#import sys
import os
import pickle
#import logging
#import copy
import string
import shutil
import numpy as np
import matplotlib as mpl
import pandas as pd
import ojfresult
import plotting
import ojf_post
import misc
PATH_DB = 'database/'
OJFPATH_RAW = 'data/raw/'
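
# The module is organised as a small pipeline: the symlink_* functions below
# gather the raw result files under consistent case names, convert_pkl_index_df
# turns the pickled index into a searchable DataFrame, build_db computes the
# per-case statistics, and the ojf_db class queries those statistics.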
def symlink_to_hs_folder(source_folder, path_db, symf='symlinks_hs_mimer/'):
    """
    Create symlinks to the HS camera footage folders on Mimer

    source_folder should be the path to where all the days are saved:
    source_folder = '/x/y/z/02/'
    source_folder = '/x/y/z/04/'

    on Mimer, paths to the HS footage:
    02/2012-02-12/HS 0212/
    04/2012-04-05/0405_HScamera/

    see also make_symlinks_hs further down
    """
    # load the database index for the dspace-strain-ojf cases
    FILE = open(path_db + 'db_index_symlinks_all_runid.pkl', 'rb')
    db_index_runid = pickle.load(FILE)
    FILE.close()

    path_db += symf
    # create the folder if it doesn't exist
    try:
        os.mkdir(path_db)
    except OSError:
        pass

    # ---------------------------------------------------------------------
    # FEBRUARY, LaCie2Big, Lacie
    # ---------------------------------------------------------------------
    # for the LaCie February results we just pass on the folder directly:
    # they are already grouped in one folder
    if source_folder.endswith('HighSpeedCamera'):
        for result in os.listdir(source_folder):
            # give the same name as the dspace-strain-ojf case
            runid = '_'.join(result.split('_')[0:3])
            print
            print runid
            # if we cannot find the case, keep the original name
            try:
                resshort = db_index_runid[runid]
                print ' ', '_'.join(result.split('_')[0:-1])
                print ' ', resshort
            except KeyError:
                resshort = '_'.join(result.split('_')[0:-1])
                print resshort
            source = os.path.join(source_folder, result)
            target = os.path.join(path_db, resshort)
            try:
                os.symlink(source, target)
            except OSError:
                print ' **** file exists ****'
                print ' source:', result
                print ' target:', resshort
        # and we are done, do not go down to the next level
        return

    # ---------------------------------------------------------------------
    # ALL OTHER CASES
    # ---------------------------------------------------------------------
    # all the days are listed in here
    for day in os.listdir(source_folder):
        # for each day, check if there is HS camera footage available
        subfolders = os.listdir(os.path.join(source_folder, day))
        for folder in subfolders:
            if folder.find('HS ') > -1 or folder.find('_HScamera') > -1:
                # now we have the folder containing all the results folders
                results = os.listdir(os.path.join(source_folder, day, folder))
                for result in results:
                    # give the same name as the dspace-strain-ojf case
                    runid = '_'.join(result.split('_')[0:3])
                    print
                    print runid
                    # if we cannot find the case, just maintain its
                    # original name
                    try:
                        resshort = db_index_runid[runid]
                        print ' ', '_'.join(result.split('_')[0:-1])
                        print ' ', resshort
                    except KeyError:
                        resshort = '_'.join(result.split('_')[0:-1])
                        print resshort
                    source = os.path.join(source_folder, day, folder, result)
                    target = os.path.join(path_db, resshort)
                    try:
                        os.symlink(source, target)
                    except OSError:
                        print ' **** file exists ****'
                        print ' source:', result
                        print ' target:', resshort
                    #print source
                    #print target
                    #print
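
# Note that the grouping above keys the footage on the first three
# underscore-separated fields of the folder name, so a (hypothetical) folder
# '0405_run_055_9ms_dc0_flexies' is looked up under runid '0405_run_055'.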
def symlink_to_folder(source_folder, path_db, **kwargs):
    """
    Create symlinks in one database folder and use consistent naming for the
    OJF, DSpace and strain result files. That makes combining all three
    result files later a breeze.

    This is by far the safest way since the files are not actually renamed.
    """
    df_index = {'source':[], 'runid':[], 'caseid':[]}

    db_id = kwargs.get('db_id', 'symlinks')
    path_db += db_id + '/'
    # create the folder if it doesn't exist
    try:
        os.mkdir(path_db)
    except OSError:
        pass

    # TODO: move filtering to the build_db stuff
    # file ignore list, looks if the keyword occurs in the file name
    fileignore = kwargs.get('fileignore',
        ['zerorun', 'calibration', 'slowdown', 'spindown', 'bladecal',
         'towercal', 'virbations', 'eigen', 'sweep', 'vibration', 'speedup',
         'spinup', 'shutdown', 'startup'])

    # folder ignore operates on the first 3 characters of the folder name
    folderignore = kwargs.get('folderignore',
        ['mea', 'dsp', 'tri', 'cal', 'hs ', 'dc_', 'ojf', 'hs_'])

    # save a pickled dictionary that holds all the unique base names
    db_index = {}
    # and a short version where only the runid is taken
    db_index_runid = {}

    ignore_root = 'NA'
    # cycle through everything we can reach from the target path
    for root, dirs, files in os.walk(source_folder, topdown=True):
        file_dict = {}

        # and also ignore any subfolders, only works with the topdown approach
        if root.startswith(ignore_root):
            #print 'ignore:', root
            continue

        # do not consider content of folders: triggers, Measurement, dSPACE
        folder = root.split('/')[-1]
        # cut them all to the same length
        #if len(folder) > 5 and folder[0:6] in folderignore:
        if len(folder) > 5 and folder[0:3].lower() in folderignore:
            ignore_root = root
            #print 'ignore:', root
            continue
        else:
            ignore_root = 'NA'

        # for each folder, keep all the filenames in a dictionary. As key,
        # keep what we know is the same for sure: date_runid
        for name in files:
            # ignore movie files
            ext = name[-4:len(name)]
            if ext in ['.asf', '.avi', '.rar', '.bmp']:
                continue
            # the current file is not allowed to have any item occurring in
            # the ignore list
            nextfile = False
            for k in fileignore:
                nextfile = False
                if name.find(k) > -1:
                    nextfile = True
                    break
            if nextfile: continue

            # prepare the dictionary key
            key = name.replace('.log','').replace('.mat','').replace('.csv','')
            # ignore if the first item of the key is not the date
            try:
                int(key.split('_')[0])
            except ValueError:
                continue
            key = '_'.join(key.split('_')[0:3])
            # each key can have up to 3 files (dspace, ojf, strain)
            if key in file_dict:
                file_dict[key][name] = root
            else:
                # you can't have the same filename in one dir, so no risk
                # of a previously created key
                file_dict[key] = {name : root}

        # and cycle through all the files in the directory that have to be
        # renamed consistently. Each key is the case id, the values are the
        # files and their full paths
        print root
        for key, values in file_dict.iteritems():
            print ' '+ key
            # only consider for renaming if we have more than one file,
            # but also not more than 3 (then we don't know exactly what is
            # going on)
            if not 1 < len(values) < 4: continue

            # first pass over the files with the same id:
            # always use the mat file as a basis for renaming
            basisname = False
            for i in file_dict[key]:
                print ' ' + i
                if i.endswith('.mat'):
                    basisname = i.replace('.mat', '')
                    # and the short index
                    runid = '_'.join(basisname.split('_')[0:3])
                    db_index[basisname] = runid
            # if there is no mat file, take the name of the first we get
            if not basisname:
                i = file_dict[key].keys()[0]
                basisname = i.replace('.csv', '').replace('.log', '')
                # and the short index
                runid = '_'.join(basisname.split('_')[0:3])
                db_index[basisname] = runid

            # and also have the inverse index file, probably redundant....
            db_index_runid[runid] = basisname

            print

            # second pass for the actual renamed symlink
            for name, rootn in values.iteritems():
                ext = name[-4:len(name)]
                # extension can be blank for some log files
                if ext not in ['.log', '.csv', '.mat']:
                    newname = basisname + '.log'
                else:
                    newname = basisname + ext
                print ' ' + newname
                path_source = os.path.join('../../', rootn, name)
                path_target = os.path.join(path_db, newname)
                # collect all cases as symlinks in the database folder:
                # this holds the lowest risk of destroying the actual data!
                os.symlink(path_source, path_target)
                # save in the df database; store the source path relative to
                # the root folder (os.path.commonprefix would return the
                # shared prefix instead of the remainder, which is not what
                # we want here)
                source_rel = os.path.relpath(os.path.join(rootn, name),
                                             source_folder)
                df_index['source'].append(source_rel)
                df_index['runid'].append(runid)
                df_index['caseid'].append(newname)

            ## do not rename a file if the target already exists
            ## based on stackoverflow answer
            #try:
                #with open(root+'/'+newname) as f:
                    #print ' ' + name
            #except IOError:
                ## it will raise an exception if it does not exist
                ##os.rename(root+'/'+name, root+'/'+newname)
                ## or just create a symlink in the big database folder
                #print ' ' + newname

    # save in the root folder
    path_db = path_db.replace(db_id+'/', '')
    # first, update the existing file, so results of February and April merge
    # based on stackoverflow answer, check if the index file exists
    try:
        # if it exists, update the file first before saving
        FILE = open(path_db + 'db_index_%s.pkl' % db_id, 'rb')
        db_index.update(pickle.load(FILE))
        FILE.close()
    except IOError:
        # there is no existing database file to update
        pass
    try:
        # if it exists, update the file first before saving
        FILE = open(path_db + 'db_index_%s_runid.pkl' % db_id, 'rb')
        db_index_runid.update(pickle.load(FILE))
        FILE.close()
    except IOError:
        # there is no existing database file to update
        pass

    # and save the database index
    FILE = open(path_db + 'db_index_%s.pkl' % db_id, 'wb')
    pickle.dump(db_index, FILE, protocol=2)
    FILE.close()
    # and save the database index
    FILE = open(path_db + 'db_index_%s_runid.pkl' % db_id, 'wb')
    pickle.dump(db_index_runid, FILE, protocol=2)
    FILE.close()
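
# Example usage (hypothetical raw-data layout): calling the function once per
# measurement campaign merges both campaigns into the same symlink folder and
# index files:
#
#   symlink_to_folder('data/raw/02/', PATH_DB)
#   symlink_to_folder('data/raw/04/', PATH_DB)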
def symlinks_to_dcsweep(source_folder, path_db, db_id):
    """
    The dc-sweep cases are already grouped in a different folder; now put them
    on the same pile as all the rest
    """
    path_db += db_id + '/'
    # create the folder if it doesn't exist
    try:
        os.mkdir(path_db)
    except OSError:
        pass

    # save a pickled dictionary that holds all the unique base names
    db_index = {}
    # and a short version where only the runid is taken
    db_index_runid = {}

    # because each case needs a unique run id
    alphabet = []
    # and make it go from aa, ab, ac, ... yz, zz
    for i in string.ascii_lowercase:
        for j in string.ascii_lowercase:
            alphabet.append('%s%s' % (i,j))

    # ignore the ones from February, they are with the alu blades
    folderignore = 'alublades'

    # fname  'Measurement_12-Apr-2012_DCycle_0.1_V_8_run_365.mat'
    # folder '2012-02-06_06_alublades'
    iis = {}
    for root, dirs, files in os.walk(source_folder, topdown=True):
        folder = root.split('/')[-1]
        if folder.find(folderignore) > -1 or len(folder) < 1:
            continue
        date = ''.join(folder.split('_')[0].split('-')[1:3])
        case = ('_'.join(folder.split('_')[1:])) + '_dcsweep'
        for fname in sorted(files):
            if fname.endswith('.log'):
                continue
            print fname, ' ->',
            fname_parts = fname.split('_')
            dc = fname_parts[3]
            wind = fname_parts[5]
            run = fname_parts[7].replace('.mat', '')
            run = format(int(run), '03.0f')
            try:
                iis[run]
            except KeyError:
                iis[run] = 0
            runa = run + alphabet[iis[run]]
            runid = '_'.join([date,'run',runa])
            iis[run] += 1
            # new '0209_run_020_15ms_dc10_stiffblades_pwm1000_cal_dashboard'
            new = '_'.join([date,'run',runa,wind+'ms','dc'+dc,case+'.mat'])
            print new

            # and make an OJF log file with only the wind speed in it
            logdstname = new.replace('.mat', '.log')
            try:
                # for some cases we actually have the source
                logsrc = root+'/'+'_'.join([date, 'run', run])+'.log'
                shutil.copy(logsrc, root+'/'+logdstname)
            except IOError:
                ojfline = '0.0 0.0 0.0 0.0 %s\n' % wind
                FILE = open(root+'/'+logdstname, 'w')
                FILE.writelines([ojfline]*30)
                FILE.close()

            # and make the symlinks
            # relative symbolic links: first two levels up
            root_ = os.path.join('../../', root)
            os.symlink(os.path.join(root_, fname), path_db+new)
            os.symlink(os.path.join(root_, logdstname), path_db+logdstname)

            # save in the index file
            db_index[new.replace('.mat', '')] = runid
            # and also have the inverse index file, probably redundant....
            db_index_runid[runid] = new.replace('.mat', '')

    # save in the root folder
    path_db = path_db.replace(db_id+'/', '')
    # first, update the existing file, so results of February and April merge
    try:
        # if it exists, update the file first before saving
        FILE = open(path_db + 'db_index_%s.pkl' % db_id, 'rb')
        db_index_update = pickle.load(FILE)
        # overwrite the old entries with new ones! not the other way around
        db_index_update.update(db_index)
        FILE.close()
    except IOError:
        # there is no existing database file to update
        db_index_update = db_index
    try:
        # if it exists, update the file first before saving
        FILE = open(path_db + 'db_index_%s_runid.pkl' % db_id, 'rb')
        db_index_runid_up = pickle.load(FILE)
        # overwrite the old entries with new ones! not the other way around
        db_index_runid_up.update(db_index_runid)
        FILE.close()
    except IOError:
        # there is no existing database file to update
        db_index_runid_up = db_index_runid

    # and save the database index
    FILE = open(path_db + 'db_index_%s.pkl' % db_id, 'wb')
    pickle.dump(db_index_update, FILE, protocol=2)
    FILE.close()
    # and save the database index
    FILE = open(path_db + 'db_index_%s_runid.pkl' % db_id, 'wb')
    pickle.dump(db_index_runid_up, FILE, protocol=2)
    FILE.close()
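
# Worked (hypothetical) example of the renaming scheme above: the file
# 'Measurement_12-Apr-2012_DCycle_0.1_V_8_run_365.mat' in a folder
# '2012-04-10_10_stiffblades' becomes
# '0410_run_365aa_8ms_dc0.1_10_stiffblades_dcsweep.mat' with runid
# '0410_run_365aa' (the 'aa' suffix keeps repeated run numbers unique).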
def convert_pkl_index_df(path_db, db_id='symlinks'):
    """
    Convert the pickled database index db_index and db_index_runid to a
    DataFrame. The database is a dictionary holding (basename, runid)
    key/value pairs (runid has it the other way around).

    Additionally, the file name is scanned for other known patterns such as
    dc, wind speed, type of blades, type of run, etc. All these values
    are then placed in respective columns so you can more easily select only
    those cases you are interested in.
    """
#    path_db += db_id + '/'
    fname = os.path.join(path_db, 'db_index_%s' % db_id)
    with open(fname + '.pkl', 'rb') as f:
        db_index = pickle.load(f)

    df_dict = {'basename':[], 'runid':[], 'dc':[], 'blades':[], 'yaw_mode':[],
               'run_type':[], 'rpm_change':[], 'coning':[], 'yaw_mode2':[],
               'the_rest':[], 'windspeed':[], 'sweepid':[], 'month':[],
               'day':[], 'runnr':[]}

    blades = set(['flexies', 'flex', 'stiffblades', 'stiff', 'samoerai',
                  'stffblades'])
    onoff = set(['spinup', 'spinupfast', 'spinuppartial', 'slowdown',
                 'speedup', 'shutdown', 'spinningdown', 'startup'])
    allitems = set([])
    ignore = set(['basename', 'runid', 'month', 'day', 'runnr'])
    for basename, runid in db_index.iteritems():
        df_dict['basename'].append(basename)
        df_dict['runid'].append(runid)
        df_dict['runnr'].append(int(runid[9:12]))
        df_dict['month'].append(runid[:2])
        df_dict['day'].append(runid[2:4])
        # get as much out of the file name as possible
        items = basename.split('_')
        allitems = allitems | set(items)
        found = {k:False for k in df_dict.keys()}
        therest = []
        for k in items:
            if k == 'dcsweep':
                df_dict['run_type'].append(k)
                found['run_type'] = True
                if len(runid) > 13:
                    df_dict['runid'][-1] = runid[:-2]
                    df_dict['sweepid'].append(runid[-2:])
                    found['sweepid'] = True
            elif k.startswith('dc'):
                # in case that fails, we don't know: for example when dc is
                # a range such as 0.65-0.70
                try:
                    df_dict['dc'].append(float(k.replace('dc', '')))
                except ValueError:
                    df_dict['dc'].append(-1.0)
                found['dc'] = True
            elif k in blades:
                if k == 'stffblades':
                    k = 'stiffblades'
                df_dict['blades'].append(k)
                found['blades'] = True
            elif k.find('yaw') > -1:
                if not found['yaw_mode']:
                    df_dict['yaw_mode'].append(k)
                    found['yaw_mode'] = True
                else:
                    df_dict['yaw_mode2'].append(k)
                    found['yaw_mode2'] = True
            elif k.find('coning') > -1:
                df_dict['coning'].append(k)
                found['coning'] = True
            elif k in onoff and not found['rpm_change']:
                df_dict['rpm_change'].append(k)
                found['rpm_change'] = True
            elif k[-2:] == 'ms':
                try:
                    df_dict['windspeed'].append(float(k[:-2]))
                except ValueError:
                    df_dict['windspeed'].append(-1.0)
                found['windspeed'] = True
            elif basename.find(k) < 0 or runid.find(k) < 0:
                therest.append(k)
        df_dict['the_rest'].append('_'.join(therest))
        found['the_rest'] = True
        for key, value in found.iteritems():
            if not value and key not in ignore:
                # to make sure dc items are floats (no mixing of datatypes)
                if key == 'dc' or key == 'windspeed':
                    df_dict[key].append(-1.0)
                else:
                    df_dict[key].append('')

    for k in sorted(allitems):
        print(k)

    misc.check_df_dict(df_dict)

    df = pd.DataFrame(df_dict)
    df.sort_values('basename', inplace=True)
    df.to_hdf(fname + '.h5', 'table', complevel=9, complib='blosc')
    df.to_csv(fname + '.csv', index=False)
    df.to_excel(fname + '.xlsx', index=True)
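
# Sketch of how the generated index can be queried afterwards (hypothetical
# filter values; reading the HDF5 file requires pytables):
#
#   df = pd.read_hdf('database/db_index_symlinks.h5', 'table')
#   sel = df[(df['blades'] == 'stiffblades') & (df['windspeed'] > 8.0)]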
def dc_from_casename(case):
    # try to read the dc from the case file name
    items = case.split('_')
    for k in items:
        if k.startswith('dc'):
            # in case that fails, we don't know: like when dc is
            # something like 0.65-0.70
            try:
                return float(k.replace('dc', ''))
            except ValueError:
                return -1.0
    return np.nan
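
# For example (hypothetical case names): a name containing 'dc0.5' yields 0.5,
# a range such as 'dc0.65-0.70' yields -1.0, and np.nan is returned when no
# dc field is present at all.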
def build_db(path_db, prefix, **kwargs):
    """
    Create the statistics for each OJF case in the index database
    =============================================================

    Scan through all cases in the db_index (each case should have symlinks to
    the results files in the symlink folder) and evaluate the mean values and
    the standard deviations of key parameters.

    Yaw laser and tower strain sensors are calibrated.

    Parameters
    ----------

    path_db : str
        Full path to the to-be-built database

    prefix : str
        Identifier for the database index

    output : str, default=prefix
        Identifier for the figures output path, and the db stats file

    calibrate : boolean, default=True
        Should the data be calibrated? Set to False if not.

    dashplot : boolean, default=False
        If True, a dashboard plot will be made for each case

    key_inc : list
        Keywords that should occur in the database, operator is AND

    resample : boolean, default=False

    dataframe : boolean, default=False
        From a single case combine dSPACE, OJF and blade strain into a single
        Pandas DataFrame.

    save_df : boolean, default=False
        If True, save the combined DataFrame of each case as HDF5 in
        folder_df.

    save_df_csv : boolean, default=False
        If True, additionally save the combined DataFrame as CSV in
        folder_csv.

    continue_build : boolean, default=True
        If True, skip all cases for which a DataFrame already exists in
        folder_df.
    """
    folder_df = kwargs.get('folder_df', 'data/calibrated/DataFrame/')
    folder_csv = kwargs.get('folder_csv', 'data/calibrated/CSV/')
    output = kwargs.get('output', prefix)
    dashplot = kwargs.get('dashplot', False)
    calibrate = kwargs.get('calibrate', True)
    key_inc = kwargs.get('key_inc', [])
    resample = kwargs.get('resample', False)
    dataframe = kwargs.get('dataframe', False)
    save_df = kwargs.get('save_df', False)
    save_df_csv = kwargs.get('save_df_csv', False)
    continue_build = kwargs.get('continue_build', True)
    db_index_file = kwargs.get('db_index_file', 'db_index_%s.pkl' % prefix)

    # read the database
    FILE = open(path_db + db_index_file, 'rb')
    db_index = pickle.load(FILE)
    FILE.close()

    # remove the files we've already done
    if continue_build:
        source_folder = os.path.join(folder_df)
        for root, dirs, files in os.walk(source_folder, topdown=True):
            for fname in files:
                db_index.pop(fname[:-3], None)

    # respath is where all the symlinks are
    respath = path_db + prefix + '/'

    # create the figure folder if it doesn't exist
    try:
        os.mkdir(path_db+'figures_%s/' % output)
    except OSError:
        pass
    try:
        os.mkdir(folder_df)
    except OSError:
        pass
    try:
        os.mkdir(folder_csv)
    except OSError:
        pass

    # save the statistics in a dict
    db_stats = {}
    df_stats = None
    nr, nrfiles = 0, len(db_index)

    # and cycle through all the files present
    for resfile in db_index:
        # only continue if all the keywords are present in the file name
        ignore = False
        for key in key_inc:
            if not resfile.find(key) > -1:
                ignore = True
        if ignore:
            continue

        # if we catch any error, ignore that file for now and go on
        nr += 1
        print
        print '=== %4i/%4i ' % (nr, nrfiles) + 67*'='
        print resfile

        res = ojfresult.ComboResults(respath, resfile, silent=True, sync=True)
        # just in case there is no dspace file, ignore it
        if res.nodspacefile:
            continue
        if calibrate:
            res.calibrate()
        # for the dc-sweep cases, ditch the first seconds where the rotor
        # speed is still changing too much
        if res.dspace.campaign == 'dc-sweep':
            # 4 seconds for the ones lasting 12 seconds, 12 otherwise
            if res.dspace.time[-1] < 13.0:
                cutoff = 4.0
            else:
                cutoff = 12.0
            istart = int(res.dspace.sample_rate*cutoff)
            res.dspace.data = res.dspace.data[istart:,:]
            res.dspace.time = res.dspace.time[istart:]-res.dspace.time[istart]
        if resample:
            res._resample()
        #except:
            #logging.warn('ignored: %s' % resfile)
            #logging.warn(sys.exc_info()[0])
            #continue

        # make a dashboard plot
        if dashplot:
            res.dashboard_a3(path_db+'figures_%s/' % output)

        # calculate all the means, std, min, max and range for each channel
        res.statistics()
        # stats is already a dictionary
        db_stats[resfile] = res.stats
        # add the channel descriptions
        db_stats[resfile]['dspace labels_ch'] = res.dspace.labels_ch
        # in case there is no OJF data
        try:
            db_stats[resfile]['ojf labels'] = res.ojf.labels
        except AttributeError:
            pass

        if dataframe:
            if save_df:
                ftarget = os.path.join(folder_df, resfile + '.h5')
            else:
                ftarget = None
            df = res.to_df(ftarget, complevel=9, complib='blosc')
            if save_df_csv:
                df.to_csv(os.path.join(folder_csv, resfile + '.csv'))
            if df_stats is None:
                # only take the unique entries, cnames contains all possible
                # mappings
                all_c_columns = list(set(res.dspace.cnames.values()))
                all_c_columns.append('time')
                # also add cnames for OJF and BLADE, falling back on empty
                # result objects when a case has no such data
                if not res.isojfdata:
                    res.ojf = ojfresult.OJFLogFile(ojffile=None)
                all_c_columns.extend(res.ojf.cnames)
                if not res.isbladedata:
                    res.blade = ojfresult.BladeStrainFile(None)
                all_c_columns.extend(res.blade.cnames)
                stats_mean = {col:[] for col in all_c_columns}
                stats_mean['index'] = []
                stats_min = {col:[] for col in all_c_columns}
                stats_min['index'] = []
                stats_max = {col:[] for col in all_c_columns}
                stats_max['index'] = []
                stats_std = {col:[] for col in all_c_columns}
                stats_std['index'] = []
                stats_range = {col:[] for col in all_c_columns}
                stats_range['index'] = []
                df_stats = True

            def add_stats(stats, df_dict):
                df_dict['index'].append(resfile)
                for col in stats.index:
                    if col == 'time':
                        df_dict[col].append(res.dspace.time[-1])
                    else:
                        df_dict[col].append(stats[col])
                # and empty items for those for which there is no data
                for col in (set(all_c_columns) - {str(k) for k in stats.index}):
                    if col == 'duty_cycle':
                        dc = dc_from_casename(resfile)
                        df_dict[col].append(dc)
                    else:
                        df_dict[col].append(np.nan)
                return df_dict

            stats_mean = add_stats(df.mean(), stats_mean)
            stats_min = add_stats(df.min(), stats_min)
            stats_max = add_stats(df.max(), stats_max)
            stats_std = add_stats(df.std(), stats_std)
            stats_range = add_stats(df.max()-df.min(), stats_range)

        # if nr > 100:
        #     break

    if df_stats:
        try:
            fname = os.path.join(path_db, 'db_stats_%s_mean.h5' % output)
            df = pd.DataFrame(stats_mean)
            df.to_hdf(fname, 'table', complevel=9, complib='blosc')
            fname = os.path.join(path_db, 'db_stats_%s_mean.xlsx' % output)
            df.to_excel(fname)

            fname = os.path.join(path_db, 'db_stats_%s_min.h5' % output)
            df = pd.DataFrame(stats_min)
            df.to_hdf(fname, 'table', complevel=9, complib='blosc')
            fname = os.path.join(path_db, 'db_stats_%s_min.xlsx' % output)
            df.to_excel(fname)

            fname = os.path.join(path_db, 'db_stats_%s_max.h5' % output)
            df = pd.DataFrame(stats_max)
            df.to_hdf(fname, 'table', complevel=9, complib='blosc')
            fname = os.path.join(path_db, 'db_stats_%s_max.xlsx' % output)
            df.to_excel(fname)

            fname = os.path.join(path_db, 'db_stats_%s_std.h5' % output)
            df = pd.DataFrame(stats_std)
            df.to_hdf(fname, 'table', complevel=9, complib='blosc')
            fname = os.path.join(path_db, 'db_stats_%s_std.xlsx' % output)
            df.to_excel(fname)

            fname = os.path.join(path_db, 'db_stats_%s_range.h5' % output)
            df = pd.DataFrame(stats_range)
            df.to_hdf(fname, 'table', complevel=9, complib='blosc')
            fname = os.path.join(path_db, 'db_stats_%s_range.xlsx' % output)
            df.to_excel(fname)
        except ValueError:
            print('stats_mean')
            misc.check_df_dict(stats_mean)
            print('stats_min')
            misc.check_df_dict(stats_min)
            print('stats_max')
            misc.check_df_dict(stats_max)
            print('stats_std')
            misc.check_df_dict(stats_std)
            print('stats_range')
            misc.check_df_dict(stats_range)

    # load an existing database first, update
    try:
        # if it exists, update the file first before saving
        FILE = open(path_db + 'db_stats_%s.pkl' % output, 'rb')
        db_stats_update = pickle.load(FILE)
        # overwrite the old entries with new ones! not the other way around
        db_stats_update.update(db_stats)
        FILE.close()
    except IOError:
        # there is no existing database file to update
        db_stats_update = db_stats

    # and save the database stats
    FILE = open(path_db + 'db_stats_%s.pkl' % output, 'wb')
    pickle.dump(db_stats_update, FILE, protocol=2)
    FILE.close()
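
# A minimal driver sketch for the functions above (illustrative only: the
# raw-data layout under OJFPATH_RAW is an assumption, and nothing is executed
# at import time).
def _example_build_all():
    # gather both measurement campaigns under consistent symlink names
    symlink_to_folder(os.path.join(OJFPATH_RAW, '02/'), PATH_DB)
    symlink_to_folder(os.path.join(OJFPATH_RAW, '04/'), PATH_DB)
    # convert the pickled index to a searchable DataFrame (h5/csv/xlsx)
    convert_pkl_index_df(PATH_DB, db_id='symlinks')
    # compute the statistics and save each case as a DataFrame
    build_db(PATH_DB, 'symlinks', calibrate=True, dataframe=True,
             save_df=True)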
class ojf_db:
    """
    OJF database class
    ==================

    The OJF statistics database has the following structure:

    db_stats = {ojf_resfile : stats_dict}

    stats_dict has the following keys:
    'blade max', 'blade mean', 'blade min', 'blade range', 'blade std',
    'dspace labels_ch', 'dspace max', 'dspace mean', 'dspace min',
    'dspace range', 'dspace std', 'ojf labels', 'ojf max', 'ojf mean',
    'ojf min', 'ojf range', 'ojf std'

    The corresponding values are the statistical values for the channels
    described in the labels keys.
    """

    def __init__(self, prefix, **kwargs):
        """
        """
        debug = kwargs.get('debug', False)
        path_db = kwargs.get('path_db', 'database/')
        FILE = open(path_db + 'db_stats_%s.pkl' % prefix, 'rb')
        self.db_stats = pickle.load(FILE)
        FILE.close()
        self.path_db = path_db
        self.debug = debug
        self.prefix = prefix
    def ct(self, data):
        """
        Thrust coefficient, based on the tower fore-aft bending moment.

        Parameters
        ----------

        data : ndarray
            ojf_db.select output
        """
        data_headers = {'wind':0, 'RPM':1, 'dc':2, 'volt':3, 'amp':4, 'FA':5,
                        'SS':6, 'yaw':7, 'power':8, 'temp':9, 'B2 root':10,
                        'B2 30':11, 'B1 root':12, 'B1 30':13, 'static_p':14}
        ifa = data_headers['FA']
        iwind = data_headers['wind']
        # convert the tower FA bending moment to rotor thrust
        thrust = data[ifa,:] / ojf_post.model.momemt_arm_rotor
        # TODO: calculate rho from wind tunnel temperature and static
        # pressure: rho = p / (R*T), with R_dryair = 287.058 J/(kg*K)
        rho = 1.225
        V = data[iwind,:]
        # and normalize to get the thrust coefficient
        return thrust / (0.5*rho*V*V*ojf_post.model.A)
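
    # The TODO in ct() above could be closed with the ideal gas law; a
    # sketch, assuming the 'temp' channel is in degrees Celsius and
    # 'static_p' in Pa:
    #
    #   T_k = data[data_headers['temp'], :] + 273.15
    #   rho = data[data_headers['static_p'], :] / (287.058 * T_k)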
    def tsr(self, data):
        r"""
        Tip Speed Ratio :math:`\lambda = \frac{V_{tip}}{V}`, which can also
        be written as :math:`\lambda = \frac{R \Omega_{RPM} \pi/30}{V}`

        Parameters
        ----------

        data : ndarray
            ojf_db.select output
        """
        data_headers = {'wind':0, 'RPM':1, 'dc':2, 'volt':3, 'amp':4, 'FA':5,
                        'SS':6, 'yaw':7, 'power':8, 'temp':9, 'B2 root':10,
                        'B2 30':11, 'B1 root':12, 'B1 30':13, 'static_p':14}
        irpm = data_headers['RPM']
        iwind = data_headers['wind']
        R = ojf_post.model.blade_radius
        return R*data[irpm,:]*np.pi/(data[iwind,:]*30.0)
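
    # Worked check with illustrative numbers (in practice R comes from
    # ojf_post.model.blade_radius): for R = 0.8 m, 400 RPM and V = 8 m/s
    # the tip speed is 0.8*400*pi/30 = 33.5 m/s, so lambda is about 4.2.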
    def select(self, months, include, exclude, valuedict={}, verbose=True,
               runs_inc=[], values_std={}):
        """
        Make an array holding wind, rpm and dc values for each entry in the
        database, filtered with the search terms occurring in the case name.

        Note that the verbose plotting does not show the final merged dc
        column. It shows data available from the dspace field and the dc
        obtained from the case name.

        This method allows searching and selecting the database based on only
        a few values of the results: wind, RPM, dc, volt and amp.

        The search criteria are combined as follows; a case is selected when
        this logic evaluates to True:

        months and include and runs_inc and not exclude

        Parameters
        ----------

        months : list
            Allowable items are '02' and/or '04'

        include : list
            list of strings with keywords that have to be included in the
            case name. Operator is AND

        exclude : list
            list of strings with keywords that have to be excluded from the
            case name. Operator is OR

        valuedict : dict, default={}
            In- or exclude any mean values in the statistics file. Allowable
            entries for the keys are wind, RPM, dc, volt, amp, FA, SS, yaw,
            power, temp, B2 root, B2 30, B1 root, or B1 30. If the value is a
            list, it indicates the upper and lower bounds of the allowed
            interval. Lower bound is inclusive, upper bound exclusive.

        runs_inc : list or set, default=[]
            Run number ids that need to be included. Operator is OR. The
            list should be populated with strings, not integers. Note that
            some run ids contain characters, such as 358b for instance.
            Note that a set is faster than a list.

        values_std : dict, default={}
            Same as valuedict, but now the selection is based on the standard
            deviation. Both valuedict and values_std have to evaluate to True
            if the case is to be accepted.

        Returns
        -------

        data : ndarray(14,n)
            Holding wind, RPM, dc, volt, amp, FA, SS, yaw, power, temp,
            B2 root, B2 30, B1 root, and B1 30. DC is set to -1 if no data is
            available. The duty cycle has been constructed from the dc dspace
            field, or from the one mentioned in the case name if the former
            was not available.

        case_arr : ndarray(n)
            Case names corresponding to the data in the data array

        data_headers : dict
            Column headers for the data array:
            {'wind':0, 'RPM':1, 'dc':2, 'volt':3, 'amp':4, 'FA':5,
             'SS':6, 'yaw':7, 'power':8, 'temp':9, 'B2 root':10,
             'B2 30':11, 'B1 root':12, 'B1 30':13}
        """

        def get_data(statval, statpar='mean'):
            """
            Get the statistical data from one single OJF measurement

            Parameters
            ----------

            statval : dict
                A dictionary holding the statistics for that case

            statpar : str, default='mean'
                Valid entries are max, mean, min, range, std

            Returns
            -------