Source code for dumpBin

#!/usr/bin/python
from optparse import OptionParser
import datetime
import numpy as np
import sys

from calibrateData import calibratePPCTime


def parseCMDOpts(args=None):
    """Parse the command line of the ML-CORK bin-file dump script.

    Args:
        args: Optional list of argument strings to parse instead of
            ``sys.argv[1:]``.  Leaving it at ``None`` keeps the historic
            behaviour of reading the process command line.

    Returns:
        The ``(options, positional_args)`` tuple produced by
        ``OptionParser.parse_args``.
    """
    # http://docs.python.org/library/optparse.html
    usage = "usage: %prog [options] bin_files"
    description = """This script processes ML-CORK bin files"""
    epilog = """Examples:"""

    # Known timestamp formats:
    #   '%Y%m%d %H:%M:%S'
    #   '%Y%m%dT%H%M%S.000Z'   NC format
    default_timestamp_fmt = '%Y-%m-%d %H:%M:%S'

    help_c = """Force number of paro channels"""
    help_n = """Skip stats--they can take some time and generate some clutter"""
    help_I = """Force RTC ID #. Supply id as hex integer (e.g. 0x8C). Default: 5th byte in file"""
    help_oldCORK = """Assume data is from an old ML-CORK where the readings do not end with 0x00"""
    help_d = """Remove detected spikes by inserting linear interpolation."""
    help_p = """Plots the data. May not work if you do not have the right libaries installed"""
    help_i = """Just output statistics (and plot, optionally) and not the actual data"""
    help_t = """Print calibrated timestamps on every line emulating NC logfiles, at the same time."""
    # The help text used to advertise a default that differed from the real one.
    help_f = ("Modify timestamp format, default is '" + default_timestamp_fmt +
              "'. Use -f '%Y%m%dT%H%M%S.000Z' to emulate NEPTUNE Canada log files.")
    help_b = """Save a binary file skipping problematic records"""
    help_s = """Separate the fields of every printed record with spaces"""
    help_a = """Print a marker with details whenever a record error is detected"""

    parser = OptionParser(usage=usage, description=description, epilog=epilog)
    parser.add_option("-I", "--RTC_ID", type="int", default=None, help=help_I)
    parser.add_option("-c", "--n_channels", type="int", dest='nchannels',
                      default=None, help=help_c)
    parser.add_option("-n", "--no_stats", action="store_false", dest='statistics',
                      default=True, help=help_n)
    parser.add_option("-s", "--spaces", action="store_true", dest='spaces',
                      default=False, help=help_s)
    parser.add_option("-a", "--print_all", action="store_true", dest='printAll',
                      default=False, help=help_a)
    parser.add_option("-p", "--plot_data", action="store_true", dest='doPlots',
                      default=False, help=help_p)
    parser.add_option("-i", "--info_only", action="store_true", dest='info',
                      default=False, help=help_i)
    parser.add_option("-o", "--old_ML-CORK", action="store_true", dest='oldCORK',
                      default=False, help=help_oldCORK)
    parser.add_option("-t", "--timestamps", action="store_true", dest='writeTimestamp',
                      default=False, help=help_t)
    parser.add_option("-f", "--timestampFMT", type="string", dest='timestampFMT',
                      default=default_timestamp_fmt, help=help_f)
    parser.add_option("-d", "--despike", action="store_true", dest='interpSpikes',
                      default=False, help=help_d)
    parser.add_option("-b", "--binary_file", type="string", default=None,
                      dest='binaryFile', help=help_b)

    # parse_args(None) falls back to sys.argv[1:].
    return parser.parse_args(args)
def readBinFile(binFileName='1027C_Weekend.bin'):
    """Load a bin file as a flat array of bytes.

    Args:
        binFileName: Path of the file to read.

    Returns:
        A one-dimensional ``numpy.uint8`` array holding every byte of the file.
    """
    return np.fromfile(binFileName, dtype=np.uint8)
def calCoeffs(loggerID):
    """Placeholder that is not implemented yet.

    Args:
        loggerID: Accepted but currently ignored.

    Returns:
        Always ``None``.
    """
    return None
def recordLength(Data, loggerID=None):
    """Estimate the record length from the spacing of the logger ID byte.

    Every position whose byte equals the logger ID is located and the median
    distance between consecutive positions is taken as the record length, so
    that a moderate number of accidental matches does not disturb the result.

    Args:
        Data: One-dimensional array of the raw bytes.
        loggerID: Byte value identifying the logger.  When ``None`` the byte
            at index 4 of ``Data`` is used.  An explicit ``0`` is honoured.

    Returns:
        The record length in bytes as a Python ``int``.

    Raises:
        ValueError: If the ID occurs fewer than two times, in which case no
            spacing can be measured.
    """
    # Compare against None: an ID of 0 is falsy but still a valid byte value.
    if loggerID is None:
        loggerID = Data[4]

    id_positions = np.where(Data == loggerID)[0]
    if len(id_positions) < 2:
        raise ValueError(
            'Logger ID 0x%02X occurs %d time(s); at least two occurrences are '
            'needed to estimate the record length'
            % (int(loggerID), len(id_positions)))

    return int(np.round(np.median(np.diff(id_positions))))
def getStatistics(Data, loggerID=None, do_plots=False, interp_spikes=False,
                  timestamp_fmt=None):
    """Print statistics about a raw bin file and optionally plot/despike it.

    The byte stream is interpreted as fixed-size records made of a 4-byte
    big-endian time, a 4-byte big-endian word whose lower 24 bits are used as
    internal temperature counts, ``NParo`` 4-byte big-endian readings and one
    trailing byte.  Alignment counts, the time-step histogram and a simple
    spike detection per channel are printed to stdout.

    Args:
        Data: One-dimensional ``uint8`` array with the raw file content.
        loggerID: Logger ID byte; ``None`` selects the byte at index 4.
        do_plots: When true, plot temperature, readings and spike positions
            with matplotlib (best effort; failures are only reported).
        interp_spikes: When true, detected spikes are overwritten with a
            linear interpolation.  NOTE(review): the readings are accessed
            through views of ``Data``, so this appears to modify ``Data`` in
            place -- confirm that callers rely on that.
        timestamp_fmt: ``strftime`` format for printed times.  When ``None``
            the ``timestampFMT`` attribute of the module-level ``options``
            object is used if it exists, otherwise '%Y-%m-%d %H:%M:%S'.

    Returns:
        None.

    Raises:
        ValueError: If the detected record length does not have the form
            ``9 + 4 * channels`` and can therefore not be mapped onto the
            record layout described above.
    """
    if timestamp_fmt is None:
        # The function used to read the global `options` unconditionally,
        # which only exists when the module is executed as a script.
        cli_options = globals().get('options')
        timestamp_fmt = getattr(cli_options, 'timestampFMT', '%Y-%m-%d %H:%M:%S')

    if loggerID is None:
        loggerID = Data[4]

    print('RTC ID: %02X' % int(loggerID))
    print('Data bytes: %d' % len(Data))
    recLen = recordLength(Data, loggerID)
    print('Bytes per record: %d' % recLen)
    possibleRecs = len(Data) / float(recLen)
    print('Possible records: %.2f' % possibleRecs)

    # Keep the counts integral: they are used as slice bound, dtype shape and
    # reshape size, none of which accept floats in current numpy versions.
    NRecs = int(np.floor(possibleRecs))
    NParo = (recLen - 9) // 4
    print('Parosci channels: %d' % NParo)
    print('Aligned IDs: %d' % len(np.where(Data[4::recLen] == loggerID)[0]))
    print('Aligned trailing 00s: %d' % len(np.where(Data[recLen-1::recLen] == 0x00)[0]))

    if NParo < 1 or 9 + 4 * NParo != recLen:
        raise ValueError(
            'Record length %d does not match the layout 9 + 4 * channels; '
            'statistics are not available for this file (use -n to skip them)'
            % recLen)

    recType = np.dtype([('Time', '>u4'), ('IDTemp', '>u4'),
                        ('Data', '>u4', (1, NParo)), ('Zeros', '>u1')])
    vData = Data[0:recLen * NRecs].view(recType)

    print('Time of first alinged reading: %s'
          % calibratePPCTime(vData['Time'][0]).strftime(timestamp_fmt))
    print('Time of last alinged reading: %s'
          % calibratePPCTime(vData['Time'][-1]).strftime(timestamp_fmt))

    NZeros = len(np.where(vData['Data'] == 0)[0])
    print("%d (%f %%) Paro readings are zero"
          % (NZeros, 100 * NZeros / float(NRecs * NParo)))

    # Signed copy of the time stamps so that backward steps show up as
    # negative differences instead of wrapping around in unsigned arithmetic.
    times = vData['Time'].astype(np.int64)
    Dt = np.diff(times)
    try:
        Dts, idxs = np.unique(Dt, return_inverse=True)
    except TypeError:
        # Very old numpy versions do not know the return_inverse keyword.
        print('Old vergsion of numpy, cannot do gap analysis...!')
    else:
        print("Time differences\n Dt --- #")
        for i in range(0, len(Dts)):
            print('%3d --- %7d' % (Dts[i], len(np.where(idxs == i)[0])))

    # --- remove ID from internal temperatures ----
    Ti = -2.95083e-006 * np.bitwise_and(vData['IDTemp'], 0x00FFFFFF) + 40.0678
    Freqs = vData['Data'].reshape((NRecs, -1))
    print(Freqs.shape)
    print(Freqs)

    for SensNo in range(Freqs.shape[1]):
        print('=== Sensor %d ===' % SensNo)
        # Indices k where the scaled reading jumps by more than 0.01 between
        # sample k and sample k + 1.
        spikeDetect = np.where(
            np.abs(np.diff(Freqs[0:, SensNo] / 1e9, axis=0)) > 0.01)
        i = 1
        while i < len(spikeDetect[0]):
            if (spikeDetect[0][i] - spikeDetect[0][i-1]) <= 3:
                # Spike can be three samples long, at most...
                if Freqs[spikeDetect[0][i]][SensNo] == 0:
                    print('Zero spike!')
                else:
                    print('Spike!!!')
                spikeStart = spikeDetect[0][i-1] + 1
                spikeEnd = spikeDetect[0][i]
                spikeRange = list(range(spikeStart, spikeEnd + 1))
                # Interpolate between the good samples on either side.
                spikeFill = np.interp(
                    spikeRange,
                    [spikeStart - 1, spikeEnd + 1],
                    [Freqs[spikeStart - 1, SensNo], Freqs[spikeEnd + 1, SensNo]])
                print('%s %s' % (spikeRange, spikeFill))
                if interp_spikes:
                    Freqs[spikeRange, SensNo] = spikeFill
                # Both jumps of the pair belong to this spike.
                i += 2
            else:
                # No spike
                print('No Spike...')
                i += 1

    spikes = np.where(np.abs(np.diff(Freqs / 1e9, axis=0)) > 0.01)

    # Diagnostic output.  The median-removed copy that used to be computed
    # here was never used, so only the printed values are kept.
    print(Freqs[0:5])
    print(Freqs.shape)
    for i in range(0, NParo):
        print(np.median(Freqs[:, i]))

    if do_plots:
        try:
            import matplotlib.pyplot as plt

            firstTime = calibratePPCTime(vData['Time'][0])
            td = (times - times[0]) / 86400.0

            ax1 = plt.subplot(311)
            plt.plot(td, Ti)
            plt.ylabel('T int')

            plt.subplot(312, sharex=ax1)
            plt.plot(td, Freqs, label=' ')
            plt.legend()

            plt.subplot(313, sharex=ax1)
            plt.plot(td[spikes[0]], spikes[1] + 1, '*', label='spike')
            plt.plot(td[spikes[0]], spikes[1] + 1, '+', label='zero')
            plt.grid(True)
            plt.ylabel('Freq channel')
            plt.xlabel('Days since %s' % firstTime.strftime(timestamp_fmt))
            plt.legend()
            plt.show()
        except Exception:
            # Plotting is best effort; report the problem and carry on.
            print('Could not do plot, you probably have to install matplotlib!!!')
            print('Error:  %s' % (sys.exc_info(),))
def stripTrash(Data):
    """Unfinished helper: locates bytes equal to 0x5c and discards the result.

    Args:
        Data: Array of raw bytes.

    Returns:
        Always ``None``; the located indices are not used yet.
    """
    id_byte = 0x5c
    is_id = (Data == id_byte)
    np.where(is_id)[0]
    return None
if __name__ == '__main__':
    # Script entry point: print statistics, then dump every plausible record
    # as hex text (and optionally into a cleaned binary file).
    # `options` stays a module-level name because getStatistics may read it.
    (options, args) = parseCMDOpts()
    if args:
        Data = readBinFile(args[0])
    else:
        Data = readBinFile()

    if options.statistics:
        # Forward a forced RTC ID so the statistics use the same ID as the dump.
        getStatistics(Data, loggerID=options.RTC_ID, do_plots=options.doPlots,
                      interp_spikes=options.interpSpikes)
    if options.info:
        # Don't return the actual data and quit right here
        sys.exit()

    NBytes = len(Data)
    print(NBytes)
    print(type(Data))
    np.set_printoptions(threshold=10000)

    loggerID = options.RTC_ID
    if loggerID is None:
        loggerID = Data[4]

    if not options.nchannels:
        recLen = recordLength(Data, loggerID=loggerID)
    else:
        # 4 bytes time + 4 bytes ID/temperature + 4 bytes per channel,
        # plus the trailing 0x00 on newer loggers.
        recLen = 8 + 4 * options.nchannels
        if not options.oldCORK:
            recLen += 1
    print(recLen)

    IdIdx = np.where(Data == loggerID)[0]
    # Patch the data with a few records of logger IDs at the end to simplify
    # consistency checking (look-ahead indexing below never runs off the end).
    Data = np.concatenate((Data, np.full(3 * recLen, loggerID, dtype=Data.dtype)))

    # A candidate is kept when the byte one record further is the ID again
    # and, for newer loggers, the last byte of the record is 0x00.
    nextIdMatches = (Data[IdIdx] == Data[IdIdx + recLen])
    if options.oldCORK:
        IdxGood = np.where(nextIdMatches)[0]
    else:
        IdxGood = np.where(np.logical_and(nextIdMatches,
                                          Data[IdIdx + recLen - 5] == 0))[0]
    # The ID is the fifth byte of a record; step back to the record start.
    IdIdx = IdIdx[IdxGood] - 4

    lastIdx = -recLen
    recordErrors = 0
    goodRecords = 0
    LastTime = 0
    firstTime = None
    CurrTime = None
    if len(IdIdx):
        print(IdIdx[0])
    print(len(IdIdx))

    if options.oldCORK:
        zeroFmt = ''
    else:
        zeroFmt = r'%02X'
    if not options.spaces:
        recordFmt = ((recLen // 4) * '%02X%02X%02X%02X') + zeroFmt
    else:
        recordFmt = ('%02X%02X%02X%02X %02X %02X%02X%02X ' +
                     ((recLen - 8) // 4 * '%02X%02X%02X%02X ') + zeroFmt)

    binFile = open(options.binaryFile, 'wb') if options.binaryFile else None
    try:
        for idx in IdIdx.tolist():
            dIdx = idx - lastIdx
            if dIdx < recLen:
                # assuming match by accident
                continue

            # Big-endian 32 bit time stamp at the start of the record.
            b0, b1, b2, b3 = Data[idx:idx + 4].tolist()
            CurrTime = (b0 << 24) | (b1 << 16) | (b2 << 8) | b3
            if CurrTime < LastTime:
                print("-> Time Problem")
            LastTime = CurrTime

            if options.writeTimestamp:
                sys.stdout.write(
                    '%s ' % calibratePPCTime(CurrTime).strftime(options.timestampFMT))

            if dIdx != recLen:
                recordErrors += 1
                if options.printAll:
                    print('=================== %d =======================' % dIdx)
                    if not options.writeTimestamp:
                        print(calibratePPCTime(CurrTime).strftime(options.timestampFMT))
                    if lastIdx < 0:
                        # Required if the bin file starts with garbage
                        lastIdx = 0
                    garbage = tuple(Data[lastIdx:idx].tolist())
                    print(len(garbage) * '%02x' % garbage)
                    print('--------------------------------------------')

            record = Data[idx:idx + recLen]
            print(recordFmt % tuple(record.tolist()))
            if binFile is not None:
                binFile.write(record.tobytes())

            goodRecords += 1
            if goodRecords == 1:
                firstTime = CurrTime
            lastIdx = idx
    finally:
        if binFile is not None:
            binFile.close()

    print("Possible Records: %.1f" % (NBytes / float(recLen)))
    if goodRecords:
        print("Good Records: %d (%s -> %s)" % (
            goodRecords,
            calibratePPCTime(firstTime).strftime(options.timestampFMT),
            calibratePPCTime(CurrTime).strftime(options.timestampFMT)))
    else:
        # No record passed the checks, so there are no times to report.
        print("Good Records: 0")
    unusedBytes = NBytes - (lastIdx + recLen)
    print("%d unused bytes (%.1f records) at the end"
          % (unusedBytes, unusedBytes / float(recLen)))
    print("Record errors: %d" % recordErrors)