# Source code for dumpBin

#!/usr/bin/python
from optparse import OptionParser
import datetime
import numpy as np
import sys

from calibrateData import calibratePPCTime


def parseCMDOpts(argv=None):
    """Parse the dumpBin command line.

    argv -- optional list of argument strings (without the program name).
            When None (the default) optparse falls back to sys.argv[1:],
            which is what the script itself relies on.

    Returns the (options, args) tuple produced by OptionParser.parse_args();
    args holds the positional arguments (the bin file names).
    """
    # http://docs.python.org/library/optparse.html
    usage="usage: %prog [options] bin_files"
    description="""This script processes ML-CORK bin files"""
    epilog="""Examples:"""

    help_c="""Force number of paro channels"""
    help_n="""Skip stats--they can take some time and generate some clutter"""
    help_I="""Force RTC ID #. Supply id as hex integer (e.g. 0x8C). Default: 5th byte in file"""
    help_oldCORK="""Assume data is from an old ML-CORK where the readings do not end with 0x00"""
    help_d="""Remove detected spikes by inserting linear interpolation."""
    help_p="""Plots the data. May not work if you do not have the right libraries installed"""
    help_i="""Just output statistics (and plot, optionally) and not the actual data"""
    help_t="""Print calibrated timestamps on every line emulating NC logfiles, at the same time."""
    # %default is expanded by optparse, so the help can never drift away from
    # the real default again.
    help_f="""Modify timestamp format, default is '%default'.
              Use -f '%Y%m%dT%H%M%S.000Z' to emulate NEPTUNE Canada log files. """
    help_b="""Save a binary file skipping problematic records"""
    help_s="""Separate the timestamp, ID, temperature and paro fields of every dumped record with spaces"""
    help_a="""Also print the bytes found in front of a record that does not directly follow the previous one"""

    parser=OptionParser(usage=usage, description=description, epilog=epilog)
    parser.add_option("-I","--RTC_ID",type="int",default=None,help=help_I)
    parser.add_option("-c","--n_channels",type="int",dest='nchannels',default=None,help=help_c)
    parser.add_option("-n","--no_stats",action="store_false",dest='statistics',default=True,help=help_n)
    parser.add_option("-s","--spaces",action="store_true",dest='spaces',default=False,help=help_s)
    parser.add_option("-a","--print_all",action="store_true",dest='printAll',default=False,help=help_a)
    parser.add_option("-p","--plot_data",action="store_true",dest='doPlots',default=False,
                      help=help_p)
    parser.add_option("-i","--info_only",action="store_true",dest='info',default=False,help=help_i)
    parser.add_option("-o","--old_ML-CORK",action="store_true",dest='oldCORK',default=False,help=help_oldCORK)
    parser.add_option("-t","--timestamps",action="store_true",dest='writeTimestamp',default=False,
                      help=help_t)
    # Other useful formats:
    #   '%Y%m%d %H:%M:%S'
    #   '%Y%m%dT%H%M%S.000Z'  (NC format)
    parser.add_option("-f","--timestampFMT",type="string",dest='timestampFMT',
                      default='%Y-%m-%d %H:%M:%S',help=help_f)
    parser.add_option("-d","--despike",action="store_true",dest='interpSpikes', default=False,help=help_d)
    parser.add_option("-b","--binary_file",type="string",default=None,dest='binaryFile',help=help_b)

    return parser.parse_args(argv)
    

def readBinFile(binFileName='1027C_Weekend.bin'):
    """Load a bin file as a flat array of bytes.

    binFileName -- path of the file to read; the default is the file name
                   the script falls back to when no argument is given.

    Returns a 1-D numpy array of dtype uint8 holding the raw file content.
    """
    return np.fromfile(binFileName, dtype=np.uint8)
    
def calCoeffs(loggerID):
    """Placeholder: not implemented yet.

    loggerID -- accepted but currently ignored.

    Always returns None.
    """
    return None

def recordLength(Data, loggerID=None):
    """Estimate the record length in bytes from the spacing of the ID byte.

    Data     -- 1-D array of raw bytes.
    loggerID -- byte value identifying the logger; when it is not given
                (or is falsy) the fifth byte of Data is used.

    Returns the rounded median distance between successive occurrences of
    loggerID as a plain int.  The median keeps accidental matches of the ID
    value inside the payload from skewing the result.

    Raises ValueError when loggerID occurs fewer than two times, because no
    spacing can be measured then.
    """
    if not loggerID:
        loggerID=Data[4]

    IdIdx=np.where(Data == loggerID)[0]
    if len(IdIdx) < 2:
        # Without this guard the median of an empty diff is NaN and int(NaN)
        # fails with a message that does not point at the real problem.
        raise ValueError('Cannot determine record length: ID 0x%02X found %d time(s)'
                         % (loggerID, len(IdIdx)))
    return int(np.round(np.median(np.diff(IdIdx))))
        
def _scanSpikes(Freqs, interp_spikes=False):
    """Print a spike report for every column of Freqs.

    A jump is a change of more than 0.01 between consecutive samples after
    the readings were divided by 1e9.  Two jumps that are at most three
    samples apart are taken as the start and the end of one spike.  With
    interp_spikes the samples of such a spike are overwritten in place by a
    linear interpolation between the neighbouring samples.
    """
    for SensNo in range(Freqs.shape[1]):
        print('=== Sensor %d ===' % SensNo)
        # jumps[k] is the index of the sample just before a large step
        jumps=np.where(np.abs(np.diff(Freqs[0:,SensNo]/1e9,axis=0))>0.01)[0]
        i=1
        while i < len(jumps):
            if (jumps[i]-jumps[i-1])<=3:
                # Spike can be three samples long, at most...
                if Freqs[jumps[i]][SensNo] == 0:
                    print('Zero spike!')
                else:
                    print('Spike!!!')
                spikeStart=int(jumps[i-1])+1
                spikeEnd=int(jumps[i])
                spikeRange=list(range(spikeStart,spikeEnd+1))
                spikeFill=np.interp(spikeRange,[spikeStart-1, spikeEnd+1],
                                    [Freqs[spikeStart-1,SensNo],Freqs[spikeEnd+1,SensNo]])
                print('%s %s' % (spikeRange, spikeFill))
                if interp_spikes:
                    Freqs[spikeRange,SensNo]=spikeFill
                # Both jumps belong to this spike, so skip past the pair
                i+=2
            else:
                # No spike
                print('No Spike...')
                i+=1


def _plotStatistics(vData, Ti, Freqs, spikes, timestamp_fmt):
    """Plot temperature, readings and jump positions against elapsed days.

    Any failure (typically a missing matplotlib) is reported on stdout and
    does not abort the script.
    """
    try:
        import matplotlib.pyplot as plt
        ax1=plt.subplot(311)
        # Only the first timestamp is needed (for the axis label)
        t0=calibratePPCTime(vData['Time'][0])
        td=(vData['Time']-vData['Time'][0])/86400.0
        plt.plot(td,Ti)
        plt.ylabel('T int')

        plt.subplot(312, sharex=ax1)
        plt.plot(td,Freqs,label=' ')
        plt.legend()

        plt.subplot(313, sharex=ax1)
        # NOTE(review): both series are drawn from the same `spikes` data, so
        # the 'zero' markers do not single out zero readings -- confirm intent.
        plt.plot(td[spikes[0]],spikes[1]+1,'*',label='spike')
        plt.plot(td[spikes[0]],spikes[1]+1,'+',label='zero')
        plt.grid(True)
        plt.ylabel('Freq channel')
        plt.xlabel('Days since %s' % t0.strftime(timestamp_fmt))
        plt.legend()
        plt.show()
    except Exception:
        # Exception instead of a bare except, so that Ctrl-C while the plot
        # window is open is not reported as a matplotlib problem.
        print('Could not do plot, you probably have to install matplotlib!!!')
        print('Error:  %s' % (sys.exc_info(),))


def getStatistics(Data, loggerID=None,do_plots=False,interp_spikes=False,
                  timestamp_fmt=None):
    """Print statistics about the records contained in Data.

    Data          -- 1-D uint8 array with the raw file content.
    loggerID      -- ID byte; defaults to the fifth byte of Data.
    do_plots      -- when true, plot the decoded data with matplotlib.
    interp_spikes -- when true, overwrite detected spikes with a linear
                     interpolation.  The decoded readings are obtained by
                     re-viewing Data, so this presumably modifies Data
                     itself -- verify before relying on it.
    timestamp_fmt -- strftime format for printed times.  When None, the
                     module level `options.timestampFMT` is used if it
                     exists, otherwise '%Y-%m-%d %H:%M:%S'.

    The data is decoded assuming that every record consists of a 4 byte
    big-endian time, 4 bytes whose top byte is the ID and whose lower three
    bytes are the internal temperature, (recLen-9)//4 big-endian 4 byte
    readings and one trailing byte.  Only whole records starting at offset 0
    are decoded.  Returns None.
    """
    if not loggerID:
        loggerID=Data[4]
    if timestamp_fmt is None:
        # Fall back to the script's global options so existing callers that
        # do not pass a format keep honouring the -f command line switch.
        timestamp_fmt=getattr(globals().get('options'),'timestampFMT','%Y-%m-%d %H:%M:%S')

    print('RTC ID: %02X' % loggerID)
    print('Data bytes: %d' % len(Data))
    recLen=recordLength(Data,loggerID)
    print('Bytes per record: %d' % recLen)
    print('Possible records: %.2f' % (len(Data)/float(recLen)))
    # Integer counts: they are used as slice bounds, shapes and range() limits
    NRecs=len(Data)//recLen
    NParo=(recLen-9)//4
    print('Parosci channels: %d' % NParo)
    print('Aligned IDs: %d' % len(np.where(Data[4::recLen]==loggerID)[0]))
    print('Aligned trailing 00s: %d' % len(np.where(Data[recLen-1::recLen]==0x00)[0]))

    recType=[('Time','>u4'),('IDTemp','>u4'),('Data','>u4',(1,NParo)),('Zeros','>u1')]
    vData=Data[0:recLen*NRecs].view(dtype=recType)

    print('Time of first alinged reading: %s' % calibratePPCTime(vData['Time'][0]).strftime(timestamp_fmt))
    print('Time of last  alinged reading: %s' % calibratePPCTime(vData['Time'][-1]).strftime(timestamp_fmt))

    NZeros=len(np.where(vData['Data']==0)[0])
    NReadings=NRecs*NParo
    if NReadings:
        zeroPercent=100.0*NZeros/NReadings
    else:
        zeroPercent=0.0
    print("%d (%f %%) Paro readings are zero" % (NZeros, zeroPercent))

    Dt=np.diff(vData['Time'])
    try:
        Dts, idxs =np.unique(Dt, return_inverse=True)
    except TypeError:
        # numpy too old to know the return_inverse keyword
        print('Old vergsion of numpy, cannot do gap analysis...!')
    else:
        print("Time differences\n Dt ---      #")
        for i in range(0,len(Dts)):
            print('%3d --- %7d' % (Dts[i], len(np.where(idxs==i)[0])))

    # --- remove ID from internal temperatures ----
    # (linear conversion with hard-coded coefficients)
    Ti=-2.95083e-006 * np.bitwise_and(vData['IDTemp'],0x00FFFFFF) + 40.0678
    Freqs=vData['Data'].reshape((NRecs,-1))

    print(Freqs.shape)
    print(Freqs)
    _scanSpikes(Freqs,interp_spikes)

    # Jump positions that remain after the optional interpolation
    spikes=np.where(np.abs(np.diff(Freqs/1e9,axis=0))>0.01)

    print(Freqs[0:5])
    print(Freqs.shape)
    for i in range(0,NParo):
        print(np.median(Freqs[:,i]))

    if do_plots:
        _plotStatistics(vData,Ti,Freqs,spikes,timestamp_fmt)


def stripTrash(Data):
    """Unfinished stub: locates the ID bytes but does not strip anything yet.

    Data -- 1-D array of raw bytes.

    Searches Data for the hard-coded ID value 0x5c and discards the result.
    Data is left untouched and None is returned.
    """
    loggerID=0x5c
    # TODO: the positions found here are not used for anything yet
    IdIdx=np.where(Data == loggerID)[0]
    return None
    
    
if __name__=='__main__':
    # `options` deliberately stays a module level name: getStatistics() reads
    # options.timestampFMT from the module globals.
    (options, args) = parseCMDOpts()

    if args:
        Data=readBinFile(args[0])
    else:
        Data=readBinFile()

    if options.statistics:
        getStatistics(Data,do_plots=options.doPlots,interp_spikes=options.interpSpikes)

    if options.info:
        # Don't return the actual data and quit right here
        sys.exit(0)

    NBytes=len(Data)
    print(NBytes)
    print(type(Data))
    np.set_printoptions(threshold=10000)

    loggerID=options.RTC_ID
    if not loggerID:
        loggerID=Data[4]

    if not options.nchannels:
        recLen=recordLength(Data,loggerID=loggerID)
    else:
        # 4 bytes time + 4 bytes ID/temperature + 4 bytes per channel,
        # plus the trailing 0x00 on everything but the old ML-CORK
        recLen=8+4*options.nchannels
        if not options.oldCORK:
            recLen += 1

    print(recLen)

    IdIdx=np.where(Data == loggerID)[0]

    # Patch the data with a few records worth of logger IDs at the end to
    # simplify consistency checking: every candidate can then be compared with
    # the byte one record further on without running off the array.
    padding=np.empty(3*recLen, dtype=Data.dtype)
    padding.fill(loggerID)
    Data=np.concatenate((Data,padding))

    # A candidate is good when the ID shows up again one record later and,
    # unless this is an old ML-CORK, the record ends with 0x00.
    nextIsID=(Data[IdIdx] == Data[IdIdx+recLen])
    if options.oldCORK:
        IdxGood=np.where(nextIsID)[0]
    else:
        IdxGood=np.where(np.logical_and(nextIsID, Data[IdIdx+recLen-5]==0))[0]

    # The ID is the fifth byte of a record, so step back to the record start
    IdIdx=IdIdx[IdxGood]-4
    lastIdx=-recLen
    recordErrors=0
    goodRecords=0
    LastTime=0
    firstTime=None
    CurrTime=None
    if len(IdIdx):
        print(IdIdx.flat[0])
    print(len(IdIdx.flat))

    # The hex layout of a record does not change inside the loop
    if options.oldCORK:
        zeroFmt=''
    else:
        zeroFmt=r'%02X'
    if not options.spaces:
        recFmt=((recLen//4)*'%02X%02X%02X%02X')+zeroFmt
    else:
        recFmt=('%02X%02X%02X%02X %02X %02X%02X%02X '+
                ((recLen-8)//4*'%02X%02X%02X%02X ')+zeroFmt)

    binFile=None
    if options.binaryFile:
        binFile=open(options.binaryFile,'wb')
    try:
        for idx in IdIdx.flat:
            dIdx=(idx-lastIdx)
            if dIdx < recLen:
                # assuming match by accident
                continue

            # First four bytes of the record: big-endian time
            timeBytes=Data[idx:idx+4].tolist()
            CurrTime=(timeBytes[0]<<24)|(timeBytes[1]<<16)|(timeBytes[2]<<8)|timeBytes[3]
            if CurrTime < LastTime:
                print("-> Time Problem")
            LastTime=CurrTime

            if options.writeTimestamp:
                sys.stdout.write('%s ' % calibratePPCTime(CurrTime).strftime(options.timestampFMT))

            if not (dIdx == recLen):
                recordErrors += 1
                if options.printAll:
                    print('=================== %d =======================' % dIdx)
                    if not options.writeTimestamp:
                        print(calibratePPCTime(CurrTime).strftime(options.timestampFMT))

                    if lastIdx<0:
                        # Required if the bin file starts with garbage
                        lastIdx=0

                    garbage=tuple(Data[lastIdx:idx].tolist())
                    print((len(garbage)*'%02x') % garbage)
                    print('--------------------------------------------')

            print(recFmt % tuple(Data[idx:idx+recLen].tolist()))

            if binFile is not None:
                binFile.write(Data[idx:idx+recLen].tobytes())

            goodRecords += 1
            if goodRecords == 1:
                firstTime=CurrTime

            lastIdx=idx
    finally:
        # Close the output even when a record could not be processed
        if binFile is not None:
            binFile.close()

    print("Possible Records: %.1f" % (NBytes/float(recLen)))
    if goodRecords:
        print("Good Records:     %d   (%s -> %s)" % (goodRecords,
                            calibratePPCTime(firstTime).strftime(options.timestampFMT),
                            calibratePPCTime(CurrTime).strftime(options.timestampFMT)))
    else:
        # No record found, so there is no time span to report
        print("Good Records:     0")

    unusedBytes=NBytes-(lastIdx+recLen)
    print("%d unused bytes (%.1f records) at the end" % (unusedBytes, unusedBytes/float(recLen)))
    print("Record errors: %d" % recordErrors)
    
# Change type of an array        
#    b.dtype=np.dtype([('a',np.int16),('b',np.int16),('c',np.int32)])
# Navigation

# Quick search

# Source code for dumpBin

# Navigation