Author: mdoray Date: 2012-11-05 15:39:51 +0100 (Mon, 05 Nov 2012) New Revision: 772 Url: http://forge.codelutin.com/repositories/revision/echobase/772 Log: Modified: R-EchoBase/R-echobaseLegacyCheck.r R-EchoBase/R-echobase_functions.r Modified: R-EchoBase/R-echobaseLegacyCheck.r =================================================================== --- R-EchoBase/R-echobaseLegacyCheck.r 2012-10-30 14:43:55 UTC (rev 771) +++ R-EchoBase/R-echobaseLegacyCheck.r 2012-11-05 14:39:51 UTC (rev 772) @@ -119,27 +119,13 @@ Voyages <- dbGetQuery(con,sqlVoyages) head(Voyages) -#Import all esdu cells +#Import all transits #-------------------------- -sqlCellsIndirect="SELECT voyage.name AS voyage_name, vessel.name AS vessel_name, -acousticinstrument.transceivermodel, acousticinstrument.frequency, -dataprocessing.processingtemplate, dataprocessing.processingdescription, -dataprocessing.sounderconstant, dataprocessing.eithresholdlow, -dataprocessing.acousticdensityunit, cell.name AS cell_name, -celltype.name AS celltype, celltype.id AS cell_id, cell.topiaid, -cell.cell FROM public.voyage AS voyage, public.mission AS mission, -public.transit AS transit, public.transect AS transect, -public.dataacquisition AS dataacquisition, -public.dataprocessing AS dataprocessing, public.cell AS cell, -public.vessel AS vessel, public.acousticinstrument AS acousticinstrument, -public.celltype AS celltype WHERE voyage.mission = mission.topiaid AND transit.voyage = voyage.topiaid AND transect.transit = transit.topiaid AND dataacquisition.transect = transect.topiaid AND dataprocessing.dataacquisition = dataacquisition.topiaid AND cell.dataprocessing = dataprocessing.topiaid AND transect.vessel = vessel.topiaid AND dataacquisition.acousticinstrument = acousticinstrument.topiaid AND cell.celltype = celltype.topiaid AND celltype.id = 'Esdu'" +sqlTransits="SELECT transit.startTime,transit.endTime,voyage.name, voyage.startdate, voyage.enddate FROM +public.voyage AS voyage, public.mission AS mission, public.transit AS transit +WHERE voyage.mission = mission.topiaid AND transit.voyage = voyage.topiaid" +Transits <- dbGetQuery(con,sqlTransits) -EsduCellsIndirect <- dbGetQuery(con,sqlCellsIndirect) -head(EsduCellsIndirect) -(NesduCells=dim(EsduCellsIndirect)[1]) -table(EsduCellsIndirect$voyage_name) -#EsduCellsIndirect[EsduCellsIndirect$voyage_name=='PELGAS2010','cell_name'] - #Import all data acquisition/processing #-------------------------- sqlDataAcqProc="SELECT voyage.name, dataprocessing.processingtemplate, dataprocessing.digitthreshold, dataprocessing.eithresholdlow, dataprocessing.eithresholdhigh, dataprocessing.transceiverprocessinggain, dataprocessing.processingsoftwareversion, dataprocessing.transceiverprocessingsacorrection, dataprocessing.transceiverprocessingabsorption, dataprocessing.transceiverprocessingabsorptiondescription, dataprocessing.transducerprocessingbeamangleathwartship, dataprocessing.transducerprocessingpsi, dataprocessing.transducerprocessingbeamanglealongship FROM public.voyage AS voyage, public.mission AS mission, public.transit AS transit, public.transect AS transect, public.dataacquisition AS dataacquisition, public.dataprocessing AS dataprocessing WHERE voyage.mission = mission.topiaid AND transit.voyage = voyage.topiaid AND transect.transit = transit.topiaid AND dataacquisition.transect = transect.topiaid AND dataprocessing.dataacquisition = dataacquisition.topiaid" @@ -169,7 +155,14 @@ EsduCells <- dbGetQuery(con,sqlEsduCells) head(EsduCells) dim(EsduCells) +# 33942 esdus vs. 34524 esdus in original baracouda, minus 577 bad esdus = 34847 valid esdus +#duplicated Esdu names? length(unique(EsduCells$cell_name)) +desdus=EsduCells$cell_name[duplicated(EsduCells$cell_name)] +desdus.year=substr(desdus,1,4) +table(desdus.year) +length(desdus) +# 630 duplicated esdus #Import all esdu cells data #-------------------------- @@ -183,32 +176,36 @@ public.mission AS mission, public.transit AS transit, public.transect AS transect, public.dataacquisition AS dataacquisition, public.dataprocessing AS dataprocessing, public.cell AS cell, public.vessel AS vessel, public.acousticinstrument AS acousticinstrument, public.celltype AS celltype, public.data AS data, public.datametadata AS datametadata WHERE voyage.mission = mission.topiaid AND transit.voyage = voyage.topiaid AND transect.transit = transit.topiaid AND dataacquisition.transect = transect.topiaid AND dataprocessing.dataacquisition = dataacquisition.topiaid AND cell.dataprocessing = dataprocessing.topiaid AND transect.vessel = vessel.topiaid AND dataacquisition.acousticinstrument = acousticinstrument.topiaid AND cell.celltype = celltype.topiaid AND data.cell = cell.topiaid AND data.datametadata = datametadata.topiaid AND celltype.id = 'Esdu'" EsduCellsData <- dbGetQuery(con,sqlEsduCellsData) head(EsduCellsData) +dim(EsduCellsData) length(unique(EsduCellsData$cell_name)) - -#same as esdu no. +# 33312 esdus with data +length(unique(EsduCellsData$cell_name)) +# 33312 unique esdus with data unique(EsduCellsData$cell_type) +#date time conversion stesdus=EsduCellsData[EsduCellsData$data_type=='TimeStart','data_value'] head(stesdus) -#date time conversion -#turn off locale-specific days of week for date import -Sys.setlocale("LC_TIME", "C") -tesdus <- strptime(paste(substr(stesdus,1,10),substr(stesdus,26,29), - substr(stesdus,12,19)),format="%a %b %d %Y %H:%M:%S") -tesdus <- strptime(paste(substr(stesdus,1,19)),format="%Y-%m-%d %H:%M:%S") -class(tesdus) -plot(tesdus,EsduCellsData[EsduCellsData$data_type=='TimeStart','data_value']) +#turn off locale-specific days of week for date import: deprecated +#Sys.setlocale("LC_TIME", "C") +# tesdus <- strptime(paste(substr(stesdus,1,10),substr(stesdus,26,29), +# substr(stesdus,12,19)),format="%a %b %d %Y %H:%M:%S") +tesdus <- strptime(stesdus,format="%Y-%m-%d %H:%M:%OS") + +#esdus in wide format esdus=cells4humans(df=EsduCellsData,idvar='cell_name') head(esdus) dim(esdus) esdus$esdu.id=paste(esdus$voyage_name,esdus$LongitudeStart,esdus$LatitudeStart) +esdus$t <- strptime(esdus$TimeStart,format="%Y-%m-%d %H:%M:%OS") +# No. of esdus per survey +esdus.survey=table(esdus$voyage_name) (NesduCellsData=dim(EsduCellsData)[1]) -#duplicated cell names? yes, for same PM/AM hour... +# No duplicated cell names with data unique(table(EsduCellsData$cell_name)) duplicated.EsduCellData=names(table(EsduCellsData$cell_name))[table(EsduCellsData$cell_name)==10] decd=EsduCellsData[EsduCellsData$cell_name%in%duplicated.EsduCellData,] -decd[decd$cell_name=='1983-04-24 07:03:00Z',] #import baracouda EI data path='~/.gvfs/q sur nantes/Projects/Acoustic biomass assessment/Methods/Database/Echobase/Baracouda2Echobase/RequetesAcoustics/resultats/VoyageEchotype.txt' @@ -222,24 +219,73 @@ df=EI.bara$LGDMC,xname="LGDMC",asNewColumn=FALSE) EI.bara$LATDMC=correct.positions( df=EI.bara$LATDMC,yname="LATDMC",asNewColumn=FALSE) - + +#import baracouda esdus data +#-------------------------------- +path.esdubara='~/.gvfs/donnees2 sur nantes/Campagnes/bd/Echobase/EchobaseLegacyCheck/BaracoudaExports/EI_ESU.txt' +esdus.bara2=read.table(path.esdubara,sep=';',header=TRUE) +head(esdus.bara2) +dim(esdus.bara2) +esdus.bara2$tesdus <- strptime(esdus.bara2$DHTU_ESDU,format="%d/%m/%Y %H:%M:%S") +# 35524 esdus in baracouda +esdus.bara2$DHTU_ESDU[duplicated(esdus.bara2$DHTU_ESDU)] +#no duplicated esdus + esdus.bara=EI.bara[EI.bara$LIBELLE_TRANCHE=='TOTAL',] esdus.bara$esdu.id=paste(esdus.bara$CAMPAGNE,esdus.bara$LGDMC,esdus.bara$LATDMC) -head(esdus.bara) -#date time conversion -baratime=esdus.bara$DHTU_ESDU +dim(esdus.bara) +# 26448 esdus with EI data in baracouda -bara.tesdus <- strptime(baratime,format="%d/%m/%Y %H:%M:%S") +dfs=esdus.bara[esdus.bara$CAMPAGNE=='PELGAS2004',] + +# Summary no. of esdus per cruise +#--------------------------------- +names(esdus.bara2) +tbaraEsdus=table(substr(esdus.bara2$DHTU_ESDU,7,10)) +tbaraEI=table(esdus.bara$CAMPAGNE) +esdus.survey=table(esdus$voyage_name) +tesdus2=esdus.survey[names(esdus.survey)%in%names(tbara)] +esdus.survey.comp=cbind(tesdus2,tbaraEI) +#duplicated esdus +esdus.bara$DHTU_ESDU[duplicated(esdus.bara$DHTU_ESDU)] + +esdus.bara$tesdus <- strptime(esdus.bara$DHTU_ESDU,format="%d/%m/%Y %H:%M:%S") table(is.element(bara.tesdus,tesdus)) -missing.esdus=esdus.bara[!is.element(bara.tesdus,tesdus),] + +#Missing esdus in Echobase +#---------------------------- +plot(esdus.bara2$tesdus,esdus.bara2$TOTAL) +points(esdus$t,esdus$NASC,pch=16,col=2) +missing.esdusEI1=esdus.bara[!is.element(esdus.bara$tesdus,tesdus),] + +dim(missing.esdusEI1) +names(esdus.bara) +esdus.bara$year=substr(esdus.bara[,"DHTU_ESDU"],7,10) +esdus.bara[,"DHTU_ESDU"][duplicated(esdus.bara[,"DHTU_ESDU"])] +missing.esdusEI2=esdus.bara[!is.element(esdus.bara$tesdus,substr(esdus$cell_name,1,19)),] +dim(missing.esdusEI2) +missing.esdus2=esdus.bara2[!is.element(esdus.bara2$tesdus,substr(esdus$cell_name,1,19)),] +dim(missing.esdus2) dim(esdus.bara) dim(missing.esdus) head(missing.esdus) path.missing='/home/mathieubuntu/Documents/Echobase/EchobaseLegacyCheck/BaracoudaExports/missingEsdus.txt' write.table(missing.esdus,path.missing,sep=';',row.names=FALSE) +dfs=esdus[esdus$voyage_name=='PELGAS2004',] +names(dfs) +dim(dfs) +dfs2=esdus.bara[esdus.bara$CAMPAGNE=='PELGAS2004',] +dim(dfs2) +names(dfs2) +dfs3=dfs2[!dfs2$tesdus%in%substr(dfs$cell_name,1,19),] +dim(dfs3) +plot(dfs2$LGDMC,dfs2$LATDMC) +points(dfs3$LGDMC,dfs3$LATDMC,pch=16) +names(esdus.bara$DHTU_ESDU) + x11() plot(esdus.bara$LGDMC,esdus.bara$LATDMC,pch=16) points(missing.esdus$LGDMC,missing.esdus$LATDMC,pch=16,col=2) @@ -279,7 +325,7 @@ dfs=missing.esdus[substr(missing.esdus$DHTU_ESDU,4,5)%in%'07',] dim(dfs) dfs=esdus.bara[substr(esdus.bara$DHTU_ESDU,4,5)%in%'07',] -#***EI data from July in Pelgas2008 and 2009... removed*** +#***577 EI data from July in Pelgas2008 and 2009... removed*** missing.esdus2=missing.esdus[!substr(missing.esdus$DHTU_ESDU,4,5)%in%'07',] plot(dfs$LGDMC,dfs$LATDMC) coast() @@ -383,6 +429,20 @@ Mcsv.base=Mcsv.base[,names(Mcsv.base)!='echo'] names(Mcsv.base) +#Check for missing esdus +table(substr(Mcsv.base[,"MOVIES_EILayer\\cellset\\datestart"],7,10)) +dft=Mcsv.base +dft$year=substr(dft[,"MOVIES_EILayer\\cellset\\datestart"],7,10) +dftu=unique(dft[,c("MOVIES_EILayer\\cellset\\datestart","year")]) +correct1=table(dftu$year)[-1] +esdus.survey.comps=data.frame(esdus.survey.comp[-seq(8),]) +esdus.survey.comps=cbind(esdus.survey.comps,correct1) +esdus.survey.comps$missing=esdus.survey.comps[,2]-esdus.survey.comps[,1] +sum(esdus.survey.comps$missing) + +es=Mcsv.base[substr(Mcsv.base[,"MOVIES_EILayer\\cellset\\datestart"],7,10)=='2004',] +length(unique(es[,"MOVIES_EILayer\\cellset\\datestart"])) + #check export file #----------------------- #All esdus cells Modified: R-EchoBase/R-echobase_functions.r =================================================================== --- R-EchoBase/R-echobase_functions.r 2012-10-30 14:43:55 UTC (rev 771) +++ R-EchoBase/R-echobase_functions.r 2012-11-05 14:39:51 UTC (rev 772) @@ -3,7 +3,8 @@ cells4humans=function(df,v.names="data_value",idvar="EIcell_id",correcXY=FALSE, timevar="data_type",snames=c("voyage_name","esdu_name", - "cell_type","cell_name","esdu_data_type",idvar)){ + "cell_type","cell_name","esdu_data_type",idvar, + "TimeStart","TimeEnd")){ #Cells in wide format #----------------------------------------- df.wide=reshape(df,v.names =v.names , idvar = idvar,
participants (1)
-
mdoray@users.forge.codelutin.com