#!/bin/bash
# Last modification 20130507 by Kevin Krieger
# Modified from Kevin Sterne's sftp script
#
# This script is designed to log on to the University of Saskatchewan
# SuperDARN mirror using sftp in order to check for and download new
# rawacf files for a specific radar.
#
# Call the script like so with the following arguments:
# /path/to/script/sync_radar_data YYYY MM /path/to/local/data/YYYY/MM/ [RADAR]
# Argument 1 is the year you wish to sync
# Argument 2 is the month you wish to sync
# Argument 3 is a path to a local directory you wish to sync the data to
# Argument 4 is the optional radar code, omit to sync all rawacf files
#
# The script will check the arguments and if there are errors with the
# arguments (for example, if the year is in the future, or the month is
# not 1-12, or if the directory doesn't exist) then it will fail with
# an error message. The radar code is not validated up front: an unknown
# radar simply matches no lines of the hashes file.
#
# The logic is as follows:
# 1) Download the hashes file from the server's year/month directory
# 2) Go through the hashes file to remove any lines that don't contain the
#    radar rawacf files we want. Then check to see if it contains any files
#    that are not available locally, or that have different hashes for the
#    radar.
# 3) All the file names produced from step 2) should be downloaded from the
#    server and placed in the local rawacf directory.

##############################################################################
# Initialize some variables
##############################################################################
# Each assignment must sit on its own line: a '#' comment runs to the end of
# the line, so anything sharing a line with a comment is never executed.
# The ':-' default keeps a missing argument as an empty string.

# What is the year requested?
YEAR=${1:-}
# What is the month requested?
MONTH=${2:-}
# What is the local rawacf data directory?
LOCALDATADIR=${3:-}
# What is the radar requested?
RADAR=${4:-}

# What hash program are we using?
HASHPROG=/usr/bin/sha1sum

# Date, time and other stuff
STARTTIME=$(date +%s)
DATE_TIME=$(date +%Y%m%d.%H%M)
DATE_UTC=$(date -u)
CURDAY=$(date +%d)
CURHOUR=$(date +%H)
CURMIN=$(date +%M)
CURYEAR=$(date +%Y)
CURMONTHNAME=$(date +%B)
CURMONTH=$(date +%m)

##############################################################################
# Do some error checking on the arguments
# Every failure below prints a message and exits with status 1 so that
# callers (cron, wrapper scripts) can detect that nothing was synchronized.
##############################################################################
# Echo the date for logging purposes
echo "${DATE_UTC}"

# Check to make sure the year is appropriate.
# Require plain digits first: arithmetic comparison would otherwise evaluate
# arbitrary text as an expression. The 10# prefix forces base 10 so that
# values with leading zeros are not parsed as octal.
if [[ ! ${YEAR} =~ ^[0-9]+$ ]]; then
  echo "Error: Year invalid. Exiting"
  exit 1
fi
if (( 10#${YEAR} > 10#${CURYEAR} )); then
  echo "Error: Year is in the future. Exiting"
  exit 1
fi

# Check the month. MONTHTOCHECK is the month as a plain integer (leading
# zeros removed); it stays 0 - and therefore invalid - when MONTH is empty
# or not numeric.
MONTHTOCHECK=0
if [[ ${MONTH} =~ ^[0-9]+$ ]]; then
  MONTHTOCHECK=$(( 10#${MONTH} ))
fi
if (( MONTHTOCHECK < 1 || MONTHTOCHECK > 12 )); then
  echo "Error: Month invalid. Exiting"
  exit 1
fi

# Name of the requested month. Computed only after validation so that date
# is never handed garbage, and zero-padded so "5" and "05" both work.
printf -v REQUESTED_DATE '%04d%02d01' "$(( 10#${YEAR} ))" "${MONTHTOCHECK}"
MONTHNAME=$(date --date="${REQUESTED_DATE}" +%B)

# Check the local data directory. The expansion must be quoted: unquoted,
# an empty argument collapses the test to "[ ! -d ]", which is false, and
# the missing directory would go unnoticed.
if [[ ! -d "${LOCALDATADIR}" ]]; then
  echo "Error: Local directory invalid. Exiting"
  exit 1
fi

# Rather than checking that the radar exists here, just let the
# rest of the script passively do the error checking, since the list
# of radars is likely to change in the future.

##############################################################################
# Now that we know the arguments are likely correct, initialize some more
# variables.
##############################################################################
# What sftp program are we using?
SFTP=/usr/bin/sftp
# What is the username to connect with on the sftp server?
USER=test2
# What is the hostname to connect to (the sftp server)?
REMOTEHOST=superdarn-cssdp.usask.ca
# What is the base directory name on the sftp server?
REMOTEDIRBASE=/sddata/raw
# What is the full directory name on the sftp server (given year and month)?
REMOTEDIR=${REMOTEDIRBASE}/${YEAR}/${MONTH}

# What will the hashes file name be called?
HASHESFILE=${YEAR}${MONTH}.hashes

##############################################################################
# Execute step 1) Download the hashes file from the server's
# year/month directory
# Get the hashes file that contains hashes for all the files in the remote
# directory. If it doesn't exist, bail with error message.
# The -p flag preserves modification times, access times, and modes.
##############################################################################
"${SFTP}" -p "${USER}@${REMOTEHOST}:${REMOTEDIR}/${HASHESFILE}" "${LOCALDATADIR}"
RETURN_VALUE=$?
if [[ ${RETURN_VALUE} -ne 0 ]]; then
  echo "Error: Could not download ${HASHESFILE} from ${REMOTEHOST} as ${USER}. Exiting"
  exit 1
fi

##############################################################################
# Go through the hashes file, removing any lines that don't contain a file
# from the specified RADAR. This will work even if the ${RADAR} argument
# has been omitted from the command line - in which case only lines not
# matching .rawacf.bz2 will be removed.
##############################################################################
# Everything from here on uses file names relative to the local data
# directory, so a failed cd must stop the script rather than let it operate
# on whatever directory it happens to be in.
if ! cd "${LOCALDATADIR}"; then
  echo "Error: Could not change to ${LOCALDATADIR}. Exiting"
  exit 1
fi

# Remove lines not matching the ones we want.
# sed keeps the unfiltered download as "${HASHESFILE}.original".
PATTERN='\.'${RADAR}'.*\.rawacf\.bz2'
echo "Removing lines not matching ${PATTERN} from ${HASHESFILE}."
if ! sed -i.original "/$PATTERN/!d" "${HASHESFILE}"; then
  # Continuing with an unfiltered file would sync every radar's data.
  echo "Error: Could not filter ${HASHESFILE}. Exiting"
  exit 1
fi

##############################################################################
# Execute step 2) Check to see if the file contains
# any files that are not available locally, or that have different hashes.
# Using the hashes program, go through hashes file, determine which
# files need to be downloaded and which files are different from local files.
# We first need to change to the local directory where the data files
# are to be sync'd to
##############################################################################
HASHESCHECKFILE=${HASHESFILE}.check
HASHESTIMESTART=$(date +%s)
# Redirect errors to /dev/null and the useful output to a ".check" file.
# With --quiet the hash program reports only the files that did NOT verify,
# one "name: reason" line per file.
echo "Comparing local and remote hashes..."
"${HASHPROG}" -c --quiet "${HASHESFILE}" 1> "${HASHESCHECKFILE}" 2> /dev/null
HASHESTIMEEND=$(date +%s)

##############################################################################
# Now we are ready to go through each line of the ".check" file
# and see if:
# 1) The files don't exist or couldn't be read: "FAILED open or read" message
# 2) The files' hashes don't match the mirror's files' hashes.
# 3) If not 1) or 2), then we don't know how to deal with the line
# When we find these files that need to be downloaded, write them to a
# file ".tosync" for syncing.
##############################################################################

#######################################
# Classify every line of a ".check" file and record the files that have to
# be downloaded.
# Arguments:
#   $1 - path of the ".check" file to read ("name: reason" lines)
#   $2 - path of the ".tosync" file; one file name is appended per match
# Outputs:
#   One progress (or "ERROR!") message per input line on stdout.
#######################################
record_files_to_sync() {
  local check_file=$1
  local tosync_file=$2
  local line rawfile

  # Redirect into the loop instead of piping from cat, and read with -r so
  # backslashes are kept; the extra test handles a last line without a
  # trailing newline.
  while IFS= read -r line || [[ -n ${line} ]]; do
    # File name = everything before the first ':'.
    rawfile=${line%%:*}
    # Order matters: the "open or read" message also contains "FAILED".
    if [[ ${line} == *"FAILED open or read" ]]; then
      echo "${rawfile} not found locally, adding to ${tosync_file}"
      printf '%s\n' "${rawfile}" >> "${tosync_file}"
    elif [[ ${line} == *"FAILED" ]]; then
      echo "${rawfile} hash doesn't match, adding to ${tosync_file}"
      printf '%s\n' "${rawfile}" >> "${tosync_file}"
    else
      echo "ERROR! I don't know how to deal with this -> ${line}"
    fi
  done < "${check_file}"
}

FILESTOSYNC=${HASHESFILE}.tosync
# Empty any previous sync file first.
: > "${FILESTOSYNC}"
echo "Checking which files need to be sync'd..."
record_files_to_sync "${HASHESCHECKFILE}" "${FILESTOSYNC}"

##############################################################################
# Now that we have the files required to download, let's put together
# an sftp batch command file, ".batch"
##############################################################################
SFTPBATCH=${HASHESFILE}.batch
echo "Building sftp batch file..."

# Empty any previous batch file first.
: > "${SFTPBATCH}"

# First we need to change to proper directory
echo "cd ${REMOTEDIR}" >> "${SFTPBATCH}"

# Next we need to place get commands for each file required.
# The leading '-' tells sftp to carry on with the rest of the batch if one
# get fails. The list is read line by line (not word-split from a command
# substitution) so names are never glob-expanded by the shell.
totalFiles=0
while IFS= read -r ITEM || [[ -n ${ITEM} ]]; do
  # Skip blank lines so they do not produce an empty "-get".
  [[ -n ${ITEM} ]] || continue
  echo "-get ${ITEM}" >> "${SFTPBATCH}"
  totalFiles=$(( totalFiles + 1 ))
done < "${FILESTOSYNC}"

# Finally, we exit from sftp
echo "exit" >> "${SFTPBATCH}"
echo "Total files: ${totalFiles}"

##############################################################################
# Finally, we are ready to execute the sftp batch file and download the
# required files. Redirect errors to a file ".errors". Do not execute
# if there were zero files that required downloading.
##############################################################################
SYNCTIMESTART=$(date +%s)
SFTPERRORS=${HASHESFILE}.errors
# Exit status of the sftp batch run; stays 0 when nothing had to be synced.
SYNCSTATUS=0
echo "Synchronizing files..."
if (( totalFiles > 0 )); then
  "${SFTP}" -p -b "${SFTPBATCH}" "${USER}@${REMOTEHOST}" 2> "${SFTPERRORS}"
  SYNCSTATUS=$?
  if (( SYNCSTATUS != 0 )); then
    echo "Error: sftp exited with status ${SYNCSTATUS}, see ${SFTPERRORS}"
  fi
fi

##############################################################################
# Print out some useful information and do any last minute cleanup.
##############################################################################
ENDTIME=$(date +%s)
HASHESTIME=$(( HASHESTIMEEND - HASHESTIMESTART ))
SYNCTIME=$(( ENDTIME - SYNCTIMESTART ))
TOTALTIME=$(( ENDTIME - STARTTIME ))
echo "Time to compute local hashes: ${HASHESTIME} seconds"
echo "Time to sync required files: ${SYNCTIME} seconds"
echo "Total time to execute script: ${TOTALTIME} seconds"

# Uncomment the following to remove the files generated by this script.
#rm -v ${HASHESFILE}
#rm -v ${SFTPBATCH}
#rm -v ${SFTPERRORS}
#rm -v ${HASHESCHECKFILE}
#rm -v ${FILESTOSYNC}
#rm -v ${HASHESFILE}.original

# Report the outcome of the transfer to the caller (0 when sftp succeeded
# or was not needed).
exit "${SYNCSTATUS}"