#!/bin/bash

# (c) S. Parkin 15/02/19 -- 15/04/24:  SADABS-AUTO - a SADABS automator.
#
# A 'bash' script that automates George Sheldrick's SADABS absorption correction 
# program.  SADABS-AUTO sets a few parameters and then calls a separate 'expect' 
# script, SADABS-AUTO-RUN, which in turn spawns a SADABS job. SADABS-AUTO-RUN is 
# used to bypass the usual interactive SADABS session.  SADABS-AUTO launches via 
# a command-line call like this:
#
# sadabs-auto filename even odd muR Laue weights
# 
# Where 'filename' is the filename used for the dataset (without the extension), 
# 'even' is an even integer (2,4,6,8), 'odd' is an odd integer (1,3,5,7) and muR 
# is a real (non-negative) value for equivalent muR.  'Laue' sets the Laue group 
# number for SADABS using one of Sheldrick's 14 categories.  The 'Laue' entry is 
# optional on the command line but if you use it you should try to get it right. 
# Automated SADABS will fail if you use too high a Laue class. If no 'Laue' type 
# is given then SADABS-AUTO will try to figure it out using information given in 
# a CIF (if a CIF is available).  Failing that it will ask for manual input. The 
# 'weights' input allows you to alter the SADABS weighting scheme, but will only 
# accept choices (0-6) because (7-11) require manual input and thus are not good
# for automation.
# 
# The script will also attempt to figure out a reasonable number for muR if none 
# is specified on the command line. The error trapping seems to work quite well,
# but it is probably better to enter it on the command line.
#
# NOTE:  There are lots of different versions of SADABS around.  This version of 
# SADABS-AUTO is specific to SADABS version 2014/4 (the version named in the 
# footer that this script prints).  Other SADABS versions require changes to the 
# companion 'expect' script, SADABS-AUTO-RUN. 

# Print the SADABS-AUTO banner so that it appears ahead of the usual SADABS 
# header on the screen.
#
printf '%s\n' \
  " " \
  " ==========================================================================" \
  " SADABS-AUTO: SADABS automation by bash/expect scripts: S.Parkin 2015/02/19" \
  " --------------------------------------------------------------------------"

# Capture the positional parameters that SADABS-AUTO was started with:
#
#   $1  filename stem of the dataset (no extension)        [required]
#   $2  highest even spherical harmonic order
#   $3  highest odd spherical harmonic order
#   $4  mean equivalent-sphere muR
#   $5  SADABS Laue group number [1 - 14]
#   $6  SADABS weighting scheme type [0 - 6]
#   $7  stored as 'refinement_cycles' and handed on to SADABS-AUTO-RUN as is
#
# Only the filename is checked here.  muR, the Laue group number and the 
# weighting scheme are validated (and defaulted or derived from a CIF where 
# possible) in their own sections further down the script.
#
filename=$1
highest_even_order=$2
highest_odd_order=$3
muR_equivalent=$4
sadabs_laue_group_number=$5
weighting_scheme=$6
refinement_cycles=$7

# Without a filename nothing can be done, so print the usage summary and quit.
# The test is quoted so that a filename containing spaces is not mistaken for a
# malformed test expression (which would let the script carry on regardless).
# NOTE(review): the exit status of this early exit is left as it was (that of 
# the last echo/printf, i.e. 0) so that existing callers see no change.
#
if [[ -z "$filename" ]]; then
  printf '%s\n' \
    "" \
    " No filename was given.  SADABS-AUTO is launched like this: " \
    "" \
    " sadabs-auto filename even odd muR Laue weight" \
    "" \
    "   'filename' is the filename used for the dataset (no extension) " \
    "   'even' is an even integer [2,4,6,8] " \
    "   'odd' is an odd integer [1,3,5,7] " \
    "   'muR' is a reasonable value for the equivalent muR " \
    "   'Laue' sets the Laue group number for SADABS [1 - 14] " \
    "   'weight' sets the type of weighting scheme [0 - 6] " \
    "" \
    " For the Laue group, use one of the following numbers: " \
    "" \
    "    [1] -1                         [8] -3m (rhombohedral axes)          " \
    "    [2] 2/m (Y unique)             [9] -31m (Z unique)                  " \
    "    [3] mmm                        [10] -3m1 (Z unique)                 " \
    "    [4] 4/m (Z unique)             [11] 6/m (Z unique)                  " \
    "    [5] 4/mmm (Z unique)           [12] 6/mmm (Z unique)                " \
    "    [6] -3 (rhombohedral axes)     [13] m3                              " \
    "    [7] -3 (Z unique)              [14] m3m                             " \
    "" \
    " You need to at least specify a filename.  SADABS-AUTO will now quit. " \
    " ==========================================================================" \
    ""
  exit
fi

# Derive the companion file names from the filename stem.
#
#   cif_file  <stem>.cif, consulted further down for structure information.
#   raw_file  the file holding the integrated data that SADABS will read.
#
# Which raw file is used depends on the first letter of the stem, kept in 
# machine_flag.  A leading 'k' (kappaCCD data at the UK X-Ray Laboratory) means 
# a '.sad' file as written by Sheldrick's x2sad program.  Any other first letter
# -- 'x' for the in-house Bruker machine, 's' for synchrotron (e.g. ALS) data, 
# or anything else -- is assumed to be Bruker data, with '_0m.raw' appended to 
# the stem.
# 
cif_file="${filename}.cif"
machine_flag="${filename:0:1}"
case "$machine_flag" in
  k) raw_file="${filename}.sad" ;;
  *) raw_file="${filename}_0m.raw" ;;
esac

# Settle the SADABS Laue group number (1-14).  In order of preference it comes 
# from: the command line ($5), a SHELXL-written CIF in the current directory, or 
# manual entry.  Manual entry is also the fallback when a CIF is present but the 
# Laue group cannot be worked out from it.
#
# The work is split over a few small helper functions:
#
#   is_laue_number VALUE     succeeds if VALUE is an integer from 1 to 14.
#   print_laue_table         prints the 14 SADABS Laue categories.
#   prompt_for_laue_number   reads sadabs_laue_group_number from the keyboard.
#   cif_value TAG FILE       prints the value that follows TAG in a CIF.
#   laue_from_cif SYSTEM SG ALPHA GAMMA
#                            prints the Laue group number for a crystal system, 
#                            space group number and cell angles.

# Succeeds only if $1 is a whole number between 1 and 14 (leading zeros are 
# tolerated).  The whole string has to match, so empty input, blanks or several 
# numbers at once are rejected.
is_laue_number() {
  local laue_re='^0*([1-9]|1[0-4])$'
  [[ $1 =~ $laue_re ]]
}

# Print the table of SADABS Laue group categories.
print_laue_table() {
  echo " [1] -1                           [8] -3m (rhombohedral axes)          "
  echo " [2] 2/m (Y unique)               [9] -31m (Z unique)                  "
  echo " [3] mmm                          [10] -3m1 (Z unique)                 "
  echo " [4] 4/m (Z unique)               [11] 6/m (Z unique)                  "
  echo " [5] 4/mmm (Z unique)             [12] 6/mmm (Z unique)                "
  echo " [6] -3 (rhombohedral axes)       [13] m3                              "
  echo " [7] -3 (Z unique)                [14] m3m                             "
}

# Ask for the Laue group number until a valid one is typed.  The answer is left 
# in the global sadabs_laue_group_number.  Returns 1 (with the variable empty) 
# if standard input runs dry, so that a non-interactive run cannot loop forever. 
# The script is not aborted in that case, which matches the earlier behaviour of 
# carrying on to the SADABS run regardless.
prompt_for_laue_number() {
  echo -n " Enter Laue group number: "
  while true; do
    if ! read -r sadabs_laue_group_number; then
      sadabs_laue_group_number=""
      echo
      echo " No input was available, so no Laue group number has been set. "
      return 1
    fi
    if is_laue_number "$sadabs_laue_group_number"; then
      return 0
    fi
    echo -n " An integer between 1 and 14 (inclusive) is required: "
  done
}

# Print the value that follows data name $1 in CIF $2 (first occurrence only).
# A trailing standard uncertainty such as '(3)', enclosing quotes and a stray 
# carriage return are removed.  Prints an empty line if the name is absent.
cif_value() {
  local value
  value=$(awk -v tag="$1" '$1 == tag { print $2; exit }' "$2" 2>/dev/null)
  value=${value%%(*}
  value=${value//\"/}
  value=${value//\'/}
  value=${value//$'\r'/}
  printf '%s\n' "$value"
}

# Print the SADABS Laue group number for crystal system $1, space group number 
# $2 and cell angles alpha ($3) and gamma ($4).  Returns 1 and prints nothing 
# if the combination is not recognised.
#
# Trigonal space groups need care.  With rhombohedral axes (alpha equal to 
# gamma) the answer is 6 for Laue class -3 and 8 for -3m.  With hexagonal axes 
# (gamma of 120) it is 7 for -3 (space groups up to and including 148, R-3), 9 
# for -31m and 10 for -3m1.  The R-centred groups 155, 160, 161, 166 and 167 on 
# hexagonal axes belong with -3m1.
laue_from_cif() {
  local system=$1 sg=$2 alpha=$3 gamma=$4
  local number_re='^[0-9]+$'
  local hex_gamma_re='^120(\.0*)?$'

  # These three systems do not depend on the space group number.
  case $system in
    triclinic)    echo 1; return 0 ;;
    monoclinic)   echo 2; return 0 ;;
    orthorhombic) echo 3; return 0 ;;
  esac

  # Everything else is split by space group number, so it must be an integer.
  [[ $sg =~ $number_re ]] || return 1
  sg=$((10#$sg))

  case $system in
    tetragonal)
      if (( sg <= 88 )); then echo 4; else echo 5; fi
      ;;
    trigonal)
      if [[ -n $alpha && $alpha == "$gamma" ]]; then
        # Rhombohedral axes.
        if (( sg <= 148 )); then
          echo 6
        elif (( sg >= 155 )); then
          echo 8
        else
          return 1
        fi
      elif [[ $gamma =~ $hex_gamma_re ]]; then
        # Hexagonal axes.
        if (( sg <= 148 )); then
          echo 7
        else
          case $sg in
            149|151|153|157|159|162|163)
              echo 9 ;;
            150|152|154|155|156|158|160|161|164|165|166|167)
              echo 10 ;;
            *)
              return 1 ;;
          esac
        fi
      else
        return 1
      fi
      ;;
    hexagonal)
      if (( sg <= 176 )); then echo 11; else echo 12; fi
      ;;
    cubic)
      if (( sg <= 206 )); then echo 13; else echo 14; fi
      ;;
    *)
      return 1
      ;;
  esac
}

if is_laue_number "$5"; then
  echo
  echo " If the Laue group number supplied on the command line is too high then the "
  echo " automated SADABS process will fail! A Laue group number of ${sadabs_laue_group_number} was supplied. "
else
  # Throw away whatever unusable value came from the command line so that it 
  # can never reach SADABS.
  sadabs_laue_group_number=""
  echo 
  echo " Either no Laue group number was supplied, or the value was outside of the "
  echo " acceptable range.  SADABS-AUTO will either figure it out or it will ask. "

  if [ ! -f "$cif_file" ]; then
    # No CIF to consult, so the user has to supply the number.
    echo
    echo " For fully automated use SADABS-AUTO requires that the 'Laue group number'"
    echo " is either given on the command line or that a suitable SHELXL-written CIF" 
    echo " is available in the current directory.  No suitable CIF was found, so you"
    echo " need to enter the Laue group number now. Please enter a number [1-14], as" 
    echo " per the following SADABS classifications: "
    echo " "
    print_laue_table
    echo 
    prompt_for_laue_number
  else
    # Pull what is needed out of the CIF.  mu and the three crystal dimensions 
    # are not used for the Laue group, but the muR estimate later in the script 
    # relies on them.
    alpha=$(cif_value "_cell_angle_alpha" "$cif_file")
    gamma=$(cif_value "_cell_angle_gamma" "$cif_file")
    mu=$(cif_value "_exptl_absorpt_coefficient_mu" "$cif_file")
    max_size=$(cif_value "_exptl_crystal_size_max" "$cif_file")
    mid_size=$(cif_value "_exptl_crystal_size_mid" "$cif_file")
    min_size=$(cif_value "_exptl_crystal_size_min" "$cif_file")
    crystal_system=$(cif_value "_space_group_crystal_system" "$cif_file")
    space_group_number=$(cif_value "_space_group_IT_number" "$cif_file")

    if ! sadabs_laue_group_number=$(laue_from_cif "$crystal_system" \
           "$space_group_number" "$alpha" "$gamma"); then
      # The CIF did not give a usable answer, so fall back on asking.
      sadabs_laue_group_number=""
      echo
      echo " The Laue group number could not be worked out from the CIF, so you need"
      echo " to enter it now. Please enter a number [1-14], as per the following "
      echo " SADABS classifications: "
      echo " "
      print_laue_table
      echo 
      prompt_for_laue_number
    fi
  fi
fi

# The muR equivalent is best set from the command line.  If it was not given, or 
# is not a number, a starting value is calculated from the crystal dimensions 
# and absorption coefficient in the CIF (when there is one), using the second of 
# these two formulae:
#
#   ((max_size + mid_size + min_size) / 6) * mu
#   ((max_size + mid_size + (min_size * 5.5)) / 15) * mu
#
# NOTE: Sheldrick recommends a value for R that is biased closer to half the min 
# dimension.  The example in the SADABS paper [J. Appl. Cryst. (2015), 48, 3-10] 
# suggests that for a 0.1 mm x 0.2 mm x 0.3 mm crystal, a value for R of 0.07 mm 
# is appropriate.  In that example, the smallest dimension is up-weighted by 5.5 
# times.  This is an approximation of course, but if we take this recommendation 
# at face value, the second formula is preferred.
#
# The CIF values are read again here, rather than relying on variables set 
# while working out the Laue group, because that part of the script only reads 
# the CIF when no Laue group number was given on the command line.

# Succeeds if $1 looks like a decimal number: an optional minus sign, optional 
# digits, an optional decimal point and at least one closing digit ('12.5', 
# '.35' and '2' pass; '1x5', '1.' and '' do not).  The pattern is held in a 
# quoted variable so the shell cannot strip its backslash or glob-expand it.
is_valid_muR() {
  local muR_re='^-?[0-9]*\.?[0-9]+$'
  [[ $1 =~ $muR_re ]]
}

# Print the value following data name $1 in CIF $2 (first occurrence), with any 
# trailing standard uncertainty such as '(2)' removed.
cif_number() {
  local value
  value=$(awk -v tag="$1" '$1 == tag { print $2; exit }' "$2" 2>/dev/null)
  printf '%s\n' "${value%%(*}"
}

# Print an estimate of muR from the max ($1), mid ($2) and min ($3) crystal 
# dimensions and mu ($4).  Returns 1 without calling bc unless all four are 
# plain non-negative numbers, so missing CIF entries ('?', empty) cannot turn 
# into bc syntax errors.
estimate_muR() {
  local number_re='^[0-9]*\.?[0-9]+$' value
  (( $# == 4 )) || return 1
  for value in "$@"; do
    [[ $value =~ $number_re ]] || return 1
  done
  echo "scale=3; (($1 + $2 + ($3 * 5.5)) / 15) * $4" | bc
}

if ! is_valid_muR "$muR_equivalent"; then
  echo
  echo " muR was either not given or it was invalid. Either the default will be used "
  echo " or if a suitable CIF file is present an attempt will be made to calculate a "
  echo " suitable value. " 
  echo 
  # An empty value means that nothing is handed on for muR.
  muR_equivalent=""
  if [[ -f "$cif_file" ]]; then
    mu=$(cif_number "_exptl_absorpt_coefficient_mu" "$cif_file")
    max_size=$(cif_number "_exptl_crystal_size_max" "$cif_file")
    mid_size=$(cif_number "_exptl_crystal_size_mid" "$cif_file")
    min_size=$(cif_number "_exptl_crystal_size_min" "$cif_file")
    muR_equivalent=$(estimate_muR "$max_size" "$mid_size" "$min_size" "$mu") \
      || muR_equivalent=""
  fi
fi

# SADABS offers 11 weighting schemes (described in its interactive blurbs), but 
# types 7--11 need manual input and so cannot be automated.  Only a command-line 
# entry ($6) that is a whole number from 0 to 6 is kept.  Anything else (no 
# entry, 7--11, or garbage) blanks weighting_scheme, which the rest of the 
# script reports as the default scheme (5).  In the author's fairly extensive 
# tests the default (5) is the most general.
#
weight_ok=no
if [[ $6 =~ ^[0-9]+$ ]]; then
  if [[ $6 -ge 0 && $6 -le 6 ]]; then
    weight_ok=yes
  fi
fi

if [[ $weight_ok != yes ]]; then
  printf '%s\n' " Weighting scheme either not given or invalid. Will use default value of 5. "
  weighting_scheme=""
fi

# Run SADABS through SADABS-AUTO-RUN (i.e. the companion 'expect' script).  It is 
# handed, in this order: the data file, the Laue group number, the even and odd 
# spherical harmonic orders, the mean muR, the weighting scheme, and the seventh 
# command-line argument (refinement_cycles).
# NOTE: For kappaCCD data, $raw_file is a *.sad file.
# NOTE: For X8 Proteum data, $raw_file is a *_0m.raw file.
# NOTE(review): the arguments are unquoted, so any that is empty (for instance 
# $weighting_scheme when the default is wanted) drops out of the command line 
# and the arguments after it move up one position.  How the companion script 
# deals with that cannot be seen from this file -- check it before quoting any 
# of these expansions.
# The two commented-out calls below are kept from the original; they name 
# 'sadabs-auto-run-2012' and 'sadabs-auto-run-2014' variants of the script.
#
sadabs-auto-run $raw_file $sadabs_laue_group_number $highest_even_order $highest_odd_order $muR_equivalent $weighting_scheme $refinement_cycles
#sadabs-auto-run-2012 $raw_file $sadabs_laue_group_number $highest_even_order $highest_odd_order $muR_equivalent
#sadabs-auto-run-2014 $raw_file $sadabs_laue_group_number $highest_even_order $highest_odd_order $muR_equivalent

# Tidy up the output file names and report what SADABS was given.
#
# This script treats the presence of a file called 'sadabs-auto-temp' in the 
# current directory as the sign that the SADABS run worked.  When it is there, 
# the SADABS output files, which carry the full raw-file name (<raw_file>.abs 
# and <raw_file>.hkl), are renamed to <filename>.abs and <filename>.hkl.  This 
# is done for every kind of raw file, so stems that do not start with 'k' or 
# 'x' (e.g. 's' for synchrotron data) are renamed as well, as the message 
# printed below says.

# Rename the SADABS outputs of raw file $1 to use the plain stem $2.  Both 
# renames are attempted; returns 1 if either of them fails.
rename_sadabs_outputs() {
  local raw=$1 stem=$2 status=0
  mv -- "${raw}.abs" "${stem}.abs" || status=1
  mv -- "${raw}.hkl" "${stem}.hkl" || status=1
  return "$status"
}

sadabs_succeeded=no
if [[ -f "sadabs-auto-temp" ]]; then
  sadabs_succeeded=yes
  rename_sadabs_outputs "$raw_file" "$filename"
  echo
fi

echo " SADABS-AUTO-RUN, and hence SADABS, was fed the following parameters: "
echo
echo "   Highest even spherical harmonic   " "$highest_even_order"
echo "   Highest odd spherical harmonic    " "$highest_odd_order"
echo "   Mean equivalent-sphere muR value  " "$muR_equivalent"
echo "   SADABS-specific Laue group number " "$sadabs_laue_group_number"
if [[ -n "$weighting_scheme" ]]; then 
  echo "   SADABS weighting scheme used type " "$weighting_scheme"
else 
  echo "   SADABS weighting scheme used type  5 (default) "
fi

if [[ $sadabs_succeeded == yes ]]; then
  echo
  echo " SADABS-AUTO has renamed the output files to ${filename}.hkl and ${filename}.abs"
  rm -- sadabs-auto-temp
else
  echo
  echo " SADABS-AUTO failed because too high of a Laue group number was specified. "
  # -f: a failed run may not have left an .abs file behind at all.
  rm -f -- "${raw_file}.abs"
fi

# When a CIF is available, use the maximum transmission reported by SADABS to 
# estimate the shortest crystal dimension, as a sanity check on muR.  The 
# estimate is  ln(Tmax) / -mu,  worked out by bc to three decimal places.
#
# Nothing is calculated unless the *.abs file exists and both Tmax and mu are 
# positive numbers; otherwise bc would be handed an empty or zero operand.

# Print field number $2 of the first "Estimated minimum and maximum 
# transmission:" line in file $1.  On that line Tmin is field 6 and Tmax is 
# field 7.
transmission_field() {
  awk -v n="$2" \
    '/Estimated minimum and maximum transmission:/ { print $n; exit }' "$1"
}

# Succeeds if $1 is a plain decimal number greater than zero.
is_positive_number() {
  local number_re='^[0-9]*\.?[0-9]+$'
  [[ $1 =~ $number_re && $1 == *[1-9]* ]]
}

if [ -f "$cif_file" ]; then
  abs_file="${filename}.abs"
  if [[ -f "$abs_file" ]]; then
    tmax=$(transmission_field "$abs_file" 7)
    mu=$(awk '$1 == "_exptl_absorpt_coefficient_mu" { print $2; exit }' "$cif_file")
    mu=${mu%%(*}
    if is_positive_number "$tmax" && is_positive_number "$mu"; then
      shortest=$(echo "scale=3; l($tmax)/-$mu" | bc -l)
      echo 
      echo " Based on the max. transmission coefficient, the shortest crystal dimension "
      echo " is predicted to be about ${shortest} mm. If this isn't close then something isn't "
      echo " right, so you should try a different muR value. If the estimate is too big"
      echo " then use a smaller muR.  Spherical harmonic orders could also be tweaked." 
    else
      echo
      echo " The shortest crystal dimension could not be estimated because no usable "
      echo " Tmax (from ${abs_file}) or mu (from ${cif_file}) was found. "
    fi
  fi
fi

# Close the screen output with a combined SADABS / SADABS-AUTO footer, then 
# finish.
#
printf '%s\n' \
  "" \
  " --------------------------------------------------------------------------" \
  " SADABS-2014/4 - Bruker AXS area detector scaling and absorption correction" \
  " SADABS-AUTO: SADABS automation by bash/expect scripts: S.Parkin 2015/02/19" \
  " ==========================================================================" \
  ""

exit
