#!/usr/bin/env bash
#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------


##############################################################
# This script is part of the SystemDS binary release. It is
# meant to work out of the box when unzipping the
# systemds-<version>.zip (or tbz) file.
#
# Make configuration changes here:
##############################################################

# Launch mode: 0 runs a local java process, 1 runs spark-submit (needed
# for "-exec SPARK" or "-exec HYBRID"). A non-empty value from the
# environment is kept as is.
: "${SYSDS_DISTRIBUTED:=0}"

# Verbosity: 0 prints the setup information of this script, 1 suppresses
# it. A non-empty value from the environment is kept as is.
: "${SYSDS_QUIET:=0}"

# Print one message line unless quiet mode is active.
#   Globals:   SYSDS_QUIET (read) - "0" enables output; an unset or empty
#              value is treated like "0", any other value silences output
#   Arguments: $1 - the text to print
#   Outputs:   the text followed by a newline on stdout
print_out()
{
  if [[ "${SYSDS_QUIET:-0}" == "0" ]]; then
    # printf instead of echo so that text such as "-n" is printed verbatim
    printf '%s\n' "$1"
  fi
}

# Parameters handed to java when running locally. A value exported as
# SYSTEMDS_STANDALONE_OPTS takes precedence over the defaults below.
if [ -z "$SYSTEMDS_STANDALONE_OPTS" ]; then
  SYSTEMDS_STANDALONE_OPTS="\
      -Xmx4g\
      -Xms4g\
      -Xmn400m"
else
  print_out "Overriding SYSTEMDS_STANDALONE_OPTS with env var: $SYSTEMDS_STANDALONE_OPTS"
fi

# Parameters handed to spark-submit when running on spark. A value exported
# as SYSTEMDS_DISTRIBUTED_OPTS takes precedence over the defaults below.
if [ -z "${SYSTEMDS_DISTRIBUTED_OPTS}" ]; then
  SYSTEMDS_DISTRIBUTED_OPTS="\
      --master yarn \
      --deploy-mode client\
      --driver-memory 96g \
      --num-executors 4 \
      --executor-memory 64g \
      --executor-cores 16"
else
  print_out "Overriding SYSTEMDS_DISTRIBUTED_OPTS with env var $SYSTEMDS_DISTRIBUTED_OPTS"
fi


##############################################################
# No need to touch the content below. These commands launch
# SystemDS based on the settings above.
##############################################################


#-------------------------------------------------------------
# some helper functions

# Print the usage message of this run script and terminate.
#   Arguments: none ($0 of the running script is shown in the usage lines)
#   Outputs:   the usage text on stdout
#   Returns:   never returns; exits the shell with status 1
printUsageExit()
{
# unquoted EOF delimiter: $0 is expanded inside the here-document
cat << EOF
Usage: $0 [SystemDS.jar] [-f] <dml-filename> [arguments] [-help]

    SystemDS.jar - Specify a custom SystemDS.jar file (this will be prepended to the classpath
                   or fed to spark-submit)
    dml-filename - The script file to run
    arguments    - The arguments specified after the DML script are passed to SystemDS.
                   Specify parameters that need to go to java/spark-submit by editing this
                   run script.
    -help        - Print this usage message and exit

Worker Usage: $0 WORKER [SystemDS.jar] <portnumber> [arguments] [-help]

    port         - The port to open for the federated worker.

Set custom launch configuration by setting/editing SYSTEMDS_STANDALONE_OPTS and/or SYSTEMDS_DISTRIBUTED_OPTS

Set the environment variable SYSDS_DISTRIBUTED=1 to run spark-submit instead of local java
Set SYSDS_QUIET=1 to omit extra information printed by this run script.
EOF
  exit 1
}

# print an error if no argument is supplied.
if [ -z "$1" ] ; then
  echo "Wrong Usage."
  printUsageExit
fi

# Only the leading argument is inspected for options of this run script.
# Everything after the DML script belongs to SystemDS and must not be
# scanned here: running getopts over the whole command line (and shifting
# inside its loop) made later SystemDS flags such as "-stats" look like
# unknown options of this script.
case "$1" in
  -f )
    # "-f" itself is left in place; the positional parsing that follows
    # accepts an optional "-f" in front of the DML file and removes it.
    if [ $# -lt 2 ]; then
      # "-f" without a script file cannot be run
      echo Warning: Help requested. Will exit after usage message
      printUsageExit
    fi
    ;;
  -* )
    # -h, -help and any other leading option end in the usage message
    echo Warning: Help requested. Will exit after usage message
    printUsageExit
    ;;
esac

# Peel off the leading arguments that are meant for this run script so that
# only the arguments for the DML script remain.
#
# Accepted layouts:
#   [SystemDS.jar] [-f] <dml-filename> [arguments]
#   WORKER [SystemDS.jar] <portnumber> [arguments]
#
# The jar is recognised by its ".jar" suffix, "WORKER" and "-f" by exact
# match. Substring matching would misread script names such as
# "my-file.dml" (contains "-f") or "jarvis.dml" (contains "jar").
#
#   Globals written: SYSTEMDS_JAR_FILE (only if a jar is given),
#                    SCRIPT_FILE (script mode), WORKER and PORT (worker mode),
#                    DML_ARGS (array with the remaining arguments)
#   Arguments:       the command line arguments of this run script
#   Exits:           through printUsageExit if the worker port is not a number
parse_launch_args()
{
  local port_pattern='^[0-9]+$'

  if [[ "$1" == *.jar ]]; then
    SYSTEMDS_JAR_FILE=$1
    shift
    # optional '-f' before DML file (for consistency)
    if [[ "$1" == "-f" ]]; then
      shift
    fi
    SCRIPT_FILE=$1
    shift
  elif [[ "$1" == "WORKER" ]]; then
    WORKER=1
    shift
    if [[ "$1" == *.jar ]]; then
      SYSTEMDS_JAR_FILE=$1
      shift
    fi
    PORT=$1
    if ! [[ $PORT =~ $port_pattern ]]; then
      echo "error: Port is not a number"
      printUsageExit
    fi
    shift
  else
    # optional '-f' before DML file (for consistency)
    if [[ "$1" == "-f" ]]; then
      shift
    fi
    SCRIPT_FILE=$1
    shift
  fi

  # whatever is left belongs to the DML script / federated worker
  DML_ARGS=("$@")
}

parse_launch_args "$@"
# make the positional parameters hold only the remaining arguments
set -- "${DML_ARGS[@]}"

if [ -z "$WORKER" ] ; then
  WORKER=0
fi

# Determine SYSTEMDS_ROOT: default to the current directory, otherwise
# turn the value from the environment into a path relative to it.
if [ -z "$SYSTEMDS_ROOT" ] ; then
  SYSTEMDS_ROOT=.
  print_out "SYSTEMDS_ROOT not set defaulting to current dir $(pwd)"
else
  # construct a relative path (quoted so a root containing blanks stays
  # a single argument)
  SYSTEMDS_ROOT=$(realpath --relative-to=. "${SYSTEMDS_ROOT}")
fi

# when using find, look in the directories in this order
DIR_SEARCH_ORDER="conf lib $SYSTEMDS_ROOT/conf $SYSTEMDS_ROOT/target"
# same directories as an array, so the jar lookup below does not depend on
# word splitting of the string above
JAR_SEARCH_DIRS=(conf lib "$SYSTEMDS_ROOT/conf" "$SYSTEMDS_ROOT/target")

# find me a SystemDS jar file to run: an unversioned systemds.jar is tried
# first, then a release jar, then a snapshot jar
if [ -z "$SYSTEMDS_JAR_FILE" ]; then
  SYSTEMDS_JAR_FILE=$(find "${JAR_SEARCH_DIRS[@]}" -iname "systemds.jar" 2> /dev/null | tail -n 1)
  if [ -z "$SYSTEMDS_JAR_FILE" ]; then
    SYSTEMDS_JAR_FILE=$(find "${JAR_SEARCH_DIRS[@]}" -iname "systemds-?.?.?.jar" 2> /dev/null | head -n 1)
  fi
  if [ -z "$SYSTEMDS_JAR_FILE" ]; then
    SYSTEMDS_JAR_FILE=$(find "${JAR_SEARCH_DIRS[@]}" -iname "systemds-?.?.?-SNAPSHOT.jar" 2> /dev/null | head -n 1)
  fi
else
  print_out "Using user supplied systemds jar file $SYSTEMDS_JAR_FILE"
fi

# Resolve the log4j configuration. If nothing usable is found LOG4JPROP
# ends up empty, so SystemDS runs with a non fatal complaint.
if [ -n "$LOG4JPROP" ]; then
  # value came from the environment: keep it only if find can locate it
  LOG4JPROP2=$(find "$LOG4JPROP")
  # ${x:+word} yields word only when x is non-empty, the empty string otherwise
  LOG4JPROP="${LOG4JPROP2:+-Dlog4j.configuration=file:$LOG4JPROP2}"
else
  # no value given: take the first match from the search directories
  LOG4JPROP=$(find $DIR_SEARCH_ORDER -iname "log4j*properties" 2> /dev/null | head -n 1)
  LOG4JPROP="${LOG4JPROP:+-Dlog4j.configuration=file:$LOG4JPROP}"
fi

# Same procedure for the SystemDS configuration file: CONFIG_FILE becomes
# either the complete "--config <file>" parameter or the empty string.
if [ -n "$CONFIG_FILE" ]; then
  # value came from the environment: drop it if find cannot locate it
  CONFIG_FILE2=$(find "$CONFIG_FILE")
  CONFIG_FILE="${CONFIG_FILE2:+--config $CONFIG_FILE}"
else
  CONFIG_FILE=$(find $DIR_SEARCH_ORDER -iname "SystemDS*config*.xml" 2> /dev/null  | head -n 1)
  CONFIG_FILE="${CONFIG_FILE:+--config $CONFIG_FILE}"
fi

# If HADOOP_HOME is not given, use the absolute path of the last entry
# named "hadoop" (case-insensitive) found below SYSTEMDS_ROOT and export it.
if [ -z "$HADOOP_HOME" ]; then
  hadoop_candidate=$(find "$SYSTEMDS_ROOT" -iname hadoop | tail -n 1 )
  HADOOP_HOME=$(realpath "$hadoop_candidate")
  export HADOOP_HOME
fi
# hadoop home relative to the current directory; goes into PATH and the
# library path below for loading hadoop jni
HADOOP_REL=$(realpath --relative-to=. "$HADOOP_HOME")

# choose directory and path separators depending on the operating system
case "$OSTYPE" in
  win32|msys|cygwin)
    PATH_SEP=';'
    DIR_SEP='\'
    # turn every forward slash into a backslash
    HADOOP_REL="${HADOOP_REL////\\}"
    ;;
  *)
    # unix style
    PATH_SEP=:
    DIR_SEP=/
    ;;
esac

# make the jar path relative to skip issues with Windows paths:
# relative directory of the jar + separator + file name of the jar
JARNAME=$(basename "$SYSTEMDS_JAR_FILE")
jar_dir_rel=$(realpath --relative-to=. "$(dirname "$SYSTEMDS_JAR_FILE")")
SYSTEMDS_JAR_FILE=${jar_dir_rel}${DIR_SEP}${JARNAME}

NATIVE_LIBS="$SYSTEMDS_ROOT${DIR_SEP}target${DIR_SEP}classes${DIR_SEP}lib"
PATH=${HADOOP_REL}${DIR_SEP}bin${PATH_SEP}${PATH}${PATH_SEP}$NATIVE_LIBS
export PATH
LD_LIBRARY_PATH=${HADOOP_REL}${DIR_SEP}bin${PATH_SEP}${LD_LIBRARY_PATH}
export LD_LIBRARY_PATH

# set java class path: the jar itself, then the lib and target/lib wildcards
CLASSPATH="${SYSTEMDS_JAR_FILE}${PATH_SEP}${SYSTEMDS_ROOT}${DIR_SEP}lib${DIR_SEP}*${PATH_SEP}${SYSTEMDS_ROOT}${DIR_SEP}target${DIR_SEP}lib${DIR_SEP}*"
# remove every whitespace character from the class path
CLASSPATH=$(echo "${CLASSPATH}" | tr -d '[:space:]')

# Summary of the resolved settings (suppressed when SYSDS_QUIET is not 0)
print_out "###############################################################################"
print_out "#  SYSTEMDS_ROOT= $SYSTEMDS_ROOT"
print_out "#  SYSTEMDS_JAR_FILE= $SYSTEMDS_JAR_FILE"
print_out "#  CONFIG_FILE= $CONFIG_FILE"
print_out "#  LOG4JPROP= $LOG4JPROP"
print_out "#  CLASSPATH= $CLASSPATH"
print_out "#  HADOOP_HOME= $HADOOP_HOME"

# Build the command to run as an array. Arguments of the DML script are
# taken from "$@" so that an argument containing blanks reaches SystemDS as
# one word. The *_OPTS, LOG4JPROP and CONFIG_FILE values are option strings
# and are deliberately left unquoted so they split into separate words.
if [ "$WORKER" == 1 ]; then
  print_out "#"
  print_out "#  starting Fedederated worker on port $PORT"
  print_out "###############################################################################"
  CMD=(java $SYSTEMDS_STANDALONE_OPTS
       -cp "$CLASSPATH"
       $LOG4JPROP
       org.apache.sysds.api.DMLScript
       -w "$PORT"
       "$@")
  print_out "Executing command: ${CMD[*]}"
  print_out  ""

elif [ "$SYSDS_DISTRIBUTED" == 0 ]; then
  print_out "#"
  print_out "#  Running script $SCRIPT_FILE locally with opts: $*"
  print_out "###############################################################################"
  CMD=(java $SYSTEMDS_STANDALONE_OPTS
       -cp "$CLASSPATH"
       $LOG4JPROP
       org.apache.sysds.api.DMLScript
       -f "$SCRIPT_FILE"
       -exec singlenode
       $CONFIG_FILE
       "$@")
  print_out "Executing command:  ${CMD[*]}"
  print_out ""
else
  print_out "#"
  print_out "#  Running script $SCRIPT_FILE distributed with opts: $*"
  print_out "###############################################################################"
  export SPARK_MAJOR_VERSION=2
  CMD=(spark-submit $SYSTEMDS_DISTRIBUTED_OPTS
       "$SYSTEMDS_JAR_FILE"
       -f "$SCRIPT_FILE"
       "$@")
  print_out "Executing command: ${CMD[*]}"
  print_out  ""
fi

# run; as the last command its exit status becomes the exit status of this
# script
"${CMD[@]}"
