# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
# 
#   http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# -*- mode: ruby -*-
# vi: set ft=ruby :

#
# Builds a single node Rya on Accumulo on Hadoop and Zookeeper.
# Deploys Rya, and the Eclipse rdf4j-workbench on Tomcat accessible from the host's browser.
# See the accompanying readme for URLs, verification, and troubleshooting.
#
# Note: Machine's ip is 192.168.33.10
# username : vagrant
# password : vagrant
#
# accumulo instance : dev
# accumulo username : root
# accumulo password : root

Vagrant.configure(2) do |config|

  # Base box, hostname and host-only address of the example VM.
  config.vm.box = "bento/ubuntu-18.04"
  config.vm.hostname = "rya-example-box"
  config.vm.network :private_network, ip: "192.168.33.10"

  # VirtualBox machine name and resources.
  config.vm.provider "virtualbox" do |vbox|
    vbox.cpus = 4
    vbox.memory = "4096"
    vbox.name = "rya-example-box"
  end

  config.vm.provision "shell", inline: <<-SHELL
    set -x  ## turn on command echo with expanded variables
    # Versions of every component installed below; later steps build their
    # download URLs and install directories from these variables.
    export ACCUMULO_VERSION=1.6.6
    export HADOOP_VERSION=2.7.2
    ### IMPORTANT: confirm that this Rya release is actually published in the Maven
    ### repository below; this script was written before the release existed.
    export RYA_EXAMPLE_VERSION=4.0.1
    export RDF4J_VERSION=2.5.5
    export ZOOKEEPER_VERSION=3.4.5-cdh5.16.1
    mavenRepoUrl=https://repo1.maven.org/maven2/
    echo "Updating host file with permanent ip"
    # Replace the 127.0.1.1 address in the hosts file with the VM's fixed private address.
    sudo sed -i 's/127.0.1.1/192.168.33.10/' /etc/hosts
    # Append the extra entries only once, so that re-running 'vagrant provision'
    # does not keep growing /etc/hosts with duplicate lines.
    if ! grep -qxF '192.168.33.10 zoo1 zoo2 zoo3 leader1' /etc/hosts ; then
        cat >> /etc/hosts <<EOF
127.0.0.1     localhost
192.168.33.10 zoo1 zoo2 zoo3 leader1
EOF
    fi
    # Refresh the package index, then install the OS packages the rest of the script relies on.
    sudo -E apt-get -qq update
    echo "Installing Java installer..."
    # if you want dev tools like javac, change this to openjdk-8-jdk-headless
    sudo -E apt-get install -y openjdk-8-jre || exit $?

    echo "Installing Tomcat..."
    # Create the tomcat account (home /opt/tomcat, no login shell) unless it already
    # exists, so that re-provisioning does not report a useradd failure.
    id -u tomcat >/dev/null 2>&1 || sudo useradd -r -m -U -d /opt/tomcat -s /bin/false tomcat
    sudo -E apt-get install -y tomcat9  || exit $?
    sudo usermod -a -G tomcat vagrant
    echo "Installing Unzip..."
    # -y matches the other installs: provisioning has no terminal to answer a confirmation prompt.
    sudo -E apt-get install -y unzip || exit $?
    echo "Setting up environment..."
    # Environment for this provisioning run. The install directories are derived from
    # the *_VERSION variables exported earlier in this script.
    export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/jre
    export HADOOP_HOME=/home/vagrant/hadoop-${HADOOP_VERSION}
    export ZOOKEEPER_HOME=/home/vagrant/zookeeper-${ZOOKEEPER_VERSION}
    export ZOO_LOG_DIR=${ZOOKEEPER_HOME}/logs/
    export ACCUMULO_HOME=/home/vagrant/accumulo-${ACCUMULO_VERSION}
    # PATHADD is kept separately so that the same additions can be written to the rc file below.
    export PATHADD=$JAVA_HOME/bin:$ZOOKEEPER_HOME/bin:$ACCUMULO_HOME/bin:$HADOOP_HOME/bin
    export PATH=$PATH:$PATHADD
    export HADOOP_PREFIX="$HADOOP_HOME"
    export HADOOP_CONF_DIR="$HADOOP_PREFIX/etc/hadoop"
    export ACCUMULO_LOG_DIR=$ACCUMULO_HOME/logs
    # JVM options for the individual Accumulo processes.
    export ACCUMULO_TSERVER_OPTS="-Xmx384m -Xms384m "
    export ACCUMULO_MASTER_OPTS="-Xmx128m -Xms128m"
    export ACCUMULO_MONITOR_OPTS="-Xmx64m -Xms64m"
    export ACCUMULO_GC_OPTS="-Xmx64m -Xms64m"
    export ACCUMULO_GENERAL_OPTS="-XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=75 -Djava.net.preferIPv4Stack=true"
    export ACCUMULO_OTHER_OPTS="-Xmx128m -Xms64m"
    export ACCUMULO_KILL_CMD='kill -9 %p'
    # Write the same settings to an rc file so that later shells can source them.
    # The heredoc below is unquoted, so the variables are expanded now and their current
    # values are written out; only PATH is escaped so that it is evaluated when the file
    # is sourced. This whole script sits inside an interpolating Ruby heredoc, so Ruby
    # consumes one level of backslash escaping before the shell sees the text; that is
    # why the quotes and the PATH reference below carry backslashes.
    ACCUMULO_RC=/home/vagrant/.accumulo_rc.sh
    cat > ${ACCUMULO_RC}  <<EOF
        export JAVA_HOME=\'$JAVA_HOME\'
        export HADOOP_HOME=\'$HADOOP_HOME\'
        export ZOOKEEPER_HOME=\'$ZOOKEEPER_HOME\'
        export ZOO_LOG_DIR=\'$ZOO_LOG_DIR\'
        export ACCUMULO_HOME=\'$ACCUMULO_HOME\'
        export PATH=\\$PATH:$PATHADD
        export HADOOP_PREFIX=\'$HADOOP_PREFIX\'
        export HADOOP_CONF_DIR=\'$HADOOP_CONF_DIR\'
        export ACCUMULO_LOG_DIR=\'$ACCUMULO_LOG_DIR\'
        export ACCUMULO_TSERVER_OPTS=\'$ACCUMULO_TSERVER_OPTS\'
        export ACCUMULO_MASTER_OPTS=\'$ACCUMULO_MASTER_OPTS\'
        export ACCUMULO_MONITOR_OPTS=\'$ACCUMULO_MONITOR_OPTS\'
        export ACCUMULO_GC_OPTS=\'$ACCUMULO_GC_OPTS\'
        export ACCUMULO_GENERAL_OPTS=\'$ACCUMULO_GENERAL_OPTS\'
        export ACCUMULO_OTHER_OPTS=\'$ACCUMULO_OTHER_OPTS\'
        export ACCUMULO_KILL_CMD=\'$ACCUMULO_KILL_CMD\'
        
        ### command to list the 7 correct java processes: tomcat-catalina, zookeeper, and 5 Accumulo: tracer, master, monitor, tserver, gc.
        function ryaps() { ps -ef | grep java | tr ' ' '\\n' | egrep '^org\\.apache|^tracer|^master|^monitor|^tserver|^gc' | sed '/\\.Main/ N ; s/\\n/ /' ; }
EOF
    # Load the generated file into this run too; abort provisioning if it cannot be sourced.
    source ${ACCUMULO_RC} || exit 151
    # Source the rc file from the top of both shell start-up files. The line is added
    # only when it is not already there, so re-provisioning does not pile up duplicates.
    # Exits with status 152 if a start-up file cannot be rewritten.
    for BASHRC in /home/vagrant/.bashrc /home/vagrant/.bash_profile ; do
        touch ${BASHRC}
        if ! grep -qxF "source ${ACCUMULO_RC}" ${BASHRC} ; then
            { echo "source ${ACCUMULO_RC}" ; cat ${BASHRC} ; } > ${BASHRC}.new && mv ${BASHRC}.new ${BASHRC} || exit 152
        fi
    done
    echo "Acquiring and Extracting ..."

    # Print all arguments, joined by single spaces, as one line on stderr.
    echoerr() {
        printf "%s\n" "$*" 1>&2
    }
    # Fetch a URL with curl. The payload goes to stdout unless the caller passes --output.
    #   Arguments: handed to curl unchanged (URL and any extra curl options).
    #   On failure: prints a banner on stderr and exits the current (sub)shell with status 80.
    # Exit statuses are truncated to 8 bits, so the former 'exit 800' was reported as 32;
    # the code now stays inside 0-255 so that the reported status matches the script.
    # When download is the left side of a pipeline, the exit only ends that pipeline
    # element; the caller's own '|| exit' then reports the failure.
    function download {
      ### curl --fail treats http status >= 400 as an error. --location follows redirects (status >= 300)
      if ! curl --silent --show-error --fail --location "$@" ; then
        echoerr "--------------------------"
        echoerr "-"
        echoerr "- download failed" "$@"
        echoerr "-"
        echoerr "-"   exiting ...
        echoerr "-"
        echoerr "--------------------------"
        exit 80
      fi
    }
    ### Wait for a directory to exist, polling every 5 seconds.
    ###   $1  directory to wait for
    ###   $2  optional timeout in seconds (default 60)
    ### Returns as soon as the directory exists. On timeout prints a message and exits
    ### with status 41. Exit statuses are truncated to 8 bits, so the former 'exit 401'
    ### was reported as 145. The directory is checked once more after the final sleep
    ### before giving up.
    function waitForDeploy {
        local waitfordir="$1"
        local timeout="${2:-60}"
        while [[ ! -d "$waitfordir" ]] ; do
            if [[ $timeout -le 0 ]] ; then
                echo "Timeout waiting for war to deploy, $waitfordir still does not exist."
                exit 41
            fi
            sleep 5
            timeout=$(( timeout - 5 ))
        done
    }
    # Announce a component, then download and unpack its tarball into /home/vagrant
    # unless its install directory already exists.
    #   $1 label to print, $2 install directory, $3 tarball URL, $4 exit status on failure
    function fetchTarball {
        echo "- $1"
        if [[ -d $2 ]] ; then
            return 0
        fi
        echo "Downloading $3"
        download $3 | tar -zxC /home/vagrant || exit $4
    }
    hadoopUrl=https://archive.apache.org/dist/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz
    zookeeperUrl=http://archive.cloudera.com/cdh5/cdh/5/zookeeper-${ZOOKEEPER_VERSION}.tar.gz
    accumuloUrl=https://archive.apache.org/dist/accumulo/${ACCUMULO_VERSION}/accumulo-${ACCUMULO_VERSION}-bin.tar.gz
    fetchTarball "Hadoop"    "${HADOOP_HOME}"    "$hadoopUrl"    101
    fetchTarball "Zookeeper" "${ZOOKEEPER_HOME}" "$zookeeperUrl" 102
    fetchTarball "Accumulo"  "${ACCUMULO_HOME}"  "$accumuloUrl"  103
    echo "Configuring Zookeeper..."
    # Data directory for Zookeeper, handed over to the vagrant user.
    sudo mkdir --parents /var/zookeeper
    sudo chown vagrant:vagrant /var/zookeeper
    # Start from the sample configuration shipped with the distribution.
    sudo cp ${ZOOKEEPER_HOME}/conf/zoo_sample.cfg ${ZOOKEEPER_HOME}/conf/zoo.cfg
    # Change the dataDir to /var/zookeeper. The backslashes are doubled because this script is the
    # body of an interpolating Ruby heredoc, which consumes one level of escaping before sed runs.
    sudo sed -i 's/^\\s*dataDir\\s*=.*$/dataDir=\\/var\\/zookeeper/' ${ZOOKEEPER_HOME}/conf/zoo.cfg  || exit 104
    # Conflicts with Accumulo and maybe Zookeeper
    sudo rm --force ${HADOOP_HOME}/share/hadoop/common/lib/slf4j-api-1.7.10.jar
    sudo rm --force ${HADOOP_HOME}/share/hadoop/common/lib/slf4j-log4j12-1.7.10.jar
    # Assure logs are creatable and writeable
    sudo mkdir --parents ${ZOO_LOG_DIR}
    sudo touch "${ZOO_LOG_DIR}/zookeeper.out"
    sudo chmod -R a+wX  ${ZOO_LOG_DIR}
    echo "Running Zookeeper..."
    # -E keeps the exported environment (for example ZOO_LOG_DIR) for the start script.
    sudo -E ${ZOOKEEPER_HOME}/bin/zkServer.sh start
    echo "Configuring Accumulo..."
    # Start from the 1GB standalone example configuration, then replace accumulo-site.xml
    # with the settings for this VM. The heredoc is unquoted, so the HADOOP_HOME,
    # ACCUMULO_HOME, ZOOKEEPER_HOME and HADOOP_CONF_DIR references are expanded to their
    # current values as the file is written.
    # ZOOKEEPER_HOME is already an absolute path under /home/vagrant, so the Zookeeper
    # classpath entry uses it directly; prefixing /home/vagrant/ again produced a path
    # that does not exist.
    cp ${ACCUMULO_HOME}/conf/examples/1GB/standalone/* ${ACCUMULO_HOME}/conf/
    rm --force ${ACCUMULO_HOME}/conf/accumulo-site.xml
    cat >> ${ACCUMULO_HOME}/conf/accumulo-site.xml <<EOF
        <configuration>
            <property><name>instance.dfs.uri</name><value>file:///</value></property>
            <property><name>instance.dfs.dir</name><value>/data/accumulo</value></property>
            <property><name>instance.zookeeper.host</name><value>localhost:2181</value></property>
            <property><name>instance.secret</name><value>DONTTELL</value></property>
            <property><name>tserver.port.search</name><value>true</value></property>
            <property><name>logger.dir.walog</name><value>/data/accumulo/walogs</value></property>
            <property><name>tserver.cache.data.size</name><value>15M</value></property>
            <property><name>tserver.cache.index.size</name><value>15M</value></property>
            <property><name>tserver.memory.maps.max</name><value>256M</value></property>
            <property><name>tserver.walog.max.size</name><value>256M</value></property>
            <property><name>tserver.memory.maps.native.enabled</name><value>false</value></property>
            <property><name>trace.token.property.password</name><value>root</value></property>
            <property><name>gc.cycle.delay</name><value>4s</value></property>
            <property><name>gc.cycle.start</name><value>0s</value></property>
            <property><name>tserver.compaction.major.delay</name><value>3</value></property>
            <property><name>general.classpaths</name><value>
            /data/accumulo/lib/[^.].*.jar,
            ${HADOOP_HOME}/share/hadoop/common/.*.jar,
            ${HADOOP_HOME}/share/hadoop/common/lib/.*.jar,
            ${HADOOP_HOME}/share/hadoop/hdfs/.*.jar,
            ${HADOOP_HOME}/share/hadoop/mapreduce/.*.jar,
            ${HADOOP_HOME}/share/hadoop/yarn/.*.jar,
            ${ACCUMULO_HOME}/server/target/classes/,
            ${ACCUMULO_HOME}/lib/accumulo-server.jar,
            ${ACCUMULO_HOME}/core/target/classes/,
            ${ACCUMULO_HOME}/lib/accumulo-core.jar,
            ${ACCUMULO_HOME}/start/target/classes/,
            ${ACCUMULO_HOME}/lib/accumulo-start.jar,
            ${ACCUMULO_HOME}/fate/target/classes/,
            ${ACCUMULO_HOME}/lib/accumulo-fate.jar,
            ${ACCUMULO_HOME}/proxy/target/classes/,
            ${ACCUMULO_HOME}/lib/accumulo-proxy.jar,
            ${ACCUMULO_HOME}/lib/[^.].*.jar,
            ${ZOOKEEPER_HOME}/zookeeper[^.].*.jar,
            $HADOOP_CONF_DIR,
            ${HADOOP_HOME}/[^.].*.jar,
            ${HADOOP_HOME}/lib/[^.].*.jar,
            </value></property>
            <property><name>general.dynamic.classpaths</name><value>/data/accumulo/lib/ext/[^.].*.jar</value></property>
            <property><name>trace.port.client</name><value>0</value></property>
            <property><name>monitor.port.client</name><value>0</value></property>
            <property><name>master.port.client</name><value>0</value></property>
            <property><name>tserver.port.client</name><value>0</value></property>
            <property><name>gc.port.client</name><value>0</value></property>
        </configuration>
EOF
    # Both host-list files name only this VM (the hostname set in the Vagrant config above).
    for hostList in masters slaves ; do
        echo "rya-example-box" > ${ACCUMULO_HOME}/conf/${hostList}
    done
    # Data root on the local filesystem, owned by vagrant, plus the directory that
    # general.dynamic.classpaths points at.
    sudo mkdir --parents /data
    sudo chown vagrant:vagrant /data
    mkdir --parents /data/accumulo/lib/ext

    accumuloLogs=${ACCUMULO_HOME}/logs/
    sudo chmod -R a+rwX ${accumuloLogs}
    echo "Starting Accumulo..."
    echo "Init will fail during a re-provision, but you can ignore it:  'FATAL: It appears the directories [...] were previously initialized.'"
    echo "Also, you may see an indefinitely repeating: 'Waiting for accumulo to be initialized' which means Accumulo won't start."
    echo "Either issue can be resolved by removing the directory: 'sudo rm -r /data/accumulo' then re-provision. Warning: this will erase all Rya/Accumulo data."
    # Init is best effort (see the messages above); only a failed start aborts provisioning.
    ${ACCUMULO_HOME}/bin/accumulo init --instance-name dev --password root
    ${ACCUMULO_HOME}/bin/start-all.sh  || exit 107
    # Open up the logs again after the start scripts have run.
    sudo chmod -R a+rwX ${accumuloLogs}
    echo 'Done!'
    # --------------------
    echo "Installing RDF4J Server"
    # this is the rdf4j data dir.  It might need to be registered with tomcat to write there.
    export rdf4jServerBase=/opt/tomcat/
    sudo install -d -o tomcat -m u=rwx,go=rx ${rdf4jServerBase}
    sudo -u tomcat mkdir --parents ${rdf4jServerBase}/server/logs
    sudo -u tomcat mkdir --parents ${rdf4jServerBase}/.RDF4J/server/logs
    # tomcat v9 requires explicit permissions to the file system.
    sudo mkdir --parents          /etc/systemd/system/tomcat9.service.d
    # Overwrite the drop-in instead of appending to it, so that re-provisioning does not
    # accumulate repeated [Service] sections in the file.
    cat <<EOF | sudo tee /etc/systemd/system/tomcat9.service.d/logging-allow.conf
[Service]
ReadWritePaths=${rdf4jServerBase}
EOF
    echo 'Restarting Tomcat (or starting)'
    # Exit statuses are truncated to 8 bits; the former 1080, 1081 and 1082 were reported
    # as 56, 57 and 58. These codes stay inside 0-255 and are unused elsewhere in the script.
    sudo -E systemctl daemon-reload     || exit 116
    sudo -E systemctl enable tomcat9    || exit 117
    sudo -E systemctl restart tomcat9   || exit 118
    rdf4jwar=/var/lib/tomcat9/webapps/rdf4j-server.war
    # Fetch the war only when it is missing or empty.
    if [[ ! -s $rdf4jwar ]] ; then
        echo "Downloading RDF4J Server"
        download --output $rdf4jwar ${mavenRepoUrl}org/eclipse/rdf4j/rdf4j-http-server/${RDF4J_VERSION}/rdf4j-http-server-${RDF4J_VERSION}.war || exit 110
    fi
    echo "RDF4J http server deployed at http://rya-example-box:8080/rdf4j-server"
    # --------------------
    echo "Installing RDF4J Workbench"
    workbench=/var/lib/tomcat9/webapps/rdf4j-workbench.war
    workbenchUrl=${mavenRepoUrl}org/eclipse/rdf4j/rdf4j-http-workbench/${RDF4J_VERSION}/rdf4j-http-workbench-${RDF4J_VERSION}.war
    # Fetch the war only when it is missing or empty.
    [[ -s $workbench ]] || {
        echo "Downloading RDF4J Workbench"
        download --output $workbench ${workbenchUrl} || exit 111
    }
    echo "RDF4J workbench deployed at http://rya-example-box:8080/rdf4j-workbench"
    # ----------------------
    echo "Installing Rya"
    ryaIndexing=rya.indexing.example-${RYA_EXAMPLE_VERSION}-distribution
    # Place in the vagrant folder from your dev project, example: extras/indexingExample/target/rya.indexing.example-4.0.1-distribution.zip
    if [[ -s "/vagrant/${ryaIndexing}.zip" ]] ; then
        echo "Using found local file:  /vagrant/${ryaIndexing}.zip "
        cp "/vagrant/${ryaIndexing}.zip"  ./
    fi
    # Otherwise fetch the distribution from the Maven repository.
    if [[ ! -s ${ryaIndexing}.zip ]] ; then
        echo "Downloading ${ryaIndexing}.zip quietly, this will take some minutes with no output..."
        download --output ${ryaIndexing}.zip ${mavenRepoUrl}org/apache/rya/rya.indexing.example/${RYA_EXAMPLE_VERSION}/${ryaIndexing}.zip || exit 112
    fi
    sudo mkdir --parents ${ryaIndexing}
    sudo unzip -q -o ${ryaIndexing}.zip -d ${ryaIndexing}
    # before continuing, wait for tomcat to deploy wars:
    waitForDeploy /var/lib/tomcat9/webapps/rdf4j-workbench/WEB-INF/lib/
    waitForDeploy /var/lib/tomcat9/webapps/rdf4j-server/WEB-INF/lib/
    # soft linking the files doesn't seem to work in tomcat, so we copy them instead :(
    sudo -u tomcat cp ${ryaIndexing}/dist/lib/* /var/lib/tomcat9/webapps/rdf4j-workbench/WEB-INF/lib/ || exit 113
    sudo -u tomcat cp ${ryaIndexing}/dist/lib/* /var/lib/tomcat9/webapps/rdf4j-server/WEB-INF/lib/    || exit 114
    # These are older libs that break tomcat and rdf4j that come with Rya above.
    sudo rm --force /var/lib/tomcat9/webapps/rdf4j-workbench/WEB-INF/lib/servlet-api-2.5.jar
    sudo rm --force /var/lib/tomcat9/webapps/rdf4j-workbench/WEB-INF/lib/jsp-api-2.1.jar
    s=/var/lib/tomcat9/webapps/rdf4j-server/WEB-INF/lib/
    # The variable s already names the WEB-INF/lib directory, so both jars sit directly
    # under it. The former extra lib/ path component meant jsp-api-2.1.jar was never removed.
    sudo rm --force $s/servlet-api-2.5.jar   $s/jsp-api-2.1.jar
    sudo find "$s" -name 'spring-*-3.*.jar' -exec sudo rm --force {} +
    sudo rm $s/spring-aop-4.2.1.RELEASE.jar $s/rdf4j-http-server-spring-2.3.1.jar
    sudo rm --force  $s/slf4j-log4j12-1.7.25.jar   $s/slf4j-api-1.7.25.jar  $s/jcabi-log-0.14.jar   $s/jcl-over-slf4j-1.7.25.jar   $s/minlog-1.3.0.jar   $s/commons-logging-1.1.1.jar   $s/log4j-1.2.16.jar
    # The copied jars must belong to tomcat.
    sudo chown -R tomcat:tomcat /var/lib/tomcat9/webapps/rdf4j-workbench/WEB-INF/lib/
    sudo chown -R tomcat:tomcat /var/lib/tomcat9/webapps/rdf4j-server/WEB-INF/lib/
    # ----------------------
    echo "Downloading and installing new templates for RDF4J WorkBench"
    ryaVagrant=rya.vagrant.example-${RYA_EXAMPLE_VERSION}
    transformations=/var/lib/tomcat9/webapps/rdf4j-workbench/transformations
    # A jar placed in the shared /vagrant folder is used first
    # (from your dev project: extras/vagrantExample/target/rya.vagrant.example-*.jar).
    if [[ -s "/vagrant/${ryaVagrant}.jar" ]] ; then
        echo "Using found local file:  /vagrant/${ryaVagrant}.jar "
        cp "/vagrant/${ryaVagrant}.jar"  ./
    fi
    # Otherwise fetch it from the Maven repository.
    [[ -s ${ryaVagrant}.jar ]] || {
        echo "Downloading ${ryaVagrant}.jar"
        download --output ${ryaVagrant}.jar ${mavenRepoUrl}org/apache/rya/rya.vagrant.example/${RYA_EXAMPLE_VERSION}/${ryaVagrant}.jar || exit 120
    }
    # Unpack the jar, then copy its stylesheets into the deployed workbench.
    sudo mkdir --parents ${ryaVagrant}
    sudo unzip -q -o ${ryaVagrant}.jar -d ${ryaVagrant}
    waitForDeploy ${transformations}
    sudo -u tomcat cp ${ryaVagrant}/*.xsl ${transformations}/
    sudo chown tomcat:tomcat ${transformations}/*
    # ----------------------
    echo "Deploying Rya Web"
    ryaWar=web.rya-${RYA_EXAMPLE_VERSION}.war
    ryaWebApp=/var/lib/tomcat9/webapps/web.rya
    # A war placed in the shared /vagrant folder is used first
    # (from your dev project: web/web.rya/target/web.rya.war, renamed to include the version).
    if [[ -s "/vagrant/${ryaWar}" ]] ; then
        echo "Using found local file: /vagrant/${ryaWar} "
        cp "/vagrant/${ryaWar}"  ./
    fi
    # Otherwise fetch it from the Maven repository.
    [[ -s ${ryaWar} ]] || {
        echo "Downloading ${ryaWar}"
        download ${mavenRepoUrl}org/apache/rya/web.rya/${RYA_EXAMPLE_VERSION}/${ryaWar} --output ${ryaWar} || exit 121
    }
    # Remove the classes directory first so that its reappearance signals a fresh deployment.
    sudo rm -rf ${ryaWebApp}/WEB-INF/classes
    sudo -u tomcat cp    ${ryaWar} ${ryaWebApp}.war
    waitForDeploy ${ryaWebApp}/WEB-INF/classes/
    # These are older libs that break tomcat
    sudo rm --force ${ryaWebApp}/WEB-INF/lib/servlet-api-2.5*.jar
    sudo rm --force ${ryaWebApp}/WEB-INF/lib/jsp-api-2.1.jar
    echo "Modify Rya Web Config"
    sudo chmod -R a+rwX ${ryaWebApp}/
    # Connection settings for the web application; the values match the Accumulo
    # instance, user and password listed at the top of this file.
    cat > ${ryaWebApp}/WEB-INF/classes/environment.properties <<EOF
instance.name=dev
instance.zk=localhost:2181
instance.username=root
instance.password=root
rya.tableprefix=rya_
rya.displayqueryplan=true
EOF
    echo "Rya web deployed at http://rya-example-box:8080/web.rya/sparqlQuery.jsp"
    # Restart tomcat after the library and configuration changes above.
    systemctl restart tomcat9   || exit $?
    echo "Finished and ready to use!"
    echo "You can re-apply these settings without losing data by running the command 'vagrant provision'"
  SHELL
end
