You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

41 lines
1.6 KiB
Docker

FROM anapsix/alpine-java:8
ENV SPARK_VERSION=2.4.3 \
HADOOP_VERSION=2.7 \
TISPARK_PYTHON_VERSION=2.0 \
SPARK_HOME=/opt/spark \
SPARK_NO_DAEMONIZE=true \
SPARK_MASTER_PORT=7077 \
SPARK_MASTER_HOST=0.0.0.0 \
SPARK_MASTER_WEBUI_PORT=8080
ADD tispark-tests /opt/tispark-tests
# base image only contains busybox version nohup and ps
# spark scripts needs nohup in coreutils and ps in procps
# and we can use mysql-client to test tidb connection
RUN apk --no-cache add \
coreutils \
mysql-client \
procps \
python \
py-pip \
R
RUN wget -q https://download.pingcap.org/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz \
&& tar zxf spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz -C /opt/ \
&& ln -s /opt/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION} ${SPARK_HOME} \
&& wget -q http://download.pingcap.org/tispark-assembly-latest-linux-amd64.tar.gz \
&& tar zxf ./tispark-assembly-latest-linux-amd64.tar.gz -C /opt/ \
&& cp /opt/assembly/target/tispark-assembly-*.jar ${SPARK_HOME}/jars \
&& wget -q http://download.pingcap.org/tispark-sample-data.tar.gz \
&& tar zxf tispark-sample-data.tar.gz -C ${SPARK_HOME}/data/ \
&& rm -rf /opt/assembly/ spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz tispark-latest-linux-amd64.tar.gz tispark-sample-data.tar.gz
ADD spark-${SPARK_VERSION}/session.py ${SPARK_HOME}/python/pyspark/sql/
ADD conf/log4j.properties /opt/spark/conf/log4j.properties
ENV PYTHONPATH=${SPARK_HOME}/python/lib/py4j-0.10.4-src.zip:${SPARK_HOME}/python:$PYTHONPATH
WORKDIR ${SPARK_HOME}