diff options
Diffstat (limited to 'qa/workunits/hadoop')
-rwxr-xr-x | qa/workunits/hadoop/repl.sh | 42 | ||||
-rwxr-xr-x | qa/workunits/hadoop/terasort.sh | 76 | ||||
-rwxr-xr-x | qa/workunits/hadoop/wordcount.sh | 35 |
3 files changed, 153 insertions, 0 deletions
# ============================================================================
# File: qa/workunits/hadoop/repl.sh  (new file, mode 100755)
# ============================================================================
#!/usr/bin/env bash
#
# Creates one Ceph pool per replication factor, adds each pool as a CephFS
# data pool, writes a 1 MiB file into each pool through Hadoop, and checks
# that the replication Hadoop reports for the file equals the pool's size.
#
# Environment:
#   TESTDIR        required; the script exits with status 1 when it is empty.
#   HADOOP_PREFIX  optional; defaults to $TESTDIR/hadoop.

set -e
set -x
# Fail a pipeline when any stage fails, so a broken `hadoop fs -ls` or
# `ceph osd dump` is not hidden behind the exit status of awk/sed.
set -o pipefail

# bail if $TESTDIR is not set as this test will fail in that scenario
if [[ -z "${TESTDIR:-}" ]]; then
  echo "\$TESTDIR needs to be set, but is not. Exiting." >&2
  exit 1
fi

# if HADOOP_PREFIX is not set, use default
HADOOP_PREFIX=${HADOOP_PREFIX:-$TESTDIR/hadoop}
hadoop_bin="$HADOOP_PREFIX/bin/hadoop"

# Replication factors under test; one pool named hadoop.<factor> each.
replication_factors=(2 3 7 8 9)

# create pools with different replication factors
for repl in "${replication_factors[@]}"; do
  name="hadoop.$repl"
  ceph osd pool create "$name" 8 8
  ceph osd pool set "$name" size "$repl"

  # Extract the numeric pool id from the "pool <id> '<name>' ..." line.
  id=$(ceph osd dump | sed -n "s/^pool \([0-9]*\) '$name'.*/\1/p")
  if [[ -z "$id" ]]; then
    echo "could not determine the id of pool '$name'" >&2
    exit 1
  fi
  ceph fs add_data_pool cephfs "$id"
done

# create a file in each of the pools
for repl in "${replication_factors[@]}"; do
  name="hadoop.$repl"
  "$hadoop_bin" fs -rm -f "/$name.dat"
  dd if=/dev/zero bs=1048576 count=1 \
    | "$hadoop_bin" fs -Dceph.data.pools="$name" -put - "/$name.dat"
done

# check that hadoop reports replication matching
# that of the pool the file was written into
for repl in "${replication_factors[@]}"; do
  name="hadoop.$repl"
  repl2=$("$hadoop_bin" fs -ls "/$name.dat" | awk '{print $2}')

  # An empty or non-numeric value must fail the test: an unquoted
  # `[ $repl -ne $repl2 ]` would error out and be treated as "equal".
  if [[ ! "$repl2" =~ ^[0-9]+$ ]]; then
    echo "could not parse replication factor of /$name.dat: '$repl2'" >&2
    exit 1
  fi
  if [[ "$repl" -ne "$repl2" ]]; then
    echo "replication factors didn't match!" >&2
    exit 1
  fi
done

exit 0

# ============================================================================
# File: qa/workunits/hadoop/terasort.sh  (new file, mode 100755)
# ============================================================================
#!/usr/bin/env bash
#
# Runs the teragen, terasort and teravalidate programs from the Hadoop
# MapReduce examples jar, in that order.
#
# Environment:
#   TESTDIR        required; the script exits with status 1 when it is empty.
#   HADOOP_PREFIX  optional; defaults to $TESTDIR/hadoop.
#   NUM_RECORDS    optional; record count passed to teragen (default 100000).

set -e
set -x
set -o pipefail

INPUT=/terasort-input
OUTPUT=/terasort-output
REPORT=/tersort-report

num_records=${NUM_RECORDS:-100000}

# bail if $TESTDIR is not set as this test will fail in that scenario
if [[ -z "${TESTDIR:-}" ]]; then
  echo "\$TESTDIR needs to be set, but is not. Exiting." >&2
  exit 1
fi

# if HADOOP_PREFIX is not set, use default
HADOOP_PREFIX=${HADOOP_PREFIX:-$TESTDIR/hadoop}
hadoop_bin="$HADOOP_PREFIX/bin/hadoop"

# Resolve the examples jar once; an unmatched glob would otherwise be handed
# to `hadoop jar` as a literal pattern.
examples_jars=("$HADOOP_PREFIX"/share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar)
if [[ ! -e "${examples_jars[0]}" ]]; then
  echo "no hadoop-mapreduce-examples jar found under $HADOOP_PREFIX" >&2
  exit 1
fi
examples_jar=${examples_jars[0]}

# Nuke hadoop directories
# (best effort: the removal is allowed to fail, e.g. on a first run)
"$hadoop_bin" fs -rm -r "$INPUT" "$OUTPUT" "$REPORT" || true

# Generate terasort data
#
#-Ddfs.blocksize=512M \
#-Dio.file.buffer.size=131072 \
#-Dmapreduce.map.java.opts=-Xmx1536m \
#-Dmapreduce.map.memory.mb=2048 \
#-Dmapreduce.task.io.sort.mb=256 \
#-Dyarn.app.mapreduce.am.resource.mb=1024 \
#-Dmapred.map.tasks=64 \
"$hadoop_bin" jar "$examples_jar" \
  teragen \
  -Dmapred.map.tasks=9 \
  "$num_records" \
  "$INPUT"

# Run the sort job
#
#-Ddfs.blocksize=512M \
#-Dio.file.buffer.size=131072 \
#-Dmapreduce.map.java.opts=-Xmx1536m \
#-Dmapreduce.map.memory.mb=2048 \
#-Dmapreduce.map.output.compress=true \
#-Dmapreduce.map.output.compress.codec=org.apache.hadoop.io.compress.Lz4Codec \
#-Dmapreduce.reduce.java.opts=-Xmx1536m \
#-Dmapreduce.reduce.memory.mb=2048 \
#-Dmapreduce.task.io.sort.factor=100 \
#-Dmapreduce.task.io.sort.mb=768 \
#-Dyarn.app.mapreduce.am.resource.mb=1024 \
#-Dmapred.reduce.tasks=100 \
#-Dmapreduce.terasort.output.replication=1 \
"$hadoop_bin" jar "$examples_jar" \
  terasort \
  -Dmapred.reduce.tasks=10 \
  "$INPUT" "$OUTPUT"

# Validate the sorted data
#
#-Ddfs.blocksize=512M \
#-Dio.file.buffer.size=131072 \
#-Dmapreduce.map.java.opts=-Xmx1536m \
#-Dmapreduce.map.memory.mb=2048 \
#-Dmapreduce.reduce.java.opts=-Xmx1536m \
#-Dmapreduce.reduce.memory.mb=2048 \
#-Dmapreduce.task.io.sort.mb=256 \
#-Dyarn.app.mapreduce.am.resource.mb=1024 \
#-Dmapred.reduce.tasks=1 \
"$hadoop_bin" jar "$examples_jar" \
  teravalidate \
  -Dmapred.reduce.tasks=1 \
  "$OUTPUT" "$REPORT"

exit 0

# ============================================================================
# File: qa/workunits/hadoop/wordcount.sh  (new file, mode 100755)
# ============================================================================
#!/usr/bin/env bash
#
# Downloads a tarball of input files, copies them into the Hadoop file
# system and runs the wordcount program from the MapReduce examples jar.
#
# Environment:
#   TESTDIR        required; the script exits with status 1 when it is empty.
#   HADOOP_PREFIX  optional; defaults to $TESTDIR/hadoop.

set -e
set -x
# Without pipefail a failed download would only be noticed if tar happened
# to reject whatever curl managed to write.
set -o pipefail

WC_INPUT=/wc_input
WC_OUTPUT=/wc_output

echo "starting hadoop-wordcount test"

# bail if $TESTDIR is not set as this test will fail in that scenario
if [[ -z "${TESTDIR:-}" ]]; then
  echo "\$TESTDIR needs to be set, but is not. Exiting." >&2
  exit 1
fi

# if HADOOP_PREFIX is not set, use default
HADOOP_PREFIX=${HADOOP_PREFIX:-$TESTDIR/hadoop}
hadoop_bin="$HADOOP_PREFIX/bin/hadoop"

# Resolve the examples jar once; an unmatched glob would otherwise be handed
# to `hadoop jar` as a literal pattern.
examples_jars=("$HADOOP_PREFIX"/share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar)
if [[ ! -e "${examples_jars[0]}" ]]; then
  echo "no hadoop-mapreduce-examples jar found under $HADOOP_PREFIX" >&2
  exit 1
fi
examples_jar=${examples_jars[0]}

# Create the scratch directory only after the precondition checks, and remove
# it on every exit path so a failing step does not leak it.
DATA_INPUT=$(mktemp -d)
trap 'rm -rf -- "$DATA_INPUT"' EXIT

# Nuke hadoop directories
# (best effort: the removal is allowed to fail, e.g. on a first run)
"$hadoop_bin" fs -rm -r "$WC_INPUT" "$WC_OUTPUT" || true

# Fetch and import testing data set
# --fail makes curl exit non-zero on an HTTP error instead of piping the
# error page into tar.
curl --fail http://download.ceph.com/qa/hadoop_input_files.tar \
  | tar xf - -C "$DATA_INPUT"
"$hadoop_bin" fs -copyFromLocal "$DATA_INPUT" "$WC_INPUT"
rm -rf -- "$DATA_INPUT"

# Run the job
"$hadoop_bin" jar "$examples_jar" \
  wordcount "$WC_INPUT" "$WC_OUTPUT"

# Cleanup
"$hadoop_bin" fs -rm -r "$WC_INPUT" "$WC_OUTPUT" || true

echo "completed hadoop-wordcount test"
exit 0