summaryrefslogtreecommitdiffstats
path: root/qa/workunits/hadoop/terasort.sh
blob: 3d6988a21d7fb3de7167229228c02378c7d6cca7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#!/usr/bin/env bash
#
# Hadoop TeraSort workunit: generates data with teragen, sorts it with
# terasort and checks the result with teravalidate.
#
# Environment:
#   TESTDIR        required; the script exits with status 1 when it is empty
#   HADOOP_PREFIX  optional; defaults to $TESTDIR/hadoop
#   NUM_RECORDS    optional; record count passed to teragen (default 100000)

set -e  # stop at the first failing command
set -x  # trace every command as it runs

# Filesystem paths handed to `hadoop fs` and to the example jobs.
INPUT=/terasort-input
OUTPUT=/terasort-output
# NOTE(review): "tersort" looks like a typo for "terasort"; the value is left
# unchanged in case something outside this script refers to the path.
REPORT=/tersort-report

# Record count for teragen; a non-empty NUM_RECORDS overrides the default.
# Parameter expansion replaces the former unquoted `[ ! -z $NUM_RECORDS ]`
# test, which errored out (and silently kept the default) whenever the value
# contained whitespace.
num_records=${NUM_RECORDS:-100000}

# Bail out early if $TESTDIR is unset or empty: the test would fail later
# anyway, and the default Hadoop location below is derived from it.
if [[ -z "${TESTDIR:-}" ]]; then
  # Diagnostics belong on stderr so they do not mix with regular output.
  echo "\$TESTDIR needs to be set, but is not. Exiting." >&2
  exit 1
fi

# Use the Hadoop tree under $TESTDIR when HADOOP_PREFIX is unset or empty.
HADOOP_PREFIX=${HADOOP_PREFIX:-$TESTDIR/hadoop}

# Nuke the input, output and report paths before the run starts.
# `|| true` keeps `set -e` from aborting when the removal fails (presumably
# because the paths do not exist yet -- the failure is deliberately ignored).
# Expansions are quoted so a prefix containing whitespace or glob characters
# is passed through as a single word.
"$HADOOP_PREFIX/bin/hadoop" fs -rm -r "$INPUT" "$OUTPUT" "$REPORT" || true

# Generate terasort data: run the `teragen` example with $num_records as the
# record count and $INPUT as the destination path.
#
# Commented-out tuning options, kept for reference:
#-Ddfs.blocksize=512M \
#-Dio.file.buffer.size=131072 \
#-Dmapreduce.map.java.opts=-Xmx1536m \
#-Dmapreduce.map.memory.mb=2048 \
#-Dmapreduce.task.io.sort.mb=256 \
#-Dyarn.app.mapreduce.am.resource.mb=1024 \
#-Dmapred.map.tasks=64 \
#
# The variable expansions are quoted to prevent word splitting; the jar
# wildcard stays outside the quotes so the shell still expands it.
"$HADOOP_PREFIX/bin/hadoop" jar \
  "$HADOOP_PREFIX"/share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar \
  teragen \
  -Dmapred.map.tasks=9 \
  "$num_records" \
  "$INPUT"

# Run the sort job: the `terasort` example is given $INPUT and $OUTPUT as its
# two path arguments, with the reduce task count set to 10.
#
# Commented-out tuning options, kept for reference:
#-Ddfs.blocksize=512M \
#-Dio.file.buffer.size=131072 \
#-Dmapreduce.map.java.opts=-Xmx1536m \
#-Dmapreduce.map.memory.mb=2048 \
#-Dmapreduce.map.output.compress=true \
#-Dmapreduce.map.output.compress.codec=org.apache.hadoop.io.compress.Lz4Codec \
#-Dmapreduce.reduce.java.opts=-Xmx1536m \
#-Dmapreduce.reduce.memory.mb=2048 \
#-Dmapreduce.task.io.sort.factor=100 \
#-Dmapreduce.task.io.sort.mb=768 \
#-Dyarn.app.mapreduce.am.resource.mb=1024 \
#-Dmapred.reduce.tasks=100 \
#-Dmapreduce.terasort.output.replication=1 \
#
# The variable expansions are quoted to prevent word splitting; the jar
# wildcard stays outside the quotes so the shell still expands it.
"$HADOOP_PREFIX/bin/hadoop" jar \
  "$HADOOP_PREFIX"/share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar \
  terasort \
  -Dmapred.reduce.tasks=10 \
  "$INPUT" "$OUTPUT"

# Validate the sorted data: the `teravalidate` example is given $OUTPUT and
# $REPORT as its two path arguments, with a single reduce task.
#
# Commented-out tuning options, kept for reference:
#-Ddfs.blocksize=512M \
#-Dio.file.buffer.size=131072 \
#-Dmapreduce.map.java.opts=-Xmx1536m \
#-Dmapreduce.map.memory.mb=2048 \
#-Dmapreduce.reduce.java.opts=-Xmx1536m \
#-Dmapreduce.reduce.memory.mb=2048 \
#-Dmapreduce.task.io.sort.mb=256 \
#-Dyarn.app.mapreduce.am.resource.mb=1024 \
#-Dmapred.reduce.tasks=1 \
#
# The variable expansions are quoted to prevent word splitting; the jar
# wildcard stays outside the quotes so the shell still expands it.
"$HADOOP_PREFIX/bin/hadoop" jar \
  "$HADOOP_PREFIX"/share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar \
  teravalidate \
  -Dmapred.reduce.tasks=1 \
  "$OUTPUT" "$REPORT"

# Reaching this line means the validation command above succeeded (set -e
# aborts on failure), so report success explicitly.
exit 0