Monday, 20 October 2014

MapReduce Code Testing

MR code testing using MRUnit

Apache MRUnit is a library for unit-testing your MapReduce (MR) code. You can test your
code in a local environment first and then run it on the cluster.

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import junit.framework.TestCase;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mrunit.*;
import org.apache.hadoop.mrunit.types.Pair;
import org.junit.Before;
import org.junit.Test;

public class MRJobTest {

  private MapDriver<LongWritable, Text, Text, Text> mapDriver;
  private ReduceDriver<Text, Text, Text, Text> reduceDriver;
  private MapReduceDriver<LongWritable, Text, Text, Text, Text, Text> mapReduceDriver;

public class InvertedIndexMapper extends MapReduceBase implements Mapper<LongWritable, Text, Text, Text> {
  public static final int RETAIlER_INDEX = 0;

  public void map(LongWritable longWritable, Text text, OutputCollector<Text, Text> outputCollector, Reporter reporter) throws IOException {
     final String[] record = StringUtils.split(text.toString(), ",");
     final String retailer = record[RETAIlER_INDEX];
     for (int i = 1; i < record.length; i++) {
        final String keyword = record[i];
        outputCollector.collect(new Text(keyword), new Text(retailer));

 public class InvertedIndexReducer extends MapReduceBase implements Reducer<Text, Text, Text, Text> {

  public void reduce(Text text, Iterator<Text> textIterator, OutputCollector<Text, Text> outputCollector, Reporter reporter)
   throws IOException {
  // TODO Auto-generated method stub
    final String retailers = StringUtils.join(textIterator, ',');
    outputCollector.collect(text, new Text(retailers));

public void setUp() throws Exception {

  final InvertedIndexMapper mapper = new InvertedIndexMapper();
  final InvertedIndexReducer reducer = new InvertedIndexReducer();

  mapDriver = MapDriver.newMapDriver(mapper);
  reduceDriver = ReduceDriver.newReduceDriver(reducer);
  mapReduceDriver = MapReduceDriver.newMapReduceDriver(mapper, reducer);

public void testMapperWithSingleKeyAndValue() throws Exception {
  final LongWritable inputKey = new LongWritable(0);
  final Text inputValue = new Text(",groceries,clothes");

  final Text outputKey = new Text("groceries");
  final Text outputValue = new Text("");

  mapDriver.withInput(inputKey, inputValue);
  mapDriver.withOutput(outputKey, outputValue);


public void testMapperWithSingleInputAndMultipleOutput() throws Exception {
  final LongWritable key = new LongWritable(0);
 mapDriver.withInput(key, new Text(",books,music,toys,ebooks,movies,computers"));
  final List<Pair<Text, Text>> result =;

  final Pair<Text, Text> books = new Pair<Text, Text>(new Text("books"), new Text(""));
  final Pair<Text, Text> toys = new Pair<Text, Text>(new Text("toys"), new Text(""));

  .contains(books, toys);

public void testReducer() throws Exception {
 final Text inputKey = new Text("books");
 final ImmutableList<Text> inputValue = ImmutableList.of(new Text(""), new Text(""));

 final List<Pair<Text, Text>> result =;
 //final Pair<Text, Text> pair2 = new Pair<Text, Text>(inputKey, new Text(","));
  reduceDriver.withOutput(inputKey, new Text(","));
 /* assertThat(result)
  .containsExactly(pair2); */

Note: Add the MRUnit jar and the jars it depends on to your project's classpath.
