Sunday, July 28, 2013

Custom Writable : SetWritable

Recently, I needed the output key of a mapper to be a Set.

Newer versions of Hadoop may eventually ship with such a type, but it isn't available yet, so I wrote my own.

In addition to overriding readFields() and write(), the following methods also need to be overridden:
  • compareTo()
  • equals()
  • hashCode()

Following is the SetWritable class:

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import java.util.TreeSet;

import org.apache.hadoop.io.WritableComparable;

/**
 * A {@link WritableComparable} that wraps a {@code Set<Integer>} so that a set
 * of integers can be serialized and compared as a key.
 *
 * <p>
 * Wire format: an {@code int} element count followed by that many {@code int}
 * elements. Elements must be non-null because they are unboxed when written.
 *
 * <p>
 * Ordering: sets are ordered first by size and then by their elements taken in
 * ascending order. {@link #compareTo(SetWritable)} returns 0 exactly when
 * {@link #equals(Object)} returns {@code true}.
 *
 * @author amar
 */
public class SetWritable implements WritableComparable<SetWritable> {

 /** The wrapped set; never {@code null}. */
 private Set<Integer> itemSet;

 /**
  * Creates an instance holding an empty set, so that every method is usable
  * even before {@link #readFields(DataInput)} has been called.
  */
 public SetWritable() {

  this.itemSet = new HashSet<Integer>();
 }

 /**
  * Creates an instance wrapping the given set. The set is stored by
  * reference, not copied.
  *
  * @param itemSet
  *            the set to wrap; {@code null} is treated as an empty set
  */
 public SetWritable(Set<Integer> itemSet) {

  this.itemSet = (itemSet != null) ? itemSet : new HashSet<Integer>();
 }

 /**
  * Returns the string form of the wrapped set.
  */
 @Override
 public String toString() {

  return itemSet.toString();
 }

 /**
  * Replaces the contents of the wrapped set with the elements read from
  * {@code in}.
  *
  * @param in
  *            the input to read the element count and the elements from
  * @throws IOException
  *             if reading from {@code in} fails
  */
 @Override
 public void readFields(DataInput in) throws IOException {

  // First clear the set. Otherwise we will just accumulate
  // entries every time this method is called.
  itemSet.clear();

  int count = in.readInt();
  while (count-- > 0) {
   itemSet.add(in.readInt());
  }
 }

 /**
  * Writes the element count followed by each element of the wrapped set.
  *
  * @param out
  *            the output to write to
  * @throws IOException
  *             if writing to {@code out} fails
  */
 @Override
 public void write(DataOutput out) throws IOException {

  out.writeInt(itemSet.size());
  for (int item : itemSet) {
   out.writeInt(item);
  }
 }

 /**
  * Compares two sets by size and then by their elements in ascending order.
  *
  * <p>
  * The result is antisymmetric and transitive, as {@link Comparable}
  * requires, and is 0 only for equal sets.
  *
  * @param o
  *            the instance to compare with
  * @return a negative value, zero, or a positive value as this set sorts
  *         before, equal to, or after the other set
  */
 @Override
 public int compareTo(SetWritable o) {

  int mySize = itemSet.size();
  int otherSize = o.itemSet.size();
  if (mySize != otherSize) {
   return (mySize < otherSize) ? -1 : 1;
  }

  // Same size: walk both sets in ascending order and let the first
  // differing element decide.
  Iterator<Integer> mine = new TreeSet<Integer>(itemSet).iterator();
  Iterator<Integer> theirs = new TreeSet<Integer>(o.itemSet).iterator();
  while (mine.hasNext() && theirs.hasNext()) {
   int order = mine.next().compareTo(theirs.next());
   if (order != 0) {
    return order;
   }
  }
  return 0;
 }

 /**
  * Two instances are equal when they are of the same class and wrap equal
  * sets.
  */
 @Override
 public boolean equals(Object other) {

  if (this == other) {
   return true;
  }

  if (other == null || (this.getClass() != other.getClass())) {
   return false;
  }

  SetWritable guest = (SetWritable) other;
  return this.itemSet.equals(guest.itemSet);
 }

 /**
  * Returns the hash code of the wrapped set, consistent with
  * {@link #equals(Object)}.
  */
 @Override
 public int hashCode() {

  return itemSet.hashCode();
 }

 /**
  * Gets the itemSet.
  * 
  * @return the wrapped set itself (not a copy); never {@code null}.
  */
 public Set<Integer> getItemSet() {

  return itemSet;
 }

 /**
  * Sets the itemSet. The set is stored by reference, not copied.
  *
  * @param itemSet
  *            the set to wrap; {@code null} is treated as an empty set
  */
 public void setItemSet(Set<Integer> itemSet) {

  this.itemSet = (itemSet != null) ? itemSet : new HashSet<Integer>();
 }
}

Now, in run(), MapOutputKeyClass needs to be set as follows:

// Register the custom key class (SetWritable) as the map output key type.
job.setMapOutputKeyClass(SetWritable.class);

Any feedback, good or bad, is most welcome.

Name

Email *

Message *