hadoop mapreduce自定义key,排序的问题

有一个*join*操作的例子
//原始数据
a.txt:
shoes 1
router 2
settopbox 3

b.txt:
1 125
2 232
1 132
2 200
3 578
2 265
3 610
1 157
1 175
3 582

一个表中装了key和对应的name,另一个表装了key和对应的value  

我自定义了一个Key保存了Key和Value,setSortComparatorClass设置为用Value排序
自定义key类型的CompareTo方法也实现为用value排序  
setGroupingComparatorClass和Partitioner设置为按照Key来

我的设想是*让每个key的第一个数据为keyname*,这样迭代时首先把keyname拿到,然后context.write

但是结果只有一个key实现了我的设想,不知道发生了什么
//GroupingSortComparator
/**
 * Grouping comparator for {@link MyKey}.
 *
 * <p>Only the join-key half of the composite key is compared (as Java strings);
 * the value half is never consulted, so keys that share a join key compare as equal.
 */
public static class MyComparator extends WritableComparator {

    public MyComparator() {
        // Second argument is WritableComparator's createInstances flag.
        super(MyKey.class, true);
    }

    /**
     * @param a left operand, must be a {@link MyKey}
     * @param b right operand, must be a {@link MyKey}
     * @return the result of {@code String.compareTo} on the two join keys
     */
    @SuppressWarnings("rawtypes")
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        final MyKey left = (MyKey) a;
        final MyKey right = (MyKey) b;
        final String leftJoinKey = left.getKey().toString();
        final String rightJoinKey = right.getKey().toString();
        return leftJoinKey.compareTo(rightJoinKey);
    }
}
//SortComparator
/**
 * Sort comparator that looks only at the value half of {@link MyKey};
 * the join key is not part of the ordering.
 *
 * <p>Each value is classified by its first character (digit, letter, or neither):
 * <ul>
 *   <li>same class: the plain {@code Text} comparison is returned;</li>
 *   <li>one digit-leading and one letter-leading: the {@code Text} comparison is negated;</li>
 *   <li>either value starts with something else: {@code -1} is returned.</li>
 * </ul>
 */
public static class MyValueComparator extends WritableComparator {

    public MyValueComparator() {
        super(MyKey.class, true);
    }

    @SuppressWarnings("rawtypes")
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        final Text v1 = ((MyKey) a).getValue();
        final Text v2 = ((MyKey) b).getValue();

        final char lead1 = v1.toString().charAt(0);
        final boolean digit1 = Character.isDigit(lead1);
        final boolean letter1 = Character.isLetter(lead1);
        if (!digit1 && !letter1) {
            // The second value is deliberately not inspected in this case.
            return -1;
        }

        final char lead2 = v2.toString().charAt(0);
        final boolean digit2 = Character.isDigit(lead2);
        final boolean letter2 = Character.isLetter(lead2);
        if (!digit2 && !letter2) {
            return -1;
        }

        // Both values are digit- or letter-leading here, so digit1 == digit2
        // means "same class"; otherwise it is a digit/letter mix.
        if (digit1 == digit2) {
            return v1.compareTo(v2);
        }
        return -v1.compareTo(v2);
    }
}
//自定义key
public static class MyKey implements WritableComparable<MyKey> {
private Text tkey = null;
private Text tValue = null;

public MyKey() {
tkey = new Text();
tValue = new Text();
}

public Text getKey() {
return this.tkey;
}

public void setKey(Text key) {
this.tkey = key;
}

public Text getValue() {
return this.tValue;
}

public void setValue(Text value) {
this.tValue = value;
}

public void readFields(DataInput arg0) throws IOException {
// TODO Auto-generated method stub
tkey.readFields(arg0);
tValue.readFields(arg0);
}

public void write(DataOutput arg0) throws IOException {
// TODO Auto-generated method stub
tkey.write(arg0);
tValue.write(arg0);
}

public int compareTo(MyKey o) {
// TODO Auto-generated method stub
return tValue.compareTo(o.getValue());
//return tkey.compareTo(o.getKey());
}
}
 
//自定义partitioner
/**
 * Partitions records by the join-key half of {@link MyKey}, so that every record
 * sharing a join key is assigned the same partition number regardless of its value.
 */
public static class MyPartitioner extends Partitioner<MyKey, Text> {

    /**
     * @param key           composite key; only {@code key.getKey()} is hashed
     * @param value         unused
     * @param numPartitions number of partitions to spread keys over
     * @return a partition index in {@code [0, numPartitions)}
     */
    @Override
    public int getPartition(MyKey key, Text value, int numPartitions) {
        // hashCode() can be negative and Java's % keeps the sign of the dividend,
        // which would yield a negative (invalid) partition index. Clearing the
        // sign bit first keeps the result non-negative; non-negative hashes map
        // to the same partition as before.
        return (key.getKey().hashCode() & Integer.MAX_VALUE) % numPartitions;
    }
}
 
//reducer
/**
 * Treats the first value of the group as the output key and emits one record
 * for every remaining value in the group.
 *
 * <p>The first value is only remembered, never written. For each later value the
 * method writes (first value, {@code key.getValue()}); the iterated value itself
 * is not used in the output.
 * NOTE(review): this presumably relies on the framework updating {@code key}
 * as the values iterator advances — confirm against the Reducer documentation.
 *
 * @param key     composite key of the current group
 * @param values  values of the current group
 * @param context sink for the output records
 */
public void reduce(MyKey key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
    String joinedName = null;
    boolean nameCaptured = false;
    for (Text current : values) {
        if (!nameCaptured) {
            // First element of the group: remember it and emit nothing.
            joinedName = current.toString();
            nameCaptured = true;
            continue;
        }
        context.write(new Text(joinedName), key.getValue());
    }
}
//最终的结果(问题所在)

shoes 125
shoes 132
shoes 157
shoes 175
200 232
200 265
578 582
578 610

jnchen

赞同来自:

解决了,实际上这是一个mapreduce二次排序的问题:setSortComparatorClass的排序类需要先比较自定义key中的key,key相同时再比较value。如果只单一地排key或者value,效果和不自定义排序一样,达不到二次排序的目的(只按value排序时,同一个key的记录在排序后不再相邻,分组就被打散了)。改写MyValueComparator如下:
/**
 * Secondary-sort comparator for {@link MyKey}.
 *
 * <p>Keys are ordered by join key first, so all records of one join key are
 * adjacent. Within one join key, values are ordered by category and then by
 * their {@code Text} ordering:
 * <ol>
 *   <li>values whose first character is a letter (the name record),</li>
 *   <li>values whose first character is a digit,</li>
 *   <li>everything else, including empty values.</li>
 * </ol>
 *
 * <p>Compared with the chained {@code isDigit}/{@code isLetter} version this
 * no longer throws on an empty value, and values starting with neither a digit
 * nor a letter get a consistent position instead of an unconditional {@code -1}
 * (which made {@code compare(a, b)} and {@code compare(b, a)} both negative).
 * The ordering of ASCII letter- and digit-leading values is unchanged.
 */
public static class MyValueComparator extends WritableComparator {

    private static final int LETTER_LEADING = 0;
    private static final int DIGIT_LEADING = 1;
    private static final int OTHER = 2;

    public MyValueComparator() {
        super(MyKey.class, true);
    }

    /**
     * @param a left operand, must be a {@link MyKey}
     * @param b right operand, must be a {@link MyKey}
     * @return negative, zero or positive as {@code a} sorts before, with, or after {@code b}
     */
    @SuppressWarnings("rawtypes")
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        final MyKey left = (MyKey) a;
        final MyKey right = (MyKey) b;

        // Primary order: the join key.
        final int byKey = left.getKey().compareTo(right.getKey());
        if (byKey != 0) {
            return byKey;
        }

        // Secondary order: category first, so the name record leads its group.
        final Text v1 = left.getValue();
        final Text v2 = right.getValue();
        final int category1 = categoryOf(v1);
        final int category2 = categoryOf(v2);
        if (category1 != category2) {
            return category1 < category2 ? -1 : 1;
        }
        return v1.compareTo(v2);
    }

    /** Classifies a value by its first character; empty values count as OTHER. */
    private static int categoryOf(Text value) {
        final String text = value.toString();
        if (text.isEmpty()) {
            return OTHER;
        }
        final char lead = text.charAt(0);
        if (Character.isLetter(lead)) {
            return LETTER_LEADING;
        }
        if (Character.isDigit(lead)) {
            return DIGIT_LEADING;
        }
        return OTHER;
    }
}

要回复问题请先登录注册