hadoop mapreduce自定义key,排序的问题

有一个*join*操作的例子
//原始数据
a.txt:
shoes 1
router 2
settopbox 3

b.txt:
1 125
2 232
1 132
2 200
3 578
2 265
3 610
1 157
1 175
3 582
一个表中装了key和对应的name,另一个表装了key和对应的value  

我自定义了一个Key保存了Key和Value,setSortComparatorClass设置为用Value排序
自定义key类型的CompareTo方法也实现为用value排序  
setGroupingComparatorClass和Partitioner设置为按照Key来

我的设想是*让每个key的第一个数据为keyname*,这样迭代时首先把keyname拿到,然后context.write

但是结果只有一个key实现了我的设想,不知道发生了什么。
//GroupingComparator
/**
 * Comparator that orders {@link MyKey} instances by their key field only.
 *
 * <p>The keys are compared through their {@code String} form; the value field of
 * {@code MyKey} is never consulted, so two instances with the same key and different
 * values compare as equal.
 */
public static class MyComparator extends WritableComparator {

    /** Hands {@code MyKey.class} and {@code true} to the {@code WritableComparator} constructor. */
    public MyComparator() {
        super(MyKey.class, true);
    }

    /**
     * Compares two {@code MyKey} instances by the string form of their key.
     *
     * @param a left operand; cast to {@code MyKey}
     * @param b right operand; cast to {@code MyKey}
     * @return the result of {@code String.compareTo} on the two key strings
     */
    @SuppressWarnings("rawtypes")
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        final MyKey left = (MyKey) a;
        final MyKey right = (MyKey) b;
        final String leftKey = left.getKey().toString();
        final String rightKey = right.getKey().toString();
        return leftKey.compareTo(rightKey);
    }
}
// SortComparator
/**
 * Comparator that orders {@link MyKey} instances by their value field alone; the key field
 * is never consulted.
 *
 * <p>The first character of each value decides how the two values are compared:
 * <ul>
 *   <li>both start with a digit, or both start with a letter: plain {@code Text} order;</li>
 *   <li>one starts with a digit and the other with a letter: the negated {@code Text}
 *       order;</li>
 *   <li>anything else: a constant {@code -1}.</li>
 * </ul>
 *
 * <p>An empty value makes {@code charAt(0)} throw whenever that value is inspected.
 */
public static class MyValueComparator extends WritableComparator {

    /** Hands {@code MyKey.class} and {@code true} to the {@code WritableComparator} constructor. */
    public MyValueComparator() {
        super(MyKey.class, true);
    }

    /**
     * Compares two {@code MyKey} instances by value only.
     *
     * @param a left operand; cast to {@code MyKey}
     * @param b right operand; cast to {@code MyKey}
     * @return the {@code Text} comparison of the two values, its negation, or {@code -1},
     *         as described on the class
     */
    @SuppressWarnings("rawtypes")
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        final Text left = ((MyKey) a).getValue();
        final Text right = ((MyKey) b).getValue();

        final char leftFirst = left.toString().charAt(0);
        final boolean leftIsDigit = Character.isDigit(leftFirst);
        final boolean leftIsLetter = Character.isLetter(leftFirst);
        if (!leftIsDigit && !leftIsLetter) {
            // The right-hand value is deliberately not inspected in this case.
            return -1;
        }

        final char rightFirst = right.toString().charAt(0);
        final boolean rightIsDigit = Character.isDigit(rightFirst);
        final boolean rightIsLetter = Character.isLetter(rightFirst);
        if (!rightIsDigit && !rightIsLetter) {
            return -1;
        }

        // Both values now start with either a digit or a letter (never both at once).
        final boolean sameKind = leftIsDigit == rightIsDigit;
        if (sameKind) {
            return left.compareTo(right);
        }
        return -left.compareTo(right);
    }
}
// custom key
public static class MyKey implements WritableComparable {
private Text tkey = null;
private Text tValue = null;

public MyKey() {
tkey = new Text();
tValue = new Text();
}

public Text getKey() {
return this.tkey;
}

public void setKey(Text key) {
this.tkey = key;
}

public Text getValue() {
return this.tValue;
}

public void setValue(Text value) {
this.tValue = value;
}

public void readFields(DataInput arg0) throws IOException {
// TODO Auto-generated method stub
tkey.readFields(arg0);
tValue.readFields(arg0);
}

public void write(DataOutput arg0) throws IOException {
// TODO Auto-generated method stub
tkey.write(arg0);
tValue.write(arg0);
}

public int compareTo(MyKey o) {
// TODO Auto-generated method stub
return tValue.compareTo(o.getValue());
//return tkey.compareTo(o.getKey());
}
} //自定义partitioner
public static class MyPartitioner extends Partitioner {

@Override
public int getPartition(MyKey key, Text value, int numPartitions) {
// TODO Auto-generated method stub
return key.getKey().hashCode() % numPartitions;
}

} //reducer
/**
 * Joins one group of records: the first value of the group is taken as the output key, and
 * one record is written for every later value of the group.
 *
 * <p>This relies on the first value of each group being the name for that key; nothing is
 * written for the first value itself, and a group with a single value produces no output.
 *
 * @param key composite key of the group
 * @param values values of the group, in the order they are delivered
 * @param context sink for the joined records
 * @throws IOException if writing a record fails
 * @throws InterruptedException if writing a record is interrupted
 */
public void reduce(MyKey key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
    String newKey = null;
    for (Text value : values) {
        if (newKey == null) {
            // First value of the group: remember it as the output key.
            newKey = value.toString();
        } else {
            // NOTE(review): key.getValue() is written rather than value; this presumably
            // relies on the framework refreshing key as the iteration advances - confirm.
            context.write(new Text(newKey), key.getValue());
        }
    }
}
//最终的结果(问题所在)

shoes 125
shoes 132
shoes 157
shoes 175
200 232
200 265
578 582
578 610
已邀请:
解决了,实际上这是一个mapreduce二次排序的问题:setSortComparatorClass指定的排序类需要先比较自定义key中的key,key相同时再比较value;如果只单独比较key或者value,效果和不自定义排序一样。
改写MyValueComparator如下
/**
 * Comparator for the secondary sort: orders {@link MyKey} instances by key first and, for
 * equal keys, by value.
 *
 * <p>Within one key, values are ranked by their first character: values starting with a
 * letter come first, then values starting with a digit, then everything else (including
 * empty values). Values of the same rank fall back to plain {@code Text} order.
 *
 * <p>The ranking is explicit so that the comparison is symmetric and never fails on an
 * empty value.
 */
public static class MyValueComparator extends WritableComparator {

    /** Rank of a value whose first character is a letter. */
    private static final int RANK_LETTER = 0;
    /** Rank of a value whose first character is a digit. */
    private static final int RANK_DIGIT = 1;
    /** Rank of an empty value or one starting with any other character. */
    private static final int RANK_OTHER = 2;

    /** Hands {@code MyKey.class} and {@code true} to the {@code WritableComparator} constructor. */
    public MyValueComparator() {
        super(MyKey.class, true);
    }

    /**
     * Compares two {@code MyKey} instances by key, then by value.
     *
     * @param a left operand; cast to {@code MyKey}
     * @param b right operand; cast to {@code MyKey}
     * @return a negative, zero or positive number when {@code a} sorts before, together
     *         with, or after {@code b}
     */
    @SuppressWarnings("rawtypes")
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        final MyKey left = (MyKey) a;
        final MyKey right = (MyKey) b;

        // Primary order: the key, so that all records of one key end up adjacent.
        final int byKey = left.getKey().compareTo(right.getKey());
        if (byKey != 0) {
            return byKey;
        }

        // Secondary order: letter-leading values ahead of digit-leading ones.
        final Text leftValue = left.getValue();
        final Text rightValue = right.getValue();
        final int byRank = Integer.compare(rankOf(leftValue), rankOf(rightValue));
        if (byRank != 0) {
            return byRank;
        }
        return leftValue.compareTo(rightValue);
    }

    /** Classifies a value by its first character; an empty value gets the last rank. */
    private static int rankOf(Text value) {
        final String text = value.toString();
        if (text.isEmpty()) {
            return RANK_OTHER;
        }
        final char first = text.charAt(0);
        if (Character.isLetter(first)) {
            return RANK_LETTER;
        }
        if (Character.isDigit(first)) {
            return RANK_DIGIT;
        }
        return RANK_OTHER;
    }
}

要回复问题请先登录注册