面试题
面试题:1、任意使用mapreduce或hive或storm,根据日志求20150501,当天每个用户访问页面的次数的top10
create table test(userid string,pageid string,visitdate int) row format delimited fields terminated by ',' stored as textfile;
hive sql:select * from (select userid,pageid,count(*),row_number() over(partition by userid order by pageid) rank fromtest where visitdate = '20150501'group byuserid,pageid) t where t.rank <=10执行时间136second
spark-sql:执行时间0.5second SET spark.sql.shuffle.partitions=1
备注:hive分组聚合函数和Oracl的使用方法完全一样
字段名称:userid、pageid、visitdate
数据:
aa,222,20150501
aa,223,20150501
aa,224,20150501
bb,225,20150502
bb,226,20150501
bb,227,20150502
cc,228,20150501
cc,229,20150501
cc,230,20150501
dd,231,20150502
dd,232,20150501
dd,233,20150502
ee,234,20150501
ee,235,20150501
ee,236,20150501
ee,237,20150502
ff,338,20150501
ff,339,2015050
mapreduce
map可以分割,reduce可以统计 rdd实现:1、没有实现top102、有会的没?
val readfile = sc.textFile("hdfs://master:9000/201505012.txt")
val readfile2 = readfile.map(_.split(",")).filter(a=>a(2) <= "20150501").map(a=>((a(0),a(1)),1)).foldByKey(0)(_+_).sortBy(a=>(a._1._1,a._2)).map(a=>(a._1._1,(a._1._2,a._2))).groupByKey().collect 本帖最后由 玉溪 于 2016-11-23 13:43 编辑
mapperRdeuce实现:RunJob.java
package com.zyf.wc;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.StringUtils;
public class RunJob{
static class HotMapper extends Mapper<LongWritable, Text, Text, KeyPair>{
private KeyPair bean = new KeyPair();
protected void map(LongWritable key,Text value,Context context) throws IOException,InterruptedException{
String line = value.toString();
// 切分字段
String[] ss = StringUtils.split(line);
String userid = ss + ss + ss;
String pageid = ss;
String visitdate = ss;
String visitdatecount = "1";
bean.set(userid, pageid,visitdate,visitdatecount);
context.write(new Text(userid),bean);
}
}
static class HotReduce extends Reducer<Text, KeyPair, Text, KeyPair>{
private KeyPair bean = new KeyPair();
protected void reduce(Text key,Iterable<KeyPair> values,Context context) throws IOException,InterruptedException{
String re_userid = "";
String re_pageid = "";
String re_visitdate = "";
int count = 0;
for(KeyPair bean:values){
re_userid = bean.getUserid();
re_pageid = bean.getPageid();
re_visitdate = bean.getVisitdate();
count += Integer.parseInt(bean.getVisitdatecount());
}
bean.set(re_userid,re_pageid,re_visitdate,Integer.toString(count));
String newkey = key.toString().substring(5,13);
if(newkey.equals("20150501") && Integer.parseInt(bean.getVisitdatecount()) <= 10){
context.write(new Text(key.toString().substring(0,5)),bean);
}
}
}
public static void main(String[] args){
Configuration conf = new Configuration();
try{
Job job = new Job(conf);
job.setJobName("hot");
job.setJarByClass(RunJob.class);
job.setMapperClass(HotMapper.class);
job.setReducerClass(HotReduce.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(KeyPair.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(KeyPair.class);
job.setNumReduceTasks(3);//reduce
job.setPartitionerClass(FirstPartition.class);
FileInputFormat.addInputPath(job,new Path("hdfs://192.168.1.30:9000/mr/input/201505012.txt"));
FileOutputFormat.setOutputPath(job,new Path("hdfs://192.168.1.30:9000/mr/output/"));
System.exit(job.waitForCompletion(true)?0:1);
}catch(Exception e){
e.printStackTrace();
}
}
}
--------------------------------------------------------------------------------------------------------------------------------
KeyPair.java
package com.zyf.wc;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;
public class KeyPair implements WritableComparable<KeyPair>{
private String userid;
private String pageid;
private String visitdate;
private String visitdatecount;
public String getUserid() {
return userid;
}
public void setUserid(String userid) {
this.userid = userid;
}
public String getPageid() {
return pageid;
}
public void setPageid(String pageid) {
this.pageid = pageid;
}
public String getVisitdate() {
return visitdate;
}
public void setVisitdate(String visitdate) {
this.visitdate = visitdate;
}
public String getVisitdatecount() {
return visitdatecount;
}
public void setVisitdatecount(String visitdatecount) {
this.visitdatecount = visitdatecount;
}
public void set(String userid,String pageid,String visitdate,String visitdatecount){
this.userid = userid;
this.pageid = pageid;
this.visitdate = visitdate;
this.visitdatecount = visitdatecount;
}
/**
* 反序列化,从字节流中读出各个数据字段 读出的顺序应该跟序列化时写入的顺序保持一致
*/
@Override
public void readFields(DataInput in) throws IOException {
// TODO Auto-generated method stub
userid = in.readUTF();
pageid = in.readUTF();
visitdate = in.readUTF();
visitdatecount = in.readUTF();
}
/**
* 序列化,将数据字段以字节流写出去
*/
@Override
public void write(DataOutput out) throws IOException {
// TODO Auto-generated method stub
out.writeUTF(userid);
out.writeUTF(pageid);
out.writeUTF(visitdate);
out.writeUTF(visitdatecount);
}
@Override
public int compareTo(KeyPair o) {
// TODO Auto-generated method stub
return 0;
}
public String toString(){
return visitdatecount;
}
}
------------------------------------------------------------------------------------------------------------------------
FirstPartition.java
package com.zyf.wc;
import java.util.HashMap;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;
publicclass FirstPartition extends Partitioner<Text,KeyPair>{
private static HashMap<String,Integer> areaMap = new HashMap<>();
static{
areaMap.put("aa", 0);
areaMap.put("bb", 1);
areaMap.put("cc", 2);
}
@Override
public int getPartition(Text key, KeyPair values, int numPartitions) {
Integer provinceCode = areaMap.get(key.toString().substring(0,2));
return provinceCode==null?4:provinceCode;
}
}
------------------------------------------------------------------------------------------------------------
测试数:
aa,226,20150501
aa,226,20150501
aa,226,20150501
aa,226,20150501
aa,226,20150501
aa,227,20150502
aa,228,20150501
aa,228,20150501
aa,228,20150501
aa,228,20150501
aa,228,20150501
aa,228,20150501
aa,229,20150501
aa,222,20150501
aa,223,20150501
aa,223,20150501
aa,224,20150501
aa,224,20150501
aa,224,20150501
aa,225,20150502
aa,229,20150501
aa,229,20150501
aa,229,20150501
aa,229,20150501
aa,229,20150501
aa,229,20150501
aa,230,20150501
aa,232,20150501
aa,232,20150501
aa,232,20150501
aa,232,20150501
aa,232,20150501
aa,232,20150501
aa,232,20150501
aa,233,20150502
aa,234,20150501
aa,234,20150501
aa,234,20150501
aa,234,20150501
aa,230,20150501
aa,230,20150501
aa,230,20150501
aa,230,20150501
aa,230,20150501
aa,230,20150501
aa,230,20150501
aa,231,20150502
aa,232,20150501
aa,232,20150501
aa,234,20150501
aa,234,20150501
aa,234,20150501
aa,234,20150501
aa,234,20150501
aa,234,20150501
aa,235,20150501
aa,235,20150501
aa,235,20150501
aa,235,20150501
aa,235,20150501
aa,235,20150501
aa,235,20150501
aa,235,20150501
aa,235,20150501
aa,235,20150501
aa,235,20150501
aa,236,20150501
aa,236,20150501
aa,236,20150501
aa,236,20150501
aa,236,20150501
aa,236,20150501
aa,236,20150501
aa,236,20150501
aa,236,20150501
aa,236,20150501
aa,236,20150501
aa,236,20150501
aa,237,20150502
aa,338,20150501
aa,338,20150501
aa,338,20150501
aa,338,20150501
aa,338,20150501
aa,338,20150501
aa,338,20150501
aa,338,20150501
aa,338,20150501
aa,338,20150501
aa,338,20150501
aa,338,20150501
aa,338,20150501
aa,339,20150502
aa,340,20150501
aa,340,20150501
aa,340,20150501
aa,340,20150501
aa,340,20150501
aa,340,20150501
aa,340,20150501
aa,340,20150501
aa,340,20150501
aa,340,20150501
aa,340,20150501
aa,340,20150501
aa,340,20150501
aa,340,20150501
aa,341,20150501
aa,341,20150501
aa,341,20150501
aa,341,20150501
aa,341,20150501
aa,341,20150501
aa,341,20150501
aa,341,20150501
aa,341,20150501
aa,341,20150501
aa,341,20150501
aa,341,20150501
aa,341,20150501
aa,341,20150501
aa,341,20150501
aa,341,20150501
aa,343,20150502
cc,222,20150501
cc,223,20150501
cc,223,20150501
cc,228,20150501
cc,228,20150501
cc,228,20150501
cc,228,20150501
cc,228,20150501
cc,228,20150501
cc,229,20150501
cc,229,20150501
cc,229,20150501
cc,229,20150501
cc,229,20150501
cc,229,20150501
cc,229,20150501
cc,230,20150501
cc,230,20150501
cc,230,20150501
cc,230,20150501
cc,230,20150501
cc,230,20150501
cc,230,20150501
cc,230,20150501
cc,231,20150502
cc,232,20150501
cc,232,20150501
cc,232,20150501
cc,232,20150501
cc,232,20150501
cc,232,20150501
cc,232,20150501
cc,232,20150501
cc,232,20150501
cc,233,20150502
cc,234,20150501
cc,234,20150501
cc,234,20150501
cc,224,20150501
cc,224,20150501
cc,224,20150501
cc,225,20150502
cc,226,20150501
cc,226,20150501
cc,226,20150501
cc,226,20150501
cc,226,20150501
cc,227,20150502
cc,234,20150501
cc,234,20150501
cc,234,20150501
cc,234,20150501
cc,234,20150501
cc,234,20150501
cc,234,20150501
cc,235,20150501
cc,235,20150501
cc,235,20150501
cc,235,20150501
cc,235,20150501
cc,235,20150501
cc,235,20150501
cc,235,20150501
cc,235,20150501
cc,235,20150501
cc,235,20150501
cc,236,20150501
cc,236,20150501
cc,236,20150501
cc,236,20150501
cc,236,20150501
cc,236,20150501
cc,236,20150501
cc,236,20150501
cc,236,20150501
cc,236,20150501
cc,236,20150501
cc,236,20150501
cc,237,20150502
cc,338,20150501
cc,338,20150501
cc,338,20150501
cc,338,20150501
cc,338,20150501
cc,338,20150501
cc,338,20150501
cc,338,20150501
cc,338,20150501
cc,338,20150501
cc,338,20150501
cc,338,20150501
cc,338,20150501
cc,339,20150502
cc,340,20150501
cc,340,20150501
cc,340,20150501
cc,340,20150501
cc,340,20150501
cc,340,20150501
cc,340,20150501
cc,340,20150501
cc,340,20150501
cc,340,20150501
cc,340,20150501
cc,340,20150501
cc,340,20150501
cc,340,20150501
cc,341,20150501
cc,341,20150501
cc,341,20150501
cc,341,20150501
cc,341,20150501
cc,341,20150501
cc,341,20150501
cc,341,20150501
cc,341,20150501
cc,341,20150501
cc,341,20150501
cc,341,20150501
cc,341,20150501
cc,341,20150501
cc,341,20150501
cc,341,20150501
cc,343,20150502
bb,222,20150501
bb,223,20150501
bb,223,20150501
bb,224,20150501
bb,226,20150501
bb,227,20150502
bb,228,20150501
bb,228,20150501
bb,228,20150501
bb,228,20150501
bb,228,20150501
bb,228,20150501
bb,229,20150501
bb,224,20150501
bb,224,20150501
bb,225,20150502
bb,226,20150501
bb,226,20150501
bb,226,20150501
bb,226,20150501
bb,229,20150501
bb,229,20150501
bb,229,20150501
bb,229,20150501
bb,229,20150501
bb,229,20150501
bb,230,20150501
bb,230,20150501
bb,230,20150501
bb,230,20150501
bb,230,20150501
bb,230,20150501
bb,230,20150501
bb,230,20150501
bb,231,20150502
bb,232,20150501
bb,232,20150501
bb,232,20150501
bb,232,20150501
bb,232,20150501
bb,234,20150501
bb,235,20150501
bb,235,20150501
bb,235,20150501
bb,235,20150501
bb,235,20150501
bb,235,20150501
bb,235,20150501
bb,235,20150501
bb,235,20150501
bb,235,20150501
bb,235,20150501
bb,236,20150501
bb,236,20150501
bb,236,20150501
bb,236,20150501
bb,236,20150501
bb,236,20150501
bb,236,20150501
bb,236,20150501
bb,236,20150501
bb,236,20150501
bb,236,20150501
bb,236,20150501
bb,237,20150502
bb,338,20150501
bb,338,20150501
bb,338,20150501
bb,338,20150501
bb,338,20150501
bb,338,20150501
bb,338,20150501
bb,338,20150501
bb,338,20150501
bb,338,20150501
bb,338,20150501
bb,338,20150501
bb,338,20150501
bb,339,20150502
bb,340,20150501
bb,340,20150501
bb,340,20150501
bb,340,20150501
bb,340,20150501
bb,340,20150501
bb,340,20150501
bb,340,20150501
bb,340,20150501
bb,340,20150501
bb,340,20150501
bb,340,20150501
bb,340,20150501
bb,340,20150501
bb,341,20150501
bb,341,20150501
bb,341,20150501
bb,341,20150501
bb,341,20150501
bb,341,20150501
bb,341,20150501
bb,341,20150501
bb,341,20150501
bb,341,20150501
bb,341,20150501
bb,341,20150501
bb,232,20150501
bb,232,20150501
bb,232,20150501
bb,232,20150501
bb,233,20150502
bb,234,20150501
bb,234,20150501
bb,234,20150501
bb,234,20150501
bb,234,20150501
bb,234,20150501
bb,234,20150501
bb,234,20150501
bb,234,20150501
bb,341,20150501
bb,341,20150501
bb,341,20150501
bb,341,20150501
bb,343,20150502
页:
[1]