This project has been retired. For details, please refer to its
Attic page.
SolrWriter xref
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.chukwa.datacollection.writer.solr;
19
20 import java.io.IOException;
21 import java.nio.charset.Charset;
22 import java.text.ParseException;
23 import java.text.SimpleDateFormat;
24 import java.util.Date;
25 import java.util.List;
26 import java.util.regex.Matcher;
27 import java.util.regex.Pattern;
28
29 import org.apache.hadoop.chukwa.Chunk;
30 import org.apache.hadoop.chukwa.datacollection.agent.ChukwaAgent;
31 import org.apache.hadoop.chukwa.datacollection.writer.ChukwaWriter;
32 import org.apache.hadoop.chukwa.datacollection.writer.PipelineableWriter;
33 import org.apache.hadoop.chukwa.datacollection.writer.WriterException;
34 import org.apache.hadoop.chukwa.util.ExceptionUtil;
35 import org.apache.hadoop.conf.Configuration;
36 import org.apache.log4j.Logger;
37 import org.apache.solr.client.solrj.SolrServerException;
38 import org.apache.solr.client.solrj.impl.CloudSolrServer;
39 import org.apache.solr.common.SolrInputDocument;
40
41 public class SolrWriter extends PipelineableWriter {
42 private static Logger log = Logger.getLogger(SolrWriter.class);
43 private CloudSolrServer server;
44 private final static String ID = "id";
45 private final static String SEQ_ID = "seqId";
46 private final static String DATA_TYPE = "type";
47 private final static String STREAM_NAME = "stream";
48 private final static String TAGS = "tags";
49 private final static String SOURCE = "source";
50 private final static String DATA = "data";
51 private final static String USER = "user";
52 private final static String SERVICE = "service";
53 private final static String DATE = "date";
54 private final static Pattern userPattern = Pattern.compile("user=(.+?)[, ]");
55 private SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss,SSS");
56
57 public SolrWriter() throws WriterException {
58 init(ChukwaAgent.getStaticConfiguration());
59 }
60
61 @Override
62 public void init(Configuration c) throws WriterException {
63 String serverName = c.get("solr.cloud.address");
64 if (serverName == null) {
65 throw new WriterException("Solr server address is not defined.");
66 }
67 String collection = c.get("solr.collection", "logs");
68 if(server == null) {
69 server = new CloudSolrServer(serverName);
70 server.setDefaultCollection(collection);
71 }
72 }
73
74 @Override
75 public void close() throws WriterException {
76 }
77
78 @Override
79 public CommitStatus add(List<Chunk> chunks) throws WriterException {
80 CommitStatus rv = ChukwaWriter.COMMIT_OK;
81 for(Chunk chunk : chunks) {
82 try {
83 SolrInputDocument doc = new SolrInputDocument();
84 doc.addField(ID, chunk.getSource() + "_" + chunk.getSeqID());
85 doc.addField(TAGS, chunk.getTags());
86 doc.addField(STREAM_NAME, chunk.getStreamName());
87 doc.addField(SOURCE, chunk.getSource());
88 doc.addField(SEQ_ID, chunk.getSeqID());
89 doc.addField(DATA_TYPE, chunk.getDataType());
90 doc.addField(DATA, new String(chunk.getData(), Charset.forName("UTF-8")));
91
92
93 String data = new String(chunk.getData(), Charset.forName("UTF-8"));
94 Matcher m = userPattern.matcher(data);
95 if(m.find()) {
96 doc.addField(USER, m.group(1));
97 }
98 if(data.contains("hdfs")) {
99 doc.addField(SERVICE, "hdfs");
100 }
101 if(data.contains("yarn")) {
102 doc.addField(SERVICE, "yarn");
103 }
104 if(data.contains("mapredice")) {
105 doc.addField(SERVICE, "mapreduce");
106 }
107 if(data.contains("hbase")) {
108 doc.addField(SERVICE, "hbase");
109 }
110 try {
111 Date d = sdf.parse(data);
112 doc.addField(DATE, d, 1.0f);
113 } catch(ParseException e) {
114
115 }
116 server.add(doc);
117 server.commit();
118 } catch (SolrServerException | IOException e) {
119 log.warn("Failed to store data to Solr Cloud.");
120 log.warn(ExceptionUtil.getStackTrace(e));
121 }
122 }
123 if (next != null) {
124 rv = next.add(chunks);
125 }
126 return rv;
127 }
128 }