SolrWriter xref
package org.apache.hadoop.chukwa.datacollection.writer.solr;

import java.nio.charset.Charset;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.hadoop.chukwa.Chunk;
import org.apache.hadoop.chukwa.datacollection.agent.ChukwaAgent;
import org.apache.hadoop.chukwa.datacollection.writer.ChukwaWriter;
import org.apache.hadoop.chukwa.datacollection.writer.PipelineableWriter;
import org.apache.hadoop.chukwa.datacollection.writer.WriterException;
import org.apache.hadoop.chukwa.util.ExceptionUtil;
import org.apache.hadoop.conf.Configuration;
import org.apache.log4j.Logger;
import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.common.SolrInputDocument;

/**
 * Writes Chukwa chunks to SolrCloud. Each chunk becomes one Solr document
 * carrying the chunk metadata (source, stream, tags, sequence id, data type)
 * plus heuristically extracted user, service, and date fields.
 */
public class SolrWriter extends PipelineableWriter {
  private static Logger log = Logger.getLogger(SolrWriter.class);
  private CloudSolrClient client;
  private final static String ID = "id";
  private final static String SEQ_ID = "seqId";
  private final static String DATA_TYPE = "type";
  private final static String STREAM_NAME = "stream";
  private final static String TAGS = "tags";
  private final static String SOURCE = "source";
  private final static String DATA = "data";
  private final static String USER = "user";
  private final static String SERVICE = "service";
  private final static String DATE = "date";
  private final static Pattern userPattern = Pattern.compile("user=(.+?)[, ]");
  private SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss,SSS");

  public SolrWriter() throws WriterException {
    init(ChukwaAgent.getStaticConfiguration());
  }

  @Override
  public void init(Configuration c) throws WriterException {
    // solr.cloud.address is the ZooKeeper connect string of the SolrCloud cluster.
    String serverName = c.get("solr.cloud.address");
    if (serverName == null) {
      throw new WriterException("Solr server address is not defined.");
    }
    String collection = c.get("solr.collection", "logs");
    if (client == null) {
      client = new CloudSolrClient(serverName);
      client.setDefaultCollection(collection);
    }
  }

  @Override
  public void close() throws WriterException {
  }

  @Override
  public CommitStatus add(List<Chunk> chunks) throws WriterException {
    if (client == null) {
      init(ChukwaAgent.getStaticConfiguration());
    }
    CommitStatus rv = ChukwaWriter.COMMIT_OK;
    for (Chunk chunk : chunks) {
      try {
        SolrInputDocument doc = new SolrInputDocument();
        String data = new String(chunk.getData(), Charset.forName("UTF-8"));
        doc.addField(ID, chunk.getSource() + "_" + chunk.getSeqID());
        doc.addField(TAGS, chunk.getTags());
        doc.addField(STREAM_NAME, chunk.getStreamName());
        doc.addField(SOURCE, chunk.getSource());
        doc.addField(SEQ_ID, chunk.getSeqID());
        doc.addField(DATA_TYPE, chunk.getDataType());
        doc.addField(DATA, data);

        // Extract the user name from a "user=<name>," or "user=<name> " token, if present.
        Matcher m = userPattern.matcher(data);
        if (m.find()) {
          doc.addField(USER, m.group(1));
        } else {
          doc.addField(USER, "Unclassified");
        }

        // Classify the originating service by simple keyword matching.
        if (data.contains("hdfs")) {
          doc.addField(SERVICE, "hdfs");
        } else if (data.contains("yarn")) {
          doc.addField(SERVICE, "yarn");
        } else if (data.contains("mapreduce")) {
          doc.addField(SERVICE, "mapreduce");
        } else if (data.contains("hbase")) {
          doc.addField(SERVICE, "hbase");
        } else {
          doc.addField(SERVICE, "Unclassified");
        }

        // Parse the leading log4j-style timestamp (yyyy-MM-dd HH:mm:ss,SSS) if the
        // chunk starts with one.
        try {
          Date d = sdf.parse(data);
          doc.addField(DATE, d, 1.0f);
        } catch (ParseException e) {
          // Not a timestamped record; skip the date field.
        }
        client.add(doc);
      } catch (Exception e) {
        log.warn("Failed to store data to Solr Cloud.");
        log.warn(ExceptionUtil.getStackTrace(e));
        // Drop the client so it is re-initialized on the next batch.
        client = null;
      }
    }
    try {
      if (client != null) {
        client.commit();
      }
    } catch (Exception e) {
      log.warn("Failed to store data to Solr Cloud.");
      log.warn(ExceptionUtil.getStackTrace(e));
    }
    if (next != null) {
      rv = next.add(chunks);
    }
    return rv;
  }
}
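
The user, service, and date fields above are derived entirely from the chunk payload, so that extraction logic can be exercised on its own. The following stand-alone sketch is not part of Chukwa; the class name SolrWriterFieldExtractionDemo and the sample log line are made up for illustration. It reproduces the same regex, keyword, and timestamp handling that add() applies to each chunk.

// Stand-alone sketch (hypothetical class and sample line) mirroring SolrWriter's
// user/service/date extraction on one log record.
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class SolrWriterFieldExtractionDemo {
  public static void main(String[] args) {
    String data = "2016-02-21 19:46:32,123 INFO audit: user=alice, cmd=open src=/tmp/x hdfs";
    Pattern userPattern = Pattern.compile("user=(.+?)[, ]");
    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss,SSS");

    // user field: first "user=<name>" token terminated by a comma or space.
    Matcher m = userPattern.matcher(data);
    System.out.println("user    = " + (m.find() ? m.group(1) : "Unclassified"));

    // service field: first keyword match wins (hdfs, yarn, mapreduce, hbase).
    String service = data.contains("hdfs") ? "hdfs"
        : data.contains("yarn") ? "yarn"
        : data.contains("mapreduce") ? "mapreduce"
        : data.contains("hbase") ? "hbase"
        : "Unclassified";
    System.out.println("service = " + service);

    // date field: only set when the line starts with a log4j-style timestamp.
    try {
      Date d = sdf.parse(data);
      System.out.println("date    = " + d);
    } catch (ParseException e) {
      System.out.println("date    = (none; line does not start with a timestamp)");
    }
  }
}

For the sample line this prints user "alice", service "hdfs", and the parsed timestamp, matching the values SolrWriter would attach to the corresponding Solr document.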