SolrWriter.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.chukwa.datacollection.writer.solr;

import java.io.IOException;
import java.nio.charset.Charset;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.hadoop.chukwa.Chunk;
import org.apache.hadoop.chukwa.datacollection.agent.ChukwaAgent;
import org.apache.hadoop.chukwa.datacollection.writer.ChukwaWriter;
import org.apache.hadoop.chukwa.datacollection.writer.PipelineableWriter;
import org.apache.hadoop.chukwa.datacollection.writer.WriterException;
import org.apache.hadoop.chukwa.util.ExceptionUtil;
import org.apache.hadoop.conf.Configuration;
import org.apache.log4j.Logger;
import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.common.SolrInputDocument;

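/**
 * A {@link PipelineableWriter} that indexes Chukwa chunks into a SolrCloud
 * collection. Each chunk becomes one {@link SolrInputDocument} carrying the
 * chunk metadata (source, stream, sequence id, data type, tags) plus a few
 * best-effort fields extracted from the payload: the user name, the Hadoop
 * service the log line mentions, and the log timestamp.
 */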
public class SolrWriter extends PipelineableWriter {
  private static final Logger log = Logger.getLogger(SolrWriter.class);
  private CloudSolrClient client;
  private static final String ID = "id";
  private static final String SEQ_ID = "seqId";
  private static final String DATA_TYPE = "type";
  private static final String STREAM_NAME = "stream";
  private static final String TAGS = "tags";
  private static final String SOURCE = "source";
  private static final String DATA = "data";
  private static final String USER = "user";
  private static final String SERVICE = "service";
  private static final String DATE = "date";
  private static final Pattern userPattern = Pattern.compile("user=(.+?)[, ]");
  // Log4j-style timestamp, e.g. 2016-03-01 12:00:00,123. Note that
  // SimpleDateFormat is not thread-safe, so this instance must not be
  // shared across writer threads.
  private SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss,SSS");

  public SolrWriter() throws WriterException {
    init(ChukwaAgent.getStaticConfiguration());
  }

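  /**
   * Connects to SolrCloud through the ZooKeeper address given by the
   * <code>solr.cloud.address</code> setting and selects the collection named
   * by <code>solr.collection</code>, defaulting to <code>logs</code>.
   */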
  @Override
  public void init(Configuration c) throws WriterException {
    String serverName = c.get("solr.cloud.address");
    if (serverName == null) {
      throw new WriterException("Solr server address is not defined.");
    }
    String collection = c.get("solr.collection", "logs");
    if (client == null) {
      client = new CloudSolrClient(serverName);
      client.setDefaultCollection(collection);
    }
  }

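  /**
   * Releases the SolrCloud connection. A subsequent {@link #add(List)} will
   * reconnect through {@link #init(Configuration)}.
   */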
  @Override
  public void close() throws WriterException {
    if (client != null) {
      try {
        client.close();
      } catch (IOException e) {
        throw new WriterException(ExceptionUtil.getStackTrace(e));
      }
      client = null;
    }
  }

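  /**
   * Converts each chunk into a Solr document and indexes it, then issues a
   * single commit for the batch. On failure the client is dropped so the next
   * call re-initializes the connection; chunks are passed to the next writer
   * in the pipeline regardless.
   */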
  @Override
  public CommitStatus add(List<Chunk> chunks) throws WriterException {
    if (client == null) {
      init(ChukwaAgent.getStaticConfiguration());
    }
    CommitStatus rv = ChukwaWriter.COMMIT_OK;
    for (Chunk chunk : chunks) {
      try {
        String data = new String(chunk.getData(), Charset.forName("UTF-8"));
        SolrInputDocument doc = new SolrInputDocument();
        doc.addField(ID, chunk.getSource() + "_" + chunk.getSeqID());
        doc.addField(TAGS, chunk.getTags());
        doc.addField(STREAM_NAME, chunk.getStreamName());
        doc.addField(SOURCE, chunk.getSource());
        doc.addField(SEQ_ID, chunk.getSeqID());
        doc.addField(DATA_TYPE, chunk.getDataType());
        doc.addField(DATA, data);

        // TODO: improve parsing logic for more sophisticated tagging
        Matcher m = userPattern.matcher(data);
        if (m.find()) {
          doc.addField(USER, m.group(1));
        } else {
          doc.addField(USER, "Unclassified");
        }
        if (data.contains("hdfs")) {
          doc.addField(SERVICE, "hdfs");
        } else if (data.contains("yarn")) {
          doc.addField(SERVICE, "yarn");
        } else if (data.contains("mapreduce")) {
          doc.addField(SERVICE, "mapreduce");
        } else if (data.contains("hbase")) {
          doc.addField(SERVICE, "hbase");
        } else {
          doc.addField(SERVICE, "Unclassified");
        }
        try {
          // parse() only succeeds when the chunk starts with a
          // yyyy-MM-dd HH:mm:ss,SSS timestamp, as log4j output does.
          Date d = sdf.parse(data);
          doc.addField(DATE, d);
        } catch (ParseException e) {
          // Not every chunk begins with a timestamp; skip the date field.
        }
        client.add(doc);
      } catch (Exception e) {
        log.warn("Failed to store data to Solr Cloud.");
        log.warn(ExceptionUtil.getStackTrace(e));
        // Drop the client so the next add() re-initializes the connection.
        client = null;
      }
    }
    try {
      if (client != null) {
        client.commit();
      }
    } catch (Exception e) {
      log.warn("Failed to commit data to Solr Cloud.");
      log.warn(ExceptionUtil.getStackTrace(e));
    }
    if (next != null) {
      rv = next.add(chunks); // pass data through the rest of the pipeline
    }
    return rv;
  }
}
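
For reference, a minimal standalone sketch of the SolrJ 5.x calls this writer
relies on. The ZooKeeper address ("localhost:2181"), collection name, and
field values are assumptions for illustration; in the writer itself they come
from the solr.cloud.address and solr.collection settings and from each chunk.

    import org.apache.solr.client.solrj.impl.CloudSolrClient;
    import org.apache.solr.common.SolrInputDocument;

    public class SolrWriterSketch {
      public static void main(String[] args) throws Exception {
        // Connect to SolrCloud through its ZooKeeper ensemble (assumed address).
        CloudSolrClient client = new CloudSolrClient("localhost:2181");
        client.setDefaultCollection("logs");

        // Build one document the way SolrWriter does for each chunk.
        SolrInputDocument doc = new SolrInputDocument();
        doc.addField("id", "host1_42");
        doc.addField("type", "HadoopLog");
        doc.addField("data", "2016-03-01 12:00:00,123 INFO user=alice, hdfs op");

        client.add(doc);   // buffered on the Solr side until commit
        client.commit();   // make the batch searchable
        client.close();    // release ZooKeeper and HTTP resources
      }
    }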