
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.chukwa.datacollection.writer.solr;

import java.io.IOException;
import java.nio.charset.Charset;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.hadoop.chukwa.Chunk;
import org.apache.hadoop.chukwa.datacollection.agent.ChukwaAgent;
import org.apache.hadoop.chukwa.datacollection.writer.ChukwaWriter;
import org.apache.hadoop.chukwa.datacollection.writer.PipelineableWriter;
import org.apache.hadoop.chukwa.datacollection.writer.WriterException;
import org.apache.hadoop.chukwa.util.ExceptionUtil;
import org.apache.hadoop.conf.Configuration;
import org.apache.log4j.Logger;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.CloudSolrServer;
import org.apache.solr.common.SolrInputDocument;

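/**
 * A pipelineable Chukwa writer that indexes each incoming {@link Chunk}
 * into a Solr Cloud collection through SolrJ's {@link CloudSolrServer}.
 *
 * A minimal configuration sketch (the values are illustrative; only the
 * solr.cloud.address and solr.collection properties are read by this class):
 *
 * <pre>
 *   solr.cloud.address = zkhost1:2181,zkhost2:2181
 *   solr.collection    = logs
 * </pre>
 */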
public class SolrWriter extends PipelineableWriter {
  private static Logger log = Logger.getLogger(SolrWriter.class);
  private CloudSolrServer server;
  private final static String ID = "id";
  private final static String SEQ_ID = "seqId";
  private final static String DATA_TYPE = "type";
  private final static String STREAM_NAME = "stream";
  private final static String TAGS = "tags";
  private final static String SOURCE = "source";
  private final static String DATA = "data";
  private final static String USER = "user";
  private final static String SERVICE = "service";
  private final static String DATE = "date";
  // Matches "user=<value>" terminated by a comma or space, as seen in audit logs.
  private final static Pattern userPattern = Pattern.compile("user=(.+?)[, ]");
  // log4j-style timestamp, e.g. "2015-03-01 12:34:56,789"
  private SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss,SSS");

  public SolrWriter() throws WriterException {
    init(ChukwaAgent.getStaticConfiguration());
  }

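  /**
   * Builds the Solr Cloud client from the agent configuration. Requires
   * solr.cloud.address (the ZooKeeper connect string of the Solr cluster)
   * and falls back to the "logs" collection when solr.collection is unset.
   */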
  @Override
  public void init(Configuration c) throws WriterException {
    String serverName = c.get("solr.cloud.address");
    if (serverName == null) {
      throw new WriterException("Solr server address is not defined.");
    }
    String collection = c.get("solr.collection", "logs");
    if (server == null) {
      server = new CloudSolrServer(serverName);
      server.setDefaultCollection(collection);
    }
  }

  @Override
  public void close() throws WriterException {
    if (server != null) {
      server.shutdown(); // release ZooKeeper and HTTP connections
    }
  }

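  /**
   * Converts each chunk into a {@link SolrInputDocument}, tags it with a
   * best-effort user, service, and timestamp extracted from the raw bytes,
   * indexes it with an immediate commit, and then hands the chunks to the
   * next writer in the pipeline, if any.
   */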
  @Override
  public CommitStatus add(List<Chunk> chunks) throws WriterException {
    CommitStatus rv = ChukwaWriter.COMMIT_OK;
    for (Chunk chunk : chunks) {
      try {
        SolrInputDocument doc = new SolrInputDocument();
        String data = new String(chunk.getData(), Charset.forName("UTF-8"));
        doc.addField(ID, chunk.getSource() + "_" + chunk.getSeqID());
        doc.addField(TAGS, chunk.getTags());
        doc.addField(STREAM_NAME, chunk.getStreamName());
        doc.addField(SOURCE, chunk.getSource());
        doc.addField(SEQ_ID, chunk.getSeqID());
        doc.addField(DATA_TYPE, chunk.getDataType());
        doc.addField(DATA, data);

        // TODO: improve parsing logic for more sophisticated tagging
        Matcher m = userPattern.matcher(data);
        if (m.find()) {
          doc.addField(USER, m.group(1));
        }
        if (data.contains("hdfs")) {
          doc.addField(SERVICE, "hdfs");
        }
        if (data.contains("yarn")) {
          doc.addField(SERVICE, "yarn");
        }
        if (data.contains("mapreduce")) {
          doc.addField(SERVICE, "mapreduce");
        }
        if (data.contains("hbase")) {
          doc.addField(SERVICE, "hbase");
        }
        try {
          Date d = sdf.parse(data);
          doc.addField(DATE, d);
        } catch (ParseException e) {
          // Chunk does not start with a log4j timestamp; index it without a date.
        }
        server.add(doc);
        server.commit();
      } catch (SolrServerException | IOException e) {
        log.warn("Failed to store data to Solr Cloud.");
        log.warn(ExceptionUtil.getStackTrace(e));
      }
    }
    if (next != null) {
      rv = next.add(chunks); // pass data through to the next writer
    }
    return rv;
  }
}