This project has retired. For details please refer to its
Attic page.
Filter xref
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.chukwa.util;
19
20 import java.nio.charset.Charset;
21 import java.util.ArrayList;
22 import java.util.List;
23 import java.util.regex.Pattern;
24 import java.util.regex.PatternSyntaxException;
25
26 import org.apache.commons.lang.ArrayUtils;
27 import org.apache.hadoop.chukwa.Chunk;
28 import org.apache.hadoop.chukwa.extraction.engine.RecordUtil;
29 import org.apache.hadoop.chukwa.util.RegexUtil.CheckedPatternSyntaxException;
30 import org.apache.log4j.Logger;
31
32
33 public class Filter {
34
35 static Logger log = Logger.getLogger(Filter.class);
36
37 private static final String[] SEARCH_TARGS =
38 {"datatype", "name", "host", "cluster", "content"};
39 static final String SEPARATOR="&";
40
41 private static class SearchRule {
42 Pattern p;
43 String targ;
44
45 SearchRule(Pattern p, String t) {
46 this.p = p;
47 this.targ = t;
48 }
49
50 boolean matches(Chunk chunk) {
51 if(targ.equals("datatype")) {
52 return p.matcher(chunk.getDataType()).matches();
53 } else if(targ.equals("name")) {
54 return p.matcher(chunk.getStreamName()).matches();
55 } else if(targ.equals("host")) {
56 return p.matcher(chunk.getSource()).matches();
57 } else if(targ.equals("cluster")) {
58 String cluster = RecordUtil.getClusterName(chunk);
59 return p.matcher(cluster).matches();
60 } else if(targ.equals("content")) {
61 String content = new String(chunk.getData(), Charset.forName("UTF-8"));
62 return p.matcher(content).matches();
63 } else if(targ.startsWith("tags.")) {
64 String tagName = targ.substring("tags.".length());
65 if (!RegexUtil.isRegex(tagName)) {
66 log.warn("Error parsing 'tagName' as a regex: "
67 + RegexUtil.regexError(tagName));
68 return false;
69 }
70 String tagVal = chunk.getTag(tagName);
71 if(tagVal == null)
72 return false;
73 return p.matcher(tagVal).matches();
74 } else {
75 assert false: "unknown target: " +targ;
76 return false;
77 }
78 }
79
80 public String toString() {
81 return targ + "=" +p.toString();
82 }
83 }
84
85 List<SearchRule> compiledPatterns;
86
87 public Filter(String listOfPatterns) throws CheckedPatternSyntaxException {
88 compiledPatterns = new ArrayList<SearchRule>();
89
90 String[] patterns = listOfPatterns.split(SEPARATOR);
91 for(String p: patterns) {
92 int equalsPos = p.indexOf('=');
93
94 if(equalsPos < 0 || equalsPos > (p.length() -2)) {
95 throw new CheckedPatternSyntaxException(
96 "pattern must be of form targ=pattern", p, -1);
97 }
98
99 String targ = p.substring(0, equalsPos);
100 if(!targ.startsWith("tags.") && !ArrayUtils.contains(SEARCH_TARGS, targ)) {
101 throw new CheckedPatternSyntaxException(
102 "pattern doesn't start with recognized search target", p, -1);
103 }
104
105 String regex = p.substring(equalsPos+1);
106 if (!RegexUtil.isRegex(regex)) {
107 throw new CheckedPatternSyntaxException(RegexUtil.regexException(regex));
108 }
109
110 Pattern pat = Pattern.compile(regex, Pattern.DOTALL);
111 compiledPatterns.add(new SearchRule(pat, targ));
112 }
113 }
114
115 public boolean matches(Chunk chunk) {
116 for(SearchRule r: compiledPatterns) {
117 if(!r.matches(chunk))
118 return false;
119 }
120 return true;
121 }
122
123 int size() {
124 return compiledPatterns.size();
125 }
126
127 public String toString() {
128 StringBuilder sb = new StringBuilder();
129 sb.append(compiledPatterns.get(0));
130 for(int i=1; i < compiledPatterns.size(); ++i) {
131 sb.append(" & ");
132 sb.append(compiledPatterns.get(i));
133 }
134 return sb.toString();
135 }
136
137 private static final class MatchAll extends Filter {
138 public MatchAll() throws CheckedPatternSyntaxException {
139 super("datatype=.*");
140 }
141
142 public boolean matches(Chunk c) {
143 return true;
144 }
145
146 public String toString() {
147 return "ALL";
148 }
149 }
150
151 public static final Filter ALL = newMatchAll();
152 private static Filter newMatchAll() {
153 try {
154 return new MatchAll();
155 } catch (CheckedPatternSyntaxException e) {
156 throw new RuntimeException("Illegal MatchAll regular expression.", e);
157 }
158 }
159
160 }