1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18
19 package org.apache.hadoop.chukwa.util;
20
21 import java.util.regex.Pattern;
22 import java.util.regex.PatternSyntaxException;
23
24 /*>>>
25 import checkers.nullness.quals.*;
26 import checkers.regex.quals.*;
27 */
28
/**
 * Utility methods for regular expressions, most notably for testing whether
 * a string is a syntactically valid regular expression.
 * <p>
 * All methods are static; the class cannot be instantiated. None of the
 * methods handles a {@code null} string specially: the argument is passed
 * straight to {@link Pattern#compile(String)}.
 */
public class RegexUtil {

  /**
   * A checked version of {@link PatternSyntaxException}.
   * <p>
   * This exception is useful when an illegal regex is detected but the
   * contextual information to report a helpful error message is not available
   * at the current depth in the call stack. By using a checked
   * PatternSyntaxException the error must be handled up the call stack where
   * a better error message can be reported.
   * <p>
   * Typical usage is:
   * <pre>
   * void myMethod(...) throws CheckedPatternSyntaxException {
   *   ...
   *   if (! isRegex(myString)) {
   *     throw new CheckedPatternSyntaxException(...);
   *   }
   *   ... Pattern.compile(myString) ...
   * }
   * </pre>
   *
   * Simply calling {@code Pattern.compile} would have a similar effect,
   * in that {@code PatternSyntaxException} would be thrown at run time if
   * {@code myString} is not a regular expression. There are two problems
   * with such an approach. First, a client of {@code myMethod} might
   * forget to handle the exception, since {@code PatternSyntaxException}
   * is not checked. Also, the Regex Checker would issue a warning about
   * the call to {@code Pattern.compile} that might throw an exception.
   * The above usage pattern avoids both problems.
   *
   * @see PatternSyntaxException
   */
  public static class CheckedPatternSyntaxException extends Exception {

    private static final long serialVersionUID = 6266881831979001480L;

    /** The unchecked exception that every accessor of this class delegates to. */
    private final PatternSyntaxException pse;

    /**
     * Constructs a new CheckedPatternSyntaxException equivalent to the
     * given {@link PatternSyntaxException}. The given exception is also
     * recorded as the {@linkplain Throwable#getCause() cause}, so the
     * original stack trace is not lost.
     * <p>
     * Consider calling this constructor with the result of
     * {@link RegexUtil#regexException}.
     *
     * @param pse the exception to wrap; must not be {@code null}
     * @throws NullPointerException if {@code pse} is {@code null}
     */
    public CheckedPatternSyntaxException(PatternSyntaxException pse) {
      // Chain the wrapped exception so getCause() and printed stack traces
      // expose where the syntax error was originally detected.
      super(pse);
      // Fail fast: a null delegate would otherwise surface much later as an
      // NPE inside getMessage(), e.g. while a stack trace is being printed.
      if (pse == null) {
        throw new NullPointerException("pse");
      }
      this.pse = pse;
    }

    /**
     * Constructs a new CheckedPatternSyntaxException.
     *
     * @param desc A description of the error
     * @param regex The erroneous pattern
     * @param index The approximate index in the pattern of the error,
     *        or {@code -1} if the index is not known
     */
    public CheckedPatternSyntaxException(String desc, String regex, int index) {
      this(new PatternSyntaxException(desc, regex, index));
    }

    /**
     * Retrieves the description of the error.
     *
     * @return The description of the error
     */
    public String getDescription() {
      return pse.getDescription();
    }

    /**
     * Retrieves the error index.
     *
     * @return The approximate index in the pattern of the error, or {@code -1}
     *         if the index is not known
     */
    public int getIndex() {
      return pse.getIndex();
    }

    /**
     * Returns a multi-line string containing the description of the syntax
     * error and its index, the erroneous regular-expression pattern, and a
     * visual indication of the error index within the pattern.
     *
     * @return The full detail message, as produced by the wrapped
     *         {@link PatternSyntaxException}
     */
    @Override
    public String getMessage() {
      return pse.getMessage();
    }

    /**
     * Retrieves the erroneous regular-expression pattern.
     *
     * @return The erroneous pattern
     */
    public String getPattern() {
      return pse.getPattern();
    }
  }

  /** Not instantiable: this class only hosts static helpers. */
  private RegexUtil() {
    throw new AssertionError("Class RegexUtil shouldn't be instantiated");
  }

  /**
   * Returns true if the argument is a syntactically valid regular
   * expression.
   *
   * @param s the string to test
   * @return {@code true} if {@code s} compiles as a regular expression,
   *         {@code false} otherwise
   */
  public static boolean isRegex(String s) {
    return isRegex(s, 0);
  }

  /**
   * Returns true if the argument is a syntactically valid regular
   * expression with at least the given number of groups.
   *
   * @param s the string to test
   * @param groups the minimum number of capturing groups required
   * @return {@code true} if {@code s} compiles as a regular expression and
   *         has at least {@code groups} capturing groups, {@code false}
   *         otherwise
   */
  /*>>>
  @SuppressWarnings("regex") // RegexUtil
  */
  /*@Pure*/
  public static boolean isRegex(String s, int groups) {
    Pattern p;
    try {
      p = Pattern.compile(s);
    } catch (PatternSyntaxException e) {
      // A syntax error is the expected "not a regex" answer, not a failure.
      return false;
    }
    return getGroupCount(p) >= groups;
  }

  /**
   * Returns true if the argument, taken as a one-character string, is a
   * syntactically valid regular expression.
   *
   * @param c the character to test
   * @return {@code true} if the one-character string made of {@code c}
   *         compiles as a regular expression, {@code false} otherwise
   */
  /*>>>
  @SuppressWarnings("regex") // RegexUtil
  */
  /*@Pure*/
  public static boolean isRegex(char c) {
    return isRegex(Character.toString(c));
  }

  /**
   * Returns null if the argument is a syntactically valid regular
   * expression. Otherwise returns a string describing why the argument is
   * not a regex.
   *
   * @param s the string to test
   * @return {@code null} if {@code s} is a valid regular expression,
   *         otherwise a description of the problem
   */
  /*>>>
  @SuppressWarnings("regex") // RegexUtil
  */
  /*@Pure*/
  public static /*@Nullable*/ String regexError(String s) {
    return regexError(s, 0);
  }

  /**
   * Returns null if the argument is a syntactically valid regular
   * expression with at least the given number of groups. Otherwise returns
   * a string describing why the argument is not a regex.
   *
   * @param s the string to test
   * @param groups the minimum number of capturing groups required
   * @return {@code null} if {@code s} is a valid regular expression with at
   *         least {@code groups} capturing groups; otherwise either the
   *         message of the {@link PatternSyntaxException} raised while
   *         compiling {@code s}, or a message reporting the group-count
   *         mismatch
   */
  /*>>>
  @SuppressWarnings("regex") // RegexUtil
  */
  /*@Pure*/
  public static /*@Nullable*/ String regexError(String s, int groups) {
    try {
      Pattern p = Pattern.compile(s);
      int actualGroups = getGroupCount(p);
      if (actualGroups < groups) {
        return regexErrorMessage(s, groups, actualGroups);
      }
    } catch (PatternSyntaxException e) {
      return e.getMessage();
    }
    return null;
  }

  /**
   * Returns null if the argument is a syntactically valid regular
   * expression. Otherwise returns a PatternSyntaxException describing
   * why the argument is not a regex.
   *
   * @param s the string to test
   * @return {@code null} if {@code s} is a valid regular expression,
   *         otherwise an exception describing the problem (returned, not
   *         thrown)
   */
  /*>>>
  @SuppressWarnings("regex") // RegexUtil
  */
  /*@Pure*/
  public static /*@Nullable*/ PatternSyntaxException regexException(String s) {
    return regexException(s, 0);
  }

  /**
   * Returns null if the argument is a syntactically valid regular
   * expression with at least the given number of groups. Otherwise returns a
   * PatternSyntaxException describing why the argument is not a regex.
   *
   * @param s the string to test
   * @param groups the minimum number of capturing groups required
   * @return {@code null} if {@code s} is a valid regular expression with at
   *         least {@code groups} capturing groups; otherwise either the
   *         exception raised while compiling {@code s}, or a new exception
   *         with index {@code -1} reporting the group-count mismatch. The
   *         exception is returned, not thrown.
   */
  /*>>>
  @SuppressWarnings("regex") // RegexUtil
  */
  /*@Pure*/
  public static /*@Nullable*/ PatternSyntaxException regexException(String s, int groups) {
    try {
      Pattern p = Pattern.compile(s);
      int actualGroups = getGroupCount(p);
      if (actualGroups < groups) {
        // No meaningful position exists for a group-count mismatch, hence -1.
        return new PatternSyntaxException(regexErrorMessage(s, groups, actualGroups), s, -1);
      }
    } catch (PatternSyntaxException pse) {
      return pse;
    }
    return null;
  }

  /**
   * Returns the argument as a {@code @Regex String} if it is a regex,
   * otherwise throws an error. The purpose of this method is to suppress Regex
   * Checker warnings. Once the Regex Checker supports flow-sensitivity, it
   * should be very rarely needed.
   *
   * @param s the string to check
   * @return {@code s} itself, unchanged
   * @throws Error if {@code s} is not a syntactically valid regular
   *         expression
   */
  public static /*@Regex*/ String asRegex(String s) {
    return asRegex(s, 0);
  }

  /**
   * Returns the argument as a {@code @Regex(groups) String} if it is a regex
   * with at least the given number of groups, otherwise throws an error. The
   * purpose of this method is to suppress Regex Checker warnings. Once the
   * Regex Checker supports flow-sensitivity, it should be very rarely needed.
   *
   * @param s the string to check
   * @param groups the minimum number of capturing groups required
   * @return {@code s} itself, unchanged
   * @throws Error if {@code s} is not a syntactically valid regular
   *         expression (the {@link PatternSyntaxException} is the cause), or
   *         if it has fewer than {@code groups} capturing groups
   */
  /*>>>
  @SuppressWarnings("regex") // RegexUtil
  */
  /*@Pure*/
  public static /*@Regex*/ String asRegex(String s, int groups) {
    try {
      Pattern p = Pattern.compile(s);
      int actualGroups = getGroupCount(p);
      if (actualGroups < groups) {
        throw new Error(regexErrorMessage(s, groups, actualGroups));
      }
      return s;
    } catch (PatternSyntaxException e) {
      throw new Error(e);
    }
  }

  /**
   * Generates an error message for s when expectedGroups are needed, but s
   * only has actualGroups.
   *
   * @param s the regular expression being reported on
   * @param expectedGroups the number of groups the caller required
   * @param actualGroups the number of groups {@code s} actually has
   * @return the formatted message
   */
  private static String regexErrorMessage(String s, int expectedGroups, int actualGroups) {
    return "regex \"" + s + "\" has " + actualGroups + " groups, but " +
        expectedGroups + " groups are needed.";
  }

  /**
   * Returns the count of capturing groups in the argument.
   *
   * @param p a compiled pattern
   * @return the group count reported by a matcher created from {@code p}
   */
  private static int getGroupCount(Pattern p) {
    // The group count is read from a Matcher, so one is created on an empty
    // input purely to query it.
    return p.matcher("").groupCount();
  }
}