1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18
19 package org.apache.hadoop.chukwa.util;
20
21 import java.util.regex.Pattern;
22 import java.util.regex.PatternSyntaxException;
23
24 /*>>>
25 import checkers.nullness.quals.*;
26 import checkers.regex.quals.*;
27 */
28
/**
 * Utility methods for regular expressions, most notably for testing whether
 * a string is a regular expression.
 * <p>
 * All members are static; the class cannot be instantiated.
 */
public final class RegexUtil {

  /**
   * A checked version of {@link PatternSyntaxException}.
   * <p>
   * This exception is useful when an illegal regex is detected but the
   * contextual information to report a helpful error message is not available
   * at the current depth in the call stack. By using a checked
   * PatternSyntaxException the error must be handled up the call stack where
   * a better error message can be reported.
   * <p>
   * Typical usage is:
   * <pre>
   * void myMethod(...) throws CheckedPatternSyntaxException {
   *   ...
   *   if (! isRegex(myString)) {
   *     throw new CheckedPatternSyntaxException(...);
   *   }
   *   ... Pattern.compile(myString) ...
   * }
   * </pre>
   *
   * Simply calling {@code Pattern.compile} would have a similar effect,
   * in that {@code PatternSyntaxException} would be thrown at run time if
   * {@code myString} is not a regular expression. There are two problems
   * with such an approach. First, a client of {@code myMethod} might
   * forget to handle the exception, since {@code PatternSyntaxException}
   * is not checked. Also, the Regex Checker would issue a warning about
   * the call to {@code Pattern.compile} that might throw an exception.
   * The above usage pattern avoids both problems.
   *
   * @see PatternSyntaxException
   */
  public static class CheckedPatternSyntaxException extends Exception {

    private static final long serialVersionUID = 6266881831979001480L;

    /** The unchecked exception whose details this checked exception exposes. */
    private final PatternSyntaxException pse;

    /**
     * Constructs a new CheckedPatternSyntaxException equivalent to the
     * given {@link PatternSyntaxException}. The given exception is also
     * recorded as the cause of this one.
     * <p>
     * Consider calling this constructor with the result of
     * {@link RegexUtil#regexException(String)}.
     *
     * @param pse The exception to wrap; must not be {@code null}
     * @throws NullPointerException if {@code pse} is {@code null}
     */
    public CheckedPatternSyntaxException(PatternSyntaxException pse) {
      // Chain the wrapped exception so stack traces show where it originated.
      super(pse);
      // Fail here rather than later inside a getter (e.g. while a stack
      // trace is being printed), where the failure would be harder to diagnose.
      if (pse == null) {
        throw new NullPointerException("pse");
      }
      this.pse = pse;
    }

    /**
     * Constructs a new CheckedPatternSyntaxException.
     *
     * @param desc A description of the error
     * @param regex The erroneous pattern
     * @param index The approximate index in the pattern of the error,
     *     or {@code -1} if the index is not known
     */
    public CheckedPatternSyntaxException(String desc, String regex, int index) {
      this(new PatternSyntaxException(desc, regex, index));
    }

    /**
     * Retrieves the description of the error.
     *
     * @return The description of the error
     */
    public String getDescription() {
      return pse.getDescription();
    }

    /**
     * Retrieves the error index.
     *
     * @return The approximate index in the pattern of the error, or {@code -1}
     *     if the index is not known
     */
    public int getIndex() {
      return pse.getIndex();
    }

    /**
     * Returns a multi-line string containing the description of the syntax
     * error and its index, the erroneous regular-expression pattern, and a
     * visual indication of the error index within the pattern.
     *
     * @return The full detail message
     */
    @Override
    public String getMessage() {
      return pse.getMessage();
    }

    /**
     * Retrieves the erroneous regular-expression pattern.
     *
     * @return The erroneous pattern
     */
    public String getPattern() {
      return pse.getPattern();
    }
  }

  /** Not instantiable: this class only provides static helpers. */
  private RegexUtil() {
    throw new AssertionError("Class RegexUtil shouldn't be instantiated");
  }

  /**
   * Returns true if the argument is a syntactically valid regular
   * expression.
   *
   * @param s The string to test
   * @return {@code true} if {@code s} compiles as a regular expression
   */
  public static boolean isRegex(String s) {
    return isRegex(s, 0);
  }

  /**
   * Returns true if the argument is a syntactically valid regular
   * expression with at least the given number of groups.
   *
   * @param s The string to test
   * @param groups The minimum number of capturing groups required
   * @return {@code true} if {@code s} compiles as a regular expression and
   *     has at least {@code groups} capturing groups
   */
  /*>>>
  @SuppressWarnings("regex") // RegexUtil
  */
  /*@Pure*/
  public static boolean isRegex(String s, int groups) {
    Pattern p;
    try {
      p = Pattern.compile(s);
    } catch (PatternSyntaxException e) {
      // A syntax error is the expected "not a regex" answer, not a failure.
      return false;
    }
    return getGroupCount(p) >= groups;
  }

  /**
   * Returns true if the argument, taken as a one-character string, is a
   * syntactically valid regular expression.
   *
   * @param c The character to test
   * @return {@code true} if the one-character string compiles as a regular
   *     expression
   */
  /*>>>
  @SuppressWarnings("regex") // RegexUtil
  */
  /*@Pure*/
  public static boolean isRegex(char c) {
    return isRegex(Character.toString(c));
  }

  /**
   * Returns null if the argument is a syntactically valid regular
   * expression. Otherwise returns a string describing why the argument is
   * not a regex.
   *
   * @param s The string to test
   * @return {@code null} if {@code s} is a regex, otherwise an explanation
   */
  /*>>>
  @SuppressWarnings("regex") // RegexUtil
  */
  /*@Pure*/
  public static /*@Nullable*/ String regexError(String s) {
    return regexError(s, 0);
  }

  /**
   * Returns null if the argument is a syntactically valid regular
   * expression with at least the given number of groups. Otherwise returns
   * a string describing why the argument is not a regex.
   *
   * @param s The string to test
   * @param groups The minimum number of capturing groups required
   * @return {@code null} if {@code s} is a regex with enough groups,
   *     otherwise an explanation
   */
  /*>>>
  @SuppressWarnings("regex") // RegexUtil
  */
  /*@Pure*/
  public static /*@Nullable*/ String regexError(String s, int groups) {
    try {
      Pattern p = Pattern.compile(s);
      int actualGroups = getGroupCount(p);
      if (actualGroups < groups) {
        return regexErrorMessage(s, groups, actualGroups);
      }
    } catch (PatternSyntaxException e) {
      return e.getMessage();
    }
    return null;
  }

  /**
   * Returns null if the argument is a syntactically valid regular
   * expression. Otherwise returns a PatternSyntaxException describing
   * why the argument is not a regex.
   *
   * @param s The string to test
   * @return {@code null} if {@code s} is a regex, otherwise an exception
   *     describing the problem (returned, not thrown)
   */
  /*>>>
  @SuppressWarnings("regex") // RegexUtil
  */
  /*@Pure*/
  public static /*@Nullable*/ PatternSyntaxException regexException(String s) {
    return regexException(s, 0);
  }

  /**
   * Returns null if the argument is a syntactically valid regular
   * expression with at least the given number of groups. Otherwise returns a
   * PatternSyntaxException describing why the argument is not a regex.
   *
   * @param s The string to test
   * @param groups The minimum number of capturing groups required
   * @return {@code null} if {@code s} is a regex with enough groups,
   *     otherwise an exception describing the problem (returned, not
   *     thrown); its index is {@code -1} when the problem is too few groups
   */
  /*>>>
  @SuppressWarnings("regex") // RegexUtil
  */
  /*@Pure*/
  public static /*@Nullable*/ PatternSyntaxException regexException(String s, int groups) {
    try {
      Pattern p = Pattern.compile(s);
      int actualGroups = getGroupCount(p);
      if (actualGroups < groups) {
        // No position in the pattern is at fault, hence index -1.
        return new PatternSyntaxException(regexErrorMessage(s, groups, actualGroups), s, -1);
      }
    } catch (PatternSyntaxException pse) {
      return pse;
    }
    return null;
  }

  /**
   * Returns the argument as a {@code @Regex String} if it is a regex,
   * otherwise throws an error. The purpose of this method is to suppress Regex
   * Checker warnings. Once the Regex Checker supports flow-sensitivity, it
   * should be very rarely needed.
   *
   * @param s The string to check
   * @return {@code s} itself
   * @throws Error if {@code s} is not a syntactically valid regular expression
   */
  public static /*@Regex*/ String asRegex(String s) {
    return asRegex(s, 0);
  }

  /**
   * Returns the argument as a {@code @Regex(groups) String} if it is a regex
   * with at least the given number of groups, otherwise throws an error. The
   * purpose of this method is to suppress Regex Checker warnings. Once the
   * Regex Checker supports flow-sensitivity, it should be very rarely needed.
   *
   * @param s The string to check
   * @param groups The minimum number of capturing groups required
   * @return {@code s} itself
   * @throws Error if {@code s} is not a syntactically valid regular
   *     expression (the {@link PatternSyntaxException} is the cause), or if
   *     it has fewer than {@code groups} capturing groups
   */
  /*>>>
  @SuppressWarnings("regex") // RegexUtil
  */
  /*@Pure*/
  public static /*@Regex*/ String asRegex(String s, int groups) {
    Pattern p;
    try {
      p = Pattern.compile(s);
    } catch (PatternSyntaxException e) {
      // Keep the syntax exception as the cause so its details are not lost.
      throw new Error(e);
    }
    int actualGroups = getGroupCount(p);
    if (actualGroups < groups) {
      throw new Error(regexErrorMessage(s, groups, actualGroups));
    }
    return s;
  }

  /**
   * Generates an error message for s when expectedGroups are needed, but s
   * only has actualGroups.
   *
   * @param s The regular expression
   * @param expectedGroups The number of groups that were required
   * @param actualGroups The number of groups {@code s} actually has
   * @return The error message
   */
  private static String regexErrorMessage(String s, int expectedGroups, int actualGroups) {
    return "regex \"" + s + "\" has " + actualGroups + " groups, but " +
        expectedGroups + " groups are needed.";
  }

  /**
   * Returns the count of capturing groups in the argument.
   *
   * @param p A compiled pattern
   * @return The number of capturing groups in {@code p}
   */
  private static int getGroupCount(Pattern p) {
    // The group count is obtained from a matcher; the input text is irrelevant.
    return p.matcher("").groupCount();
  }
}