This project has retired. For details please refer to its Attic page.
RegexUtil xref
View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.chukwa.util;
20  
21  import java.util.regex.Pattern;
22  import java.util.regex.PatternSyntaxException;
23  
24  /*>>>
25  import checkers.nullness.quals.*;
26  import checkers.regex.quals.*;
27  */
28  
29  /**
30   * Utility methods for regular expressions, most notably for testing whether
31   * a string is a regular expression.
32   */
public class RegexUtil {

  /**
   * A checked version of {@link PatternSyntaxException}.
   * <p>
   * This exception is useful when an illegal regex is detected but the
   * contextual information to report a helpful error message is not available
   * at the current depth in the call stack. By using a checked
   * PatternSyntaxException the error must be handled up the call stack where
   * a better error message can be reported.
   * <p>
   * Typical usage is:
   * <pre>
   * void myMethod(...) throws CheckedPatternSyntaxException {
   *   ...
   *   if (! isRegex(myString)) {
   *     throw new CheckedPatternSyntaxException(...);
   *   }
   *   ... Pattern.compile(myString) ...
   * }
   * </pre>
   *
   * Simply calling {@code Pattern.compile} would have a similar effect,
   * in that {@code PatternSyntaxException} would be thrown at run time if
   * {@code myString} is not a regular expression.  There are two problems
   * with such an approach.  First, a client of {@code myMethod} might
   * forget to handle the exception, since {@code PatternSyntaxException}
   * is not checked.  Also, the Regex Checker would issue a warning about
   * the call to {@code Pattern.compile} that might throw an exception.
   * The above usage pattern avoids both problems.
   *
   * @see PatternSyntaxException
   */
  public static class CheckedPatternSyntaxException extends Exception {

    private static final long serialVersionUID = 6266881831979001480L;

    /** The wrapped unchecked exception; every accessor delegates to it. */
    private final PatternSyntaxException pse;

    /**
     * Constructs a new CheckedPatternSyntaxException equivalent to the
     * given {@link PatternSyntaxException}. The given exception is also
     * recorded as this exception's cause, so that {@link #getCause()} and
     * printed stack traces expose where the original failure happened.
     * <p>
     * Consider calling this constructor with the result of
     * {@link RegexUtil#regexException(String)}.
     *
     * @param pse The exception to wrap; the accessors of this class
     *            dereference it, so it should not be {@code null}
     */
    public CheckedPatternSyntaxException(PatternSyntaxException pse) {
      // Chain the cause instead of silently dropping it.
      super(pse);
      this.pse = pse;
    }

    /**
     * Constructs a new CheckedPatternSyntaxException.
     *
     * @param desc A description of the error
     * @param regex The erroneous pattern
     * @param index The approximate index in the pattern of the error,
     *              or {@code -1} if the index is not known
     */
    public CheckedPatternSyntaxException(String desc, String regex, int index) {
      this(new PatternSyntaxException(desc, regex, index));
    }

    /**
     * Retrieves the description of the error.
     *
     * @return The description of the error
     */
    public String getDescription() {
      return pse.getDescription();
    }

    /**
     * Retrieves the error index.
     *
     * @return The approximate index in the pattern of the error, or {@code -1}
     *         if the index is not known
     */
    public int getIndex() {
      return pse.getIndex();
    }

    /**
     * Returns the detail message of the wrapped
     * {@link PatternSyntaxException}: a multi-line string containing the
     * description of the syntax error and its index, the erroneous
     * regular-expression pattern, and a visual indication of the error
     * index within the pattern.
     *
     * @return The full detail message
     */
    @Override
    public String getMessage() {
      return pse.getMessage();
    }

    /**
     * Retrieves the erroneous regular-expression pattern.
     *
     * @return The erroneous pattern
     */
    public String getPattern() {
      return pse.getPattern();
    }
  }

  /** Not instantiable: this class only hosts static helpers. */
  private RegexUtil() {
    throw new AssertionError("Class RegexUtil shouldn't be instantiated");
  }

  /**
   * Returns true if the argument is a syntactically valid regular
   * expression.
   *
   * @param s The string to test
   * @return {@code true} if {@code s} compiles as a regular expression,
   *         {@code false} otherwise
   */
  public static boolean isRegex(String s) {
    return isRegex(s, 0);
  }

  /**
   * Returns true if the argument is a syntactically valid regular
   * expression with at least the given number of groups.
   *
   * @param s The string to test
   * @param groups The minimum number of capturing groups required
   * @return {@code true} if {@code s} compiles as a regular expression and
   *         has at least {@code groups} groups, {@code false} otherwise
   */
  /*>>>
  @SuppressWarnings("regex")    // RegexUtil
  */
  /*@Pure*/
  public static boolean isRegex(String s, int groups) {
    Pattern p;
    try {
      p = Pattern.compile(s);
    } catch (PatternSyntaxException e) {
      // A syntax error is the expected "not a regex" answer, not a failure.
      return false;
    }
    return getGroupCount(p) >= groups;
  }

  /**
   * Returns true if the argument, taken as a one-character string, is a
   * syntactically valid regular expression.
   *
   * @param c The character to test
   * @return {@code true} if the one-character string consisting of
   *         {@code c} compiles as a regular expression, {@code false}
   *         otherwise
   */
  /*>>>
  @SuppressWarnings("regex")    // RegexUtil
  */
  /*@Pure*/
  public static boolean isRegex(char c) {
    return isRegex(Character.toString(c));
  }

  /**
   * Returns null if the argument is a syntactically valid regular
   * expression. Otherwise returns a string describing why the argument is
   * not a regex.
   *
   * @param s The string to test
   * @return {@code null} if {@code s} is a valid regular expression,
   *         otherwise a description of the problem
   */
  /*>>>
  @SuppressWarnings("regex")    // RegexUtil
  */
  /*@Pure*/
  public static /*@Nullable*/ String regexError(String s) {
    return regexError(s, 0);
  }

  /**
   * Returns null if the argument is a syntactically valid regular
   * expression with at least the given number of groups. Otherwise returns
   * a string describing why the argument is not a regex.
   *
   * @param s The string to test
   * @param groups The minimum number of capturing groups required
   * @return {@code null} if {@code s} is a valid regular expression with at
   *         least {@code groups} groups; otherwise the message of the
   *         {@link PatternSyntaxException} raised while compiling it, or a
   *         message reporting the expected and actual group counts
   */
  /*>>>
  @SuppressWarnings("regex")    // RegexUtil
  */
  /*@Pure*/
  public static /*@Nullable*/ String regexError(String s, int groups) {
    try {
      Pattern p = Pattern.compile(s);
      int actualGroups = getGroupCount(p);
      if (actualGroups < groups) {
        return regexErrorMessage(s, groups, actualGroups);
      }
    } catch (PatternSyntaxException e) {
      return e.getMessage();
    }
    return null;
  }

  /**
   * Returns null if the argument is a syntactically valid regular
   * expression. Otherwise returns a PatternSyntaxException describing
   * why the argument is not a regex.
   *
   * @param s The string to test
   * @return {@code null} if {@code s} is a valid regular expression,
   *         otherwise an exception describing the problem
   */
  /*>>>
  @SuppressWarnings("regex")    // RegexUtil
  */
  /*@Pure*/
  public static /*@Nullable*/ PatternSyntaxException regexException(String s) {
    return regexException(s, 0);
  }

  /**
   * Returns null if the argument is a syntactically valid regular
   * expression with at least the given number of groups. Otherwise returns a
   * PatternSyntaxException describing why the argument is not a regex.
   *
   * @param s The string to test
   * @param groups The minimum number of capturing groups required
   * @return {@code null} if {@code s} is a valid regular expression with at
   *         least {@code groups} groups; otherwise the exception raised
   *         while compiling it, or a newly created exception (with index
   *         {@code -1}) reporting the expected and actual group counts
   */
  /*>>>
  @SuppressWarnings("regex")    // RegexUtil
  */
  /*@Pure*/
  public static /*@Nullable*/ PatternSyntaxException regexException(String s, int groups) {
    try {
      Pattern p = Pattern.compile(s);
      int actualGroups = getGroupCount(p);
      if (actualGroups < groups) {
        // Too few groups is not tied to a position in the pattern, hence -1.
        return new PatternSyntaxException(regexErrorMessage(s, groups, actualGroups), s, -1);
      }
    } catch (PatternSyntaxException pse) {
      return pse;
    }
    return null;
  }

  /**
   * Returns the argument as a {@code @Regex String} if it is a regex,
   * otherwise throws an error. The purpose of this method is to suppress Regex
   * Checker warnings. Once the Regex Checker supports flow-sensitivity, it
   * should be very rarely needed.
   *
   * @param s The string to check
   * @return {@code s} itself, unchanged
   * @throws Error if {@code s} is not a syntactically valid regular
   *         expression
   */
  public static /*@Regex*/ String asRegex(String s) {
    return asRegex(s, 0);
  }

  /**
   * Returns the argument as a {@code @Regex(groups) String} if it is a regex
   * with at least the given number of groups, otherwise throws an error. The
   * purpose of this method is to suppress Regex Checker warnings. Once the
   * Regex Checker supports flow-sensitivity, it should be very rarely needed.
   *
   * @param s The string to check
   * @param groups The minimum number of capturing groups required
   * @return {@code s} itself, unchanged
   * @throws Error if {@code s} is not a syntactically valid regular
   *         expression (the {@link PatternSyntaxException} is the cause), or
   *         if it has fewer than {@code groups} groups
   */
  /*>>>
  @SuppressWarnings("regex")    // RegexUtil
  */
  /*@Pure*/
  public static /*@Regex*/ String asRegex(String s, int groups) {
    try {
      Pattern p = Pattern.compile(s);
      int actualGroups = getGroupCount(p);
      if (actualGroups < groups) {
        throw new Error(regexErrorMessage(s, groups, actualGroups));
      }
      return s;
    } catch (PatternSyntaxException e) {
      throw new Error(e);
    }
  }

  /**
   * Generates an error message for s when expectedGroups are needed, but s
   * only has actualGroups.
   *
   * @param s The regular expression being reported on
   * @param expectedGroups The number of groups the caller required
   * @param actualGroups The number of groups {@code s} actually has
   * @return The formatted message
   */
  private static String regexErrorMessage(String s, int expectedGroups, int actualGroups) {
    return "regex \"" + s + "\" has " + actualGroups + " groups, but " +
        expectedGroups + " groups are needed.";
  }

  /**
   * Returns the count of groups in the argument.
   *
   * @param p The compiled pattern to inspect
   * @return The group count reported by a matcher created from {@code p}
   */
  private static int getGroupCount(Pattern p) {
    // The group count is read from a Matcher, so match against an empty input.
    return p.matcher("").groupCount();
  }
}