This project has retired. For details please refer to its Attic page.
RegexUtil xref
View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.chukwa.util;
20  
21  import java.util.regex.Pattern;
22  import java.util.regex.PatternSyntaxException;
23  
24  /*>>>
25  import checkers.nullness.quals.*;
26  import checkers.regex.quals.*;
27  */
28  
/**
 * Utility methods for regular expressions, most notably for testing whether
 * a string is a regular expression.
 * <p>
 * All methods are static; the class cannot be instantiated.
 */
public final class RegexUtil {

  /**
   * A checked version of {@link PatternSyntaxException}.
   * <p>
   * This exception is useful when an illegal regex is detected but the
   * contextual information to report a helpful error message is not available
   * at the current depth in the call stack. By using a checked
   * PatternSyntaxException the error must be handled up the call stack where
   * a better error message can be reported.
   * <p>
   * Typical usage is:
   * <pre>
   * void myMethod(...) throws CheckedPatternSyntaxException {
   *   ...
   *   if (! isRegex(myString)) {
   *     throw new CheckedPatternSyntaxException(...);
   *   }
   *   ... Pattern.compile(myString) ...
   * }
   * </pre>
   *
   * Simply calling {@code Pattern.compile} would have a similar effect,
   * in that {@code PatternSyntaxException} would be thrown at run time if
   * {@code myString} is not a regular expression.  There are two problems
   * with such an approach.  First, a client of {@code myMethod} might
   * forget to handle the exception, since {@code PatternSyntaxException}
   * is not checked.  Also, the Regex Checker would issue a warning about
   * the call to {@code Pattern.compile} that might throw an exception.
   * The above usage pattern avoids both problems.
   *
   * @see PatternSyntaxException
   */
  public static class CheckedPatternSyntaxException extends Exception {

    private static final long serialVersionUID = 6266881831979001480L;

    /** The unchecked exception this instance wraps; every getter delegates to it. */
    private final PatternSyntaxException pse;

    /**
     * Constructs a new CheckedPatternSyntaxException equivalent to the
     * given {@link PatternSyntaxException}. The given exception is also
     * recorded as this exception's cause.
     * <p>
     * Consider calling this constructor with the (non-null) result of
     * {@link RegexUtil#regexException(String)}.
     *
     * @param pse The exception to wrap; must not be {@code null}
     * @throws NullPointerException if {@code pse} is {@code null}
     */
    public CheckedPatternSyntaxException(PatternSyntaxException pse) {
      // Chain the wrapped exception so that stack traces keep the origin.
      super(pse);
      // Fail fast: a null delegate would otherwise only surface later as an
      // uninformative NullPointerException inside one of the getters.
      if (pse == null) {
        throw new NullPointerException("pse");
      }
      this.pse = pse;
    }

    /**
     * Constructs a new CheckedPatternSyntaxException.
     *
     * @param desc A description of the error
     * @param regex The erroneous pattern
     * @param index The approximate index in the pattern of the error,
     *              or {@code -1} if the index is not known
     */
    public CheckedPatternSyntaxException(String desc, String regex, int index) {
      this(new PatternSyntaxException(desc, regex, index));
    }

    /**
     * Retrieves the description of the error.
     *
     * @return The description of the error
     */
    public String getDescription() {
      return pse.getDescription();
    }

    /**
     * Retrieves the error index.
     *
     * @return The approximate index in the pattern of the error, or {@code -1}
     *         if the index is not known
     */
    public int getIndex() {
      return pse.getIndex();
    }

    /**
     * Returns a multi-line string containing the description of the syntax
     * error and its index, the erroneous regular-expression pattern, and a
     * visual indication of the error index within the pattern.
     *
     * @return The full detail message, as produced by the wrapped
     *         {@link PatternSyntaxException}
     */
    @Override
    public String getMessage() {
      return pse.getMessage();
    }

    /**
     * Retrieves the erroneous regular-expression pattern.
     *
     * @return The erroneous pattern
     */
    public String getPattern() {
      return pse.getPattern();
    }
  }

  /** Not instantiable: this class only hosts static helpers. */
  private RegexUtil() {
    throw new AssertionError("Class RegexUtil shouldn't be instantiated");
  }

  /**
   * Returns true if the argument is a syntactically valid regular
   * expression.
   *
   * @param s The string to test
   * @return {@code true} if {@code s} compiles as a regular expression
   */
  public static boolean isRegex(String s) {
    return isRegex(s, 0);
  }

  /**
   * Returns true if the argument is a syntactically valid regular
   * expression with at least the given number of groups.
   *
   * @param s The string to test
   * @param groups The minimum number of capturing groups required
   * @return {@code true} if {@code s} compiles as a regular expression and
   *         has at least {@code groups} groups
   */
  /*>>>
  @SuppressWarnings("regex")    // RegexUtil
  */
  /*@Pure*/
  public static boolean isRegex(String s, int groups) {
    Pattern p;
    try {
      p = Pattern.compile(s);
    } catch (PatternSyntaxException e) {
      // A syntax error is the expected "not a regex" outcome, not a failure.
      return false;
    }
    return getGroupCount(p) >= groups;
  }

  /**
   * Returns true if the argument is a syntactically valid regular
   * expression.
   *
   * @param c The character to test, treated as a one-character string
   * @return {@code true} if the one-character string compiles as a regular
   *         expression
   */
  /*>>>
  @SuppressWarnings("regex")    // RegexUtil
  */
  /*@Pure*/
  public static boolean isRegex(char c) {
    return isRegex(Character.toString(c));
  }

  /**
   * Returns null if the argument is a syntactically valid regular
   * expression. Otherwise returns a string describing why the argument is
   * not a regex.
   *
   * @param s The string to test
   * @return {@code null} if {@code s} is a regex, otherwise an error
   *         description
   */
  /*>>>
  @SuppressWarnings("regex")    // RegexUtil
  */
  /*@Pure*/
  public static /*@Nullable*/ String regexError(String s) {
    return regexError(s, 0);
  }

  /**
   * Returns null if the argument is a syntactically valid regular
   * expression with at least the given number of groups. Otherwise returns
   * a string describing why the argument is not a regex.
   *
   * @param s The string to test
   * @param groups The minimum number of capturing groups required
   * @return {@code null} if {@code s} qualifies; otherwise the message of
   *         the {@link PatternSyntaxException} raised while compiling, or a
   *         message reporting the group-count shortfall
   */
  /*>>>
  @SuppressWarnings("regex")    // RegexUtil
  */
  /*@Pure*/
  public static /*@Nullable*/ String regexError(String s, int groups) {
    try {
      Pattern p = Pattern.compile(s);
      int actualGroups = getGroupCount(p);
      if (actualGroups < groups) {
        return regexErrorMessage(s, groups, actualGroups);
      }
    } catch (PatternSyntaxException e) {
      return e.getMessage();
    }
    return null;
  }

  /**
   * Returns null if the argument is a syntactically valid regular
   * expression. Otherwise returns a PatternSyntaxException describing
   * why the argument is not a regex.
   *
   * @param s The string to test
   * @return {@code null} if {@code s} is a regex, otherwise an exception
   *         describing the problem
   */
  /*>>>
  @SuppressWarnings("regex")    // RegexUtil
  */
  /*@Pure*/
  public static /*@Nullable*/ PatternSyntaxException regexException(String s) {
    return regexException(s, 0);
  }

  /**
   * Returns null if the argument is a syntactically valid regular
   * expression with at least the given number of groups. Otherwise returns a
   * PatternSyntaxException describing why the argument is not a regex.
   *
   * @param s The string to test
   * @param groups The minimum number of capturing groups required
   * @return {@code null} if {@code s} qualifies; otherwise the exception
   *         raised while compiling, or a newly created exception (with
   *         index {@code -1}) reporting the group-count shortfall
   */
  /*>>>
  @SuppressWarnings("regex")    // RegexUtil
  */
  /*@Pure*/
  public static /*@Nullable*/ PatternSyntaxException regexException(String s, int groups) {
    try {
      Pattern p = Pattern.compile(s);
      int actualGroups = getGroupCount(p);
      if (actualGroups < groups) {
        // No position in the pattern is at fault, hence the unknown index -1.
        return new PatternSyntaxException(regexErrorMessage(s, groups, actualGroups), s, -1);
      }
    } catch (PatternSyntaxException pse) {
      return pse;
    }
    return null;
  }

  /**
   * Returns the argument as a {@code @Regex String} if it is a regex,
   * otherwise throws an error. The purpose of this method is to suppress Regex
   * Checker warnings. Once the Regex Checker supports flow-sensitivity, it
   * should be very rarely needed.
   *
   * @param s The string to check
   * @return {@code s} itself
   * @throws Error if {@code s} is not a regular expression
   */
  public static /*@Regex*/ String asRegex(String s) {
    return asRegex(s, 0);
  }

  /**
   * Returns the argument as a {@code @Regex(groups) String} if it is a regex
   * with at least the given number of groups, otherwise throws an error. The
   * purpose of this method is to suppress Regex Checker warnings. Once the
   * Regex Checker supports flow-sensitivity, it should be very rarely needed.
   *
   * @param s The string to check
   * @param groups The minimum number of capturing groups required
   * @return {@code s} itself
   * @throws Error if {@code s} is not a regular expression (the
   *         {@link PatternSyntaxException} is the cause) or has fewer than
   *         {@code groups} groups
   */
  /*>>>
  @SuppressWarnings("regex")    // RegexUtil
  */
  /*@Pure*/
  public static /*@Regex*/ String asRegex(String s, int groups) {
    try {
      Pattern p = Pattern.compile(s);
      int actualGroups = getGroupCount(p);
      if (actualGroups < groups) {
        throw new Error(regexErrorMessage(s, groups, actualGroups));
      }
      return s;
    } catch (PatternSyntaxException e) {
      throw new Error(e);
    }
  }

  /**
   * Generates an error message for s when expectedGroups are needed, but s
   * only has actualGroups.
   *
   * @param s The regular expression being reported on
   * @param expectedGroups The number of groups that was required
   * @param actualGroups The number of groups {@code s} actually has
   * @return The formatted error message
   */
  private static String regexErrorMessage(String s, int expectedGroups, int actualGroups) {
    return "regex \"" + s + "\" has " + actualGroups + " groups, but " +
        expectedGroups + " groups are needed.";
  }

  /**
   * Returns the count of groups in the argument.
   *
   * @param p The compiled pattern
   * @return The group count reported by a matcher created from {@code p}
   */
  private static int getGroupCount(Pattern p) {
    // Pattern exposes no group count directly here; a matcher on the empty
    // string reports it without performing any matching.
    return p.matcher("").groupCount();
  }
}