1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one 3 * or more contributor license agreements. See the NOTICE file 4 * distributed with this work for additional information 5 * regarding copyright ownership. The ASF licenses this file 6 * to you under the Apache License, Version 2.0 (the 7 * "License"); you may not use this file except in compliance 8 * with the License. You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, software 13 * distributed under the License is distributed on an "AS IS" BASIS, 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 * See the License for the specific language governing permissions and 16 * limitations under the License. 17 */ 18 19 package org.apache.hadoop.chukwa.util; 20 21 import java.util.regex.Pattern; 22 import java.util.regex.PatternSyntaxException; 23 24 /*>>> 25 import checkers.nullness.quals.*; 26 import checkers.regex.quals.*; 27 */ 28 29 /** 30 * Utility methods for regular expressions, most notably for testing whether 31 * a string is a regular expression. 32 */ 33 public class RegexUtil { 34 35 /** 36 * A checked version of {@link PatternSyntaxException}. 37 * <p> 38 * This exception is useful when an illegal regex is detected but the 39 * contextual information to report a helpful error message is not available 40 * at the current depth in the call stack. By using a checked 41 * PatternSyntaxException the error must be handled up the call stack where 42 * a better error message can be reported. 43 * <p> 44 * 45 * Typical usage is: 46 * <pre> 47 * void myMethod(...) throws CheckedPatternSyntaxException { 48 * ... 49 * if (! isRegex(myString)) { 50 * throw new CheckedPatternSyntaxException(...); 51 * } 52 * ... Pattern.compile(myString) ... 53 * </pre> 54 * 55 * Simply calling <tt>Pattern.compile</tt> would have a similar effect, 56 * in that <tt>PatternSyntaxException</tt> would be thrown at run time if 57 * <tt>myString</tt> is not a regular expression. There are two problems 58 * with such an approach. First, a client of <tt>myMethod</tt> might 59 * forget to handle the exception, since <tt>PatternSyntaxException</tt> 60 * is not checked. Also, the Regex Checker would issue a warning about 61 * the call to <tt>Pattern.compile</tt> that might throw an exception. 62 * The above usage pattern avoids both problems. 63 * 64 * @see PatternSyntaxException 65 */ 66 public static class CheckedPatternSyntaxException extends Exception { 67 68 private static final long serialVersionUID = 6266881831979001480L; 69 70 private final PatternSyntaxException pse; 71 72 /** 73 * Constructs a new CheckedPatternSyntaxException equivalent to the 74 * given {@link PatternSyntaxException}. 75 * <p> 76 * Consider calling this constructor with the result of 77 * {@link RegexUtil#regexError}. 78 * @param pse is PatternSyntaxException object 79 */ 80 public CheckedPatternSyntaxException(PatternSyntaxException pse) { 81 this.pse = pse; 82 } 83 84 /** 85 * Constructs a new CheckedPatternSyntaxException. 86 * 87 * @param desc A description of the error 88 * @param regex The erroneous pattern 89 * @param index The approximate index in the pattern of the error, 90 * or {@code -1} if the index is not known 91 */ 92 public CheckedPatternSyntaxException(String desc, String regex, int index) { 93 this(new PatternSyntaxException(desc, regex, index)); 94 } 95 96 /** 97 * Retrieves the description of the error. 98 * 99 * @return The description of the error 100 */ 101 public String getDescription() { 102 return pse.getDescription(); 103 } 104 105 /** 106 * Retrieves the error index. 107 * 108 * @return The approximate index in the pattern of the error, or {@code -1} 109 * if the index is not known 110 */ 111 public int getIndex() { 112 return pse.getIndex(); 113 } 114 115 /** 116 * Returns a multi-line string containing the description of the syntax 117 * error and its index, the erroneous regular-expression pattern, and a 118 * visual indication of the error index within the pattern. 119 * 120 * @return The full detail message 121 */ 122 public String getMessage() { 123 return pse.getMessage(); 124 } 125 126 /** 127 * Retrieves the erroneous regular-expression pattern. 128 * 129 * @return The erroneous pattern 130 */ 131 public String getPattern() { 132 return pse.getPattern(); 133 } 134 } 135 136 private RegexUtil() { 137 throw new AssertionError("Class RegexUtil shouldn't be instantiated"); 138 } 139 140 /** 141 * Returns true if the argument is a syntactically valid regular 142 * expression. 143 * @param s is regular expression 144 * @return true if there is a match 145 */ 146 public static boolean isRegex(String s) { 147 return isRegex(s, 0); 148 } 149 150 /** 151 * Returns true if the argument is a syntactically valid regular 152 * expression with at least the given number of groups. 153 * @param s is regular expression 154 * @param groups is number of groups to match 155 * @return true if there is a match 156 */ 157 /*>>> 158 @SuppressWarnings("regex") // RegexUtil 159 */ 160 /*@Pure*/ 161 public static boolean isRegex(String s, int groups) { 162 Pattern p; 163 try { 164 p = Pattern.compile(s); 165 } catch (PatternSyntaxException e) { 166 return false; 167 } 168 return getGroupCount(p) >= groups; 169 } 170 171 /** 172 * Returns true if the argument is a syntactically valid regular 173 * expression. 174 * @param c is a character 175 * @return true if there is a match 176 */ 177 public static boolean isRegex(char c) { 178 return isRegex(Character.toString(c)); 179 } 180 181 /** 182 * Returns null if the argument is a syntactically valid regular 183 * expression. Otherwise returns a string describing why the argument is 184 * not a regex. 185 * @param s is regular expression 186 * @return null if s is a regular expression 187 */ 188 public static String regexError(String s) { 189 return regexError(s, 0); 190 } 191 192 /** 193 * Returns null if the argument is a syntactically valid regular 194 * expression with at least the given number of groups. Otherwise returns 195 * a string describing why the argument is not a regex. 196 * @param s is regular expression 197 * @param groups is number of groups to match 198 * @return null if s is a regular expression 199 */ 200 public static String regexError(String s, int groups) { 201 try { 202 Pattern p = Pattern.compile(s); 203 int actualGroups = getGroupCount(p); 204 if (actualGroups < groups) { 205 return regexErrorMessage(s, groups, actualGroups); 206 } 207 } catch (PatternSyntaxException e) { 208 return e.getMessage(); 209 } 210 return null; 211 } 212 213 /** 214 * Returns null if the argument is a syntactically valid regular 215 * expression. Otherwise returns a PatternSyntaxException describing 216 * why the argument is not a regex. 217 * @param s is regular expression 218 * @return null if s is a regular expression 219 */ 220 public static PatternSyntaxException regexException(String s) { 221 return regexException(s, 0); 222 } 223 224 /** 225 * Returns null if the argument is a syntactically valid regular 226 * expression with at least the given number of groups. Otherwise returns a 227 * PatternSyntaxException describing why the argument is not a regex. 228 * @param s is regular expression 229 * @param groups is number of groups to match 230 * @return null if s is a regular expression 231 */ 232 public static PatternSyntaxException regexException(String s, int groups) { 233 try { 234 Pattern p = Pattern.compile(s); 235 int actualGroups = getGroupCount(p); 236 if (actualGroups < groups) { 237 return new PatternSyntaxException(regexErrorMessage(s, groups, actualGroups), s, -1); 238 } 239 } catch (PatternSyntaxException pse) { 240 return pse; 241 } 242 return null; 243 } 244 245 /** 246 * Returns the argument as a {@code @Regex String} if it is a regex, 247 * otherwise throws an error. The purpose of this method is to suppress Regex 248 * Checker warnings. Once the the Regex Checker supports flow-sensitivity, it 249 * should be very rarely needed. 250 * @param s is a regular expression 251 * @return null if s is a regular expression 252 */ 253 public static String asRegex(String s) { 254 return asRegex(s, 0); 255 } 256 257 /** 258 * Returns the argument as a {@code @Regex(groups) String} if it is a regex 259 * with at least the given number of groups, otherwise throws an error. The 260 * purpose of this method is to suppress Regex Checker warnings. Once the the 261 * Regex Checker supports flow-sensitivity, it should be very rarely needed. 262 * @param s is a regular expression 263 * @param groups is number of group to match 264 * @return null if s is a regular expression 265 */ 266 public static String asRegex(String s, int groups) { 267 try { 268 Pattern p = Pattern.compile(s); 269 int actualGroups = getGroupCount(p); 270 if (actualGroups < groups) { 271 throw new Error(regexErrorMessage(s, groups, actualGroups)); 272 } 273 return s; 274 } catch (PatternSyntaxException e) { 275 throw new Error(e); 276 } 277 } 278 279 /** 280 * Generates an error message for s when expectedGroups are needed, but s 281 * only has actualGroups. 282 */ 283 private static String regexErrorMessage(String s, int expectedGroups, int actualGroups) { 284 return "regex \"" + s + "\" has " + actualGroups + " groups, but " + 285 expectedGroups + " groups are needed."; 286 } 287 288 /** 289 * Returns the count of groups in the argument. 290 */ 291 private static int getGroupCount(Pattern p) { 292 return p.matcher("").groupCount(); 293 } 294 }