1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one 3 * or more contributor license agreements. See the NOTICE file 4 * distributed with this work for additional information 5 * regarding copyright ownership. The ASF licenses this file 6 * to you under the Apache License, Version 2.0 (the 7 * "License"); you may not use this file except in compliance 8 * with the License. You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, software 13 * distributed under the License is distributed on an "AS IS" BASIS, 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 * See the License for the specific language governing permissions and 16 * limitations under the License. 17 */ 18 19 package org.apache.hadoop.chukwa.util; 20 21 import java.util.regex.Pattern; 22 import java.util.regex.PatternSyntaxException; 23 24 /*>>> 25 import checkers.nullness.quals.*; 26 import checkers.regex.quals.*; 27 */ 28 29 /** 30 * Utility methods for regular expressions, most notably for testing whether 31 * a string is a regular expression. 32 */ 33 public class RegexUtil { 34 35 /** 36 * A checked version of {@link PatternSyntaxException}. 37 * <p> 38 * This exception is useful when an illegal regex is detected but the 39 * contextual information to report a helpful error message is not available 40 * at the current depth in the call stack. By using a checked 41 * PatternSyntaxException the error must be handled up the call stack where 42 * a better error message can be reported. 43 * <p> 44 * 45 * Typical usage is: 46 * <pre> 47 * void myMethod(...) throws CheckedPatternSyntaxException { 48 * ... 49 * if (! isRegex(myString)) { 50 * throw new CheckedPatternSyntaxException(...); 51 * } 52 * ... Pattern.compile(myString) ... 53 * </pre> 54 * 55 * Simply calling <tt>Pattern.compile</tt> would have a similar effect, 56 * in that <tt>PatternSyntaxException</tt> would be thrown at run time if 57 * <tt>myString</tt> is not a regular expression. There are two problems 58 * with such an approach. First, a client of <tt>myMethod</tt> might 59 * forget to handle the exception, since <tt>PatternSyntaxException</tt> 60 * is not checked. Also, the Regex Checker would issue a warning about 61 * the call to <tt>Pattern.compile</tt> that might throw an exception. 62 * The above usage pattern avoids both problems. 63 * 64 * @see PatternSyntaxException 65 */ 66 public static class CheckedPatternSyntaxException extends Exception { 67 68 private static final long serialVersionUID = 6266881831979001480L; 69 70 private final PatternSyntaxException pse; 71 72 /** 73 * Constructs a new CheckedPatternSyntaxException equivalent to the 74 * given {@link PatternSyntaxException}. 75 * <p> 76 * Consider calling this constructor with the result of 77 * {@link RegexUtil#regexError}. 78 * @param pse 79 */ 80 public CheckedPatternSyntaxException(PatternSyntaxException pse) { 81 this.pse = pse; 82 } 83 84 /** 85 * Constructs a new CheckedPatternSyntaxException. 86 * 87 * @param desc A description of the error 88 * @param regex The erroneous pattern 89 * @param index The approximate index in the pattern of the error, 90 * or {@code -1} if the index is not known 91 */ 92 public CheckedPatternSyntaxException(String desc, String regex, int index) { 93 this(new PatternSyntaxException(desc, regex, index)); 94 } 95 96 /** 97 * Retrieves the description of the error. 98 * 99 * @return The description of the error 100 */ 101 public String getDescription() { 102 return pse.getDescription(); 103 } 104 105 /** 106 * Retrieves the error index. 107 * 108 * @return The approximate index in the pattern of the error, or {@code -1} 109 * if the index is not known 110 */ 111 public int getIndex() { 112 return pse.getIndex(); 113 } 114 115 /** 116 * Returns a multi-line string containing the description of the syntax 117 * error and its index, the erroneous regular-expression pattern, and a 118 * visual indication of the error index within the pattern. 119 * 120 * @return The full detail message 121 */ 122 public String getMessage() { 123 return pse.getMessage(); 124 } 125 126 /** 127 * Retrieves the erroneous regular-expression pattern. 128 * 129 * @return The erroneous pattern 130 */ 131 public String getPattern() { 132 return pse.getPattern(); 133 } 134 } 135 136 private RegexUtil() { 137 throw new AssertionError("Class RegexUtil shouldn't be instantiated"); 138 } 139 140 /** 141 * Returns true if the argument is a syntactically valid regular 142 * expression. 143 * @param s 144 * @return 145 */ 146 public static boolean isRegex(String s) { 147 return isRegex(s, 0); 148 } 149 150 /** 151 * Returns true if the argument is a syntactically valid regular 152 * expression with at least the given number of groups. 153 * @param s 154 * @param groups 155 * @return 156 */ 157 /*>>> 158 @SuppressWarnings("regex") // RegexUtil 159 */ 160 /*@Pure*/ 161 public static boolean isRegex(String s, int groups) { 162 Pattern p; 163 try { 164 p = Pattern.compile(s); 165 } catch (PatternSyntaxException e) { 166 return false; 167 } 168 return getGroupCount(p) >= groups; 169 } 170 171 /** 172 * Returns true if the argument is a syntactically valid regular 173 * expression. 174 * @param c 175 * @return 176 */ 177 /*>>> 178 @SuppressWarnings("regex") // RegexUtil 179 */ 180 /*@Pure*/ 181 public static boolean isRegex(char c) { 182 return isRegex(Character.toString(c)); 183 } 184 185 /** 186 * Returns null if the argument is a syntactically valid regular 187 * expression. Otherwise returns a string describing why the argument is 188 * not a regex. 189 * @param s 190 * @return 191 */ 192 /*>>> 193 @SuppressWarnings("regex") // RegexUtil 194 */ 195 /*@Pure*/ 196 public static /*@Nullable*/ String regexError(String s) { 197 return regexError(s, 0); 198 } 199 200 /** 201 * Returns null if the argument is a syntactically valid regular 202 * expression with at least the given number of groups. Otherwise returns 203 * a string describing why the argument is not a regex. 204 * @param s 205 * @param groups 206 * @return 207 */ 208 /*>>> 209 @SuppressWarnings("regex") // RegexUtil 210 */ 211 /*@Pure*/ 212 public static /*@Nullable*/ String regexError(String s, int groups) { 213 try { 214 Pattern p = Pattern.compile(s); 215 int actualGroups = getGroupCount(p); 216 if (actualGroups < groups) { 217 return regexErrorMessage(s, groups, actualGroups); 218 } 219 } catch (PatternSyntaxException e) { 220 return e.getMessage(); 221 } 222 return null; 223 } 224 225 /** 226 * Returns null if the argument is a syntactically valid regular 227 * expression. Otherwise returns a PatternSyntaxException describing 228 * why the argument is not a regex. 229 * @param s 230 * @return 231 */ 232 /*>>> 233 @SuppressWarnings("regex") // RegexUtil 234 */ 235 /*@Pure*/ 236 public static /*@Nullable*/ PatternSyntaxException regexException(String s) { 237 return regexException(s, 0); 238 } 239 240 /** 241 * Returns null if the argument is a syntactically valid regular 242 * expression with at least the given number of groups. Otherwise returns a 243 * PatternSyntaxException describing why the argument is not a regex. 244 * @param s 245 * @param groups 246 * @return 247 */ 248 /*>>> 249 @SuppressWarnings("regex") // RegexUtil 250 */ 251 /*@Pure*/ 252 public static /*@Nullable*/ PatternSyntaxException regexException(String s, int groups) { 253 try { 254 Pattern p = Pattern.compile(s); 255 int actualGroups = getGroupCount(p); 256 if (actualGroups < groups) { 257 return new PatternSyntaxException(regexErrorMessage(s, groups, actualGroups), s, -1); 258 } 259 } catch (PatternSyntaxException pse) { 260 return pse; 261 } 262 return null; 263 } 264 265 /** 266 * Returns the argument as a {@code @Regex String} if it is a regex, 267 * otherwise throws an error. The purpose of this method is to suppress Regex 268 * Checker warnings. Once the the Regex Checker supports flow-sensitivity, it 269 * should be very rarely needed. 270 * @param s 271 * @return 272 */ 273 public static /*@Regex*/ String asRegex(String s) { 274 return asRegex(s, 0); 275 } 276 277 /** 278 * Returns the argument as a {@code @Regex(groups) String} if it is a regex 279 * with at least the given number of groups, otherwise throws an error. The 280 * purpose of this method is to suppress Regex Checker warnings. Once the the 281 * Regex Checker supports flow-sensitivity, it should be very rarely needed. 282 * @param s 283 * @param groups 284 * @return 285 */ 286 /*>>> 287 @SuppressWarnings("regex") // RegexUtil 288 */ 289 /*@Pure*/ 290 public static /*@Regex*/ String asRegex(String s, int groups) { 291 try { 292 Pattern p = Pattern.compile(s); 293 int actualGroups = getGroupCount(p); 294 if (actualGroups < groups) { 295 throw new Error(regexErrorMessage(s, groups, actualGroups)); 296 } 297 return s; 298 } catch (PatternSyntaxException e) { 299 throw new Error(e); 300 } 301 } 302 303 /** 304 * Generates an error message for s when expectedGroups are needed, but s 305 * only has actualGroups. 306 */ 307 private static String regexErrorMessage(String s, int expectedGroups, int actualGroups) { 308 return "regex \"" + s + "\" has " + actualGroups + " groups, but " + 309 expectedGroups + " groups are needed."; 310 } 311 312 /** 313 * Returns the count of groups in the argument. 314 */ 315 private static int getGroupCount(Pattern p) { 316 return p.matcher("").groupCount(); 317 } 318 }