DataExpiration.java
package org.apache.hadoop.chukwa.database;

import java.sql.SQLException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.HashMap;
import java.util.Map.Entry;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.chukwa.util.DatabaseWriter;
import org.apache.hadoop.chukwa.util.RegexUtil;

/**
 * Drops expired database partitions for a Chukwa cluster. Table-name
 * patterns are read from the "report.db.name.*" keys of the Chukwa
 * database configuration.
 */
public class DataExpiration {
  private static DatabaseConfig dbc = null;
  private static Log log = LogFactory.getLog(DataExpiration.class);

  public DataExpiration() {
    if (dbc == null) {
      dbc = new DatabaseConfig();
    }
  }

  @edu.umd.cs.findbugs.annotations.SuppressWarnings(value =
      "SQL_NONCONSTANT_STRING_PASSED_TO_EXECUTE",
      justification = "Dynamic based upon tables in the database")
  public void dropTables(long start, long end) {
    String cluster = System.getProperty("CLUSTER");
    if (cluster == null) {
      cluster = "unknown";
    }
    DatabaseWriter dbw = new DatabaseWriter(cluster);
    try {
      HashMap<String, String> dbNames = dbc.startWith("report.db.name.");
      for (Entry<String, String> entry : dbNames.entrySet()) {
        String tableName = entry.getValue();
        if (!RegexUtil.isRegex(tableName)) {
          log.warn("Skipping tableName: '" + tableName
              + "' because there was an error parsing it as a regex: "
              + RegexUtil.regexError(tableName));
          // continue, not return: one bad pattern should not abort the
          // whole expiration pass (the log message already says "Skipping").
          continue;
        }
        String[] tableList = dbc.findTableName(tableName, start, end);
        for (String tl : tableList) {
          log.debug("table name: " + tl);
          try {
            // Partitioned table names follow the pattern
            // <base>_<partition>_<window>, e.g. "system_metrics_2094_week".
            String[] parts = tl.split("_");
            int partition = Integer.parseInt(parts[parts.length - 2]);
            StringBuilder table = new StringBuilder();
            for (int i = 0; i < parts.length - 2; i++) {
              if (i != 0) {
                table.append("_");
              }
              table.append(parts[i]);
            }
            // Drop the partition three slots behind the matched one,
            // keeping the most recent partitions as history.
            partition = partition - 3;
            if (partition >= 0) {
              StringBuilder dropPartition = new StringBuilder();
              dropPartition.append("drop table if exists ");
              dropPartition.append(table);
              dropPartition.append("_");
              dropPartition.append(partition);
              dropPartition.append("_");
              dropPartition.append(parts[parts.length - 1]);
              final String query = dropPartition.toString();
              dbw.execute(query);
            }
          } catch (NumberFormatException e) {
            log.error("Error in parsing table partition number, skipping table: "
                + tl);
          } catch (ArrayIndexOutOfBoundsException e) {
            log.debug("Skipping table: " + tl
                + ", because it has no partition configuration.");
          }
        }
      }
    } catch (SQLException e) {
      log.error("Error dropping expired tables:", e);
    } finally {
      // Release the connection even when a drop statement fails.
      dbw.close();
    }
  }
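
  // Worked example of the arithmetic above, using the hypothetical table
  // name "system_metrics_2094_week": splitting on "_" gives base
  // "system_metrics", partition 2094, and window "week", so the statement
  // issued is:
  //   drop table if exists system_metrics_2091_week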

  public static void usage() {
    System.out.println("DataExpiration usage:");
    System.out
        .println("java -cp chukwa-core.jar org.apache.hadoop.chukwa.database.DataExpiration <date> <time window size>");
    System.out.println("  date format: YYYY-MM-DD");
    System.out.println("  time window size: 7, 30, 91, 365");
  }
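
  // Example invocation (the jar path is illustrative; adjust the classpath
  // to your installation):
  //   java -cp chukwa-core.jar \
  //       org.apache.hadoop.chukwa.database.DataExpiration 2008-02-01 7
  // This parses 2008-02-01 as the window start, adds 7 days to get the end,
  // and calls dropTables() over that window.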

  public static void main(String[] args) {
    DataExpiration de = new DataExpiration();
    long now = (new Date()).getTime();
    long start = now;
    long end = now;
    if (args.length == 2) {
      SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
      try {
        long dataExpStart = Calendar.getInstance().getTimeInMillis();
        start = sdf.parse(args[0]).getTime();
        // Window length: <days> x 1440 minutes x 60 seconds x 1000 ms.
        end = start + (Long.parseLong(args[1]) * 1440 * 60 * 1000L);
        de.dropTables(start, end);
        long dataExpEnd = Calendar.getInstance().getTimeInMillis();
        log.info("DataExpiration for: " + args[0] + " " + args[1]
            + " finished: (" + (double) (dataExpEnd - dataExpStart) / 1000
            + " seconds)");
      } catch (ParseException e) {
        usage();
      } catch (NumberFormatException e) {
        // A non-numeric <time window size> also falls back to the usage help.
        usage();
      }
    } else {
      usage();
    }
  }
}
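
The sketch below reproduces, without any Chukwa dependencies, the two calculations DataExpiration performs: the expiration window computed in main() and the drop statement built in dropTables(). The table name "system_metrics_2094_week" and the class name ExpirationSketch are hypothetical; real names come from DatabaseConfig.findTableName().

import java.text.ParseException;
import java.text.SimpleDateFormat;

public class ExpirationSketch {
  public static void main(String[] args) throws ParseException {
    // Window: <date> plus <days> x 1440 minutes x 60 seconds x 1000 ms.
    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
    long start = sdf.parse("2008-02-01").getTime();
    long end = start + (7 * 1440 * 60 * 1000L);
    System.out.println("window: " + start + " .. " + end);

    // Drop-statement construction for one partitioned table name.
    String tl = "system_metrics_2094_week";
    String[] parts = tl.split("_");
    int partition = Integer.parseInt(parts[parts.length - 2]) - 3;
    StringBuilder table = new StringBuilder();
    for (int i = 0; i < parts.length - 2; i++) {
      if (i != 0) {
        table.append("_");
      }
      table.append(parts[i]);
    }
    if (partition >= 0) {
      // Prints: drop table if exists system_metrics_2091_week
      System.out.println("drop table if exists " + table + "_" + partition
          + "_" + parts[parts.length - 1]);
    }
  }
}

Because partitions here are ordinary tables named by convention, expiration is just a string computation followed by a DROP TABLE; no database-side partition metadata is needed.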