This project has retired. For details, please refer to its Attic page.
DataExpiration xref
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.chukwa.database;
20
21
22 import java.text.SimpleDateFormat;
23 import java.util.Calendar;
24 import java.util.Date;
25 import java.util.HashMap;
26 import java.util.Iterator;
27 import org.apache.commons.logging.Log;
28 import org.apache.commons.logging.LogFactory;
29 import org.apache.hadoop.chukwa.util.DatabaseWriter;
30 import org.apache.hadoop.chukwa.util.RegexUtil;
31
32 public class DataExpiration {
33 private static DatabaseConfig dbc = null;
34 private static Log log = LogFactory.getLog(DataExpiration.class);
35
36 public DataExpiration() {
37 if (dbc == null) {
38 dbc = new DatabaseConfig();
39 }
40 }
41
42 public void dropTables(long start, long end) {
43 String cluster = System.getProperty("CLUSTER");
44 if (cluster == null) {
45 cluster = "unknown";
46 }
47 DatabaseWriter dbw = new DatabaseWriter(cluster);
48 try {
49 HashMap<String, String> dbNames = dbc.startWith("report.db.name.");
50 Iterator<String> ki = dbNames.keySet().iterator();
51 while (ki.hasNext()) {
52 String name = ki.next();
53 String tableName = dbNames.get(name);
54 if (!RegexUtil.isRegex(tableName)) {
55 log.warn("Skipping tableName: '" + tableName
56 + "' because there was an error parsing it as a regex: "
57 + RegexUtil.regexError(tableName));
58 return;
59 }
60 String[] tableList = dbc.findTableName(tableName, start, end);
61 for (String tl : tableList) {
62 log.debug("table name: " + tableList[0]);
63 try {
64 String[] parts = tl.split("_");
65 int partition = Integer.parseInt(parts[parts.length - 2]);
66 String table = "";
67 for (int i = 0; i < parts.length - 2; i++) {
68 if (i != 0) {
69 table = table + "_";
70 }
71 table = table + parts[i];
72 }
73 partition = partition - 3;
74 String dropPartition = "drop table if exists " + table + "_"
75 + partition + "_" + parts[parts.length - 1];
76 dbw.execute(dropPartition);
77 partition--;
78 if(partition>=0) {
79 dropPartition = "drop table if exists " + table + "_" + partition
80 + "_" + parts[parts.length - 1];
81 dbw.execute(dropPartition);
82 }
83 } catch (NumberFormatException e) {
84 log
85 .error("Error in parsing table partition number, skipping table:"
86 + tableList[0]);
87 } catch (ArrayIndexOutOfBoundsException e) {
88 log.debug("Skipping table:" + tableList[0]
89 + ", because it has no partition configuration.");
90 }
91 }
92 }
93 dbw.close();
94 } catch (Exception e) {
95 e.printStackTrace();
96 }
97 }
98
99 public static void usage() {
100 System.out.println("DataExpiration usage:");
101 System.out
102 .println("java -jar chukwa-core.jar org.apache.hadoop.chukwa.DataExpiration <date> <time window size>");
103 System.out.println(" date format: YYYY-MM-DD");
104 System.out.println(" time window size: 7, 30, 91, 365");
105 }
106
107 public static void main(String[] args) {
108 DataExpiration de = new DataExpiration();
109 long now = (new Date()).getTime();
110 long start = now;
111 long end = now;
112 if (args.length == 2) {
113 SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
114 try {
115 long dataExpStart = Calendar.getInstance().getTimeInMillis();
116 start = sdf.parse(args[0]).getTime();
117 end = start + (Long.parseLong(args[1]) * 1440 * 60 * 1000L);
118 de.dropTables(start, end);
119 long dataExpEnd = Calendar.getInstance().getTimeInMillis();
120 log.info("DataExpiration for: "+args[0]+" "+args[1]+" finished: ("+(double) (dataExpEnd-dataExpStart)/1000+" seconds)");
121 } catch (Exception e) {
122 usage();
123 }
124 } else {
125 usage();
126 }
127 }
128 }