/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.chukwa.database;

import java.sql.SQLException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.HashMap;
import java.util.Map.Entry;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.chukwa.util.DatabaseWriter;
import org.apache.hadoop.chukwa.util.RegexUtil;

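/**
 * Utility for purging expired data from the Chukwa reporting database.
 * For each table name configured under the <code>report.db.name.</code>
 * prefix, the partitioned tables that fall inside a given time window are
 * located and the partitions that have aged out are dropped.
 */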
public class DataExpiration {
  private static DatabaseConfig dbc = null;
  private static Log log = LogFactory.getLog(DataExpiration.class);

  public DataExpiration() {
    if (dbc == null) {
      dbc = new DatabaseConfig();
    }
  }

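  /**
   * Drop expired partition tables found between <code>start</code> and
   * <code>end</code>. For each matching table, the partition three periods
   * older is dropped if it exists. The SQL statement is assembled
   * dynamically from configured table names, hence the FindBugs
   * suppression below.
   */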
  @edu.umd.cs.findbugs.annotations.SuppressWarnings(value =
      "SQL_NONCONSTANT_STRING_PASSED_TO_EXECUTE",
      justification = "Dynamic based upon tables in the database")
  public void dropTables(long start, long end) {
    String cluster = System.getProperty("CLUSTER");
    if (cluster == null) {
      cluster = "unknown";
    }
    DatabaseWriter dbw = new DatabaseWriter(cluster);
    try {
      HashMap<String, String> dbNames = dbc.startWith("report.db.name.");
      for (Entry<String, String> entry : dbNames.entrySet()) {
        String tableName = entry.getValue();
        if (!RegexUtil.isRegex(tableName)) {
          log.warn("Skipping tableName: '" + tableName
              + "' because there was an error parsing it as a regex: "
              + RegexUtil.regexError(tableName));
          // Skip this entry but keep processing the remaining tables.
          continue;
        }
        String[] tableList = dbc.findTableName(tableName, start, end);
        for (String tl : tableList) {
          log.debug("table name: " + tl);
          try {
            // Expected table name shape: <base>_<partition>_<period>;
            // the second-to-last segment is the numeric partition.
            String[] parts = tl.split("_");
            int partition = Integer.parseInt(parts[parts.length - 2]);
            StringBuilder table = new StringBuilder();
            for (int i = 0; i < parts.length - 2; i++) {
              if (i != 0) {
                table.append("_");
              }
              table.append(parts[i]);
            }
            // Drop the partition three periods older than the one found,
            // if such a partition exists.
            partition = partition - 3;
            if (partition >= 0) {
              StringBuilder dropPartition = new StringBuilder();
              dropPartition.append("drop table if exists ");
              dropPartition.append(table);
              dropPartition.append("_");
              dropPartition.append(partition);
              dropPartition.append("_");
              dropPartition.append(parts[parts.length - 1]);
              final String query = dropPartition.toString();
              dbw.execute(query);
            }
          } catch (NumberFormatException e) {
            log.error("Error in parsing table partition number, skipping table: "
                + tl);
          } catch (ArrayIndexOutOfBoundsException e) {
            log.debug("Skipping table: " + tl
                + ", because it has no partition configuration.");
          }
        }
      }
      dbw.close();
    } catch (SQLException e) {
      log.error("Error dropping expired tables: " + e.getMessage(), e);
    }
  }

  public static void usage() {
    System.out.println("DataExpiration usage:");
    System.out.println("java -cp chukwa-core.jar "
        + "org.apache.hadoop.chukwa.database.DataExpiration <date> <time window size>");
    System.out.println("     date format: YYYY-MM-DD");
    System.out.println("     time window size: 7, 30, 91, 365");
  }

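  /**
   * Command line entry point. Expects a start date (yyyy-MM-dd) and a
   * window size in days; tables falling between the start date and
   * start + window are candidates for expiration.
   */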
  public static void main(String[] args) {
    DataExpiration de = new DataExpiration();
    long now = (new Date()).getTime();
    long start = now;
    long end = now;
    if (args.length == 2) {
      SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
      try {
        long dataExpStart = Calendar.getInstance().getTimeInMillis();
        start = sdf.parse(args[0]).getTime();
        // args[1] is a window size in days: 1440 minutes * 60 s * 1000 ms.
        end = start + (Long.parseLong(args[1]) * 1440 * 60 * 1000L);
        de.dropTables(start, end);
        long dataExpEnd = Calendar.getInstance().getTimeInMillis();
        log.info("DataExpiration for: " + args[0] + " " + args[1]
            + " finished: (" + (double) (dataExpEnd - dataExpStart) / 1000
            + " seconds)");
      } catch (ParseException e) {
        usage();
      } catch (NumberFormatException e) {
        // A non-numeric window size is reported the same way as a bad date.
        usage();
      }
    } else {
      usage();
    }
  }
}