/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.chukwa;


import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.chukwa.datacollection.adaptor.*;

/**
 * A contiguous run of bytes located at a particular logical offset within a
 * stream, holding one or more "records".
 *
 * <p>Besides the raw bytes, a chunk carries metadata: where it came from
 * (source and stream name), the format of its payload (data type), and the
 * positions of the record boundaries inside the buffer.
 */
public interface Chunk {

  // ---- Origin of the data: conceptually these are network addresses ----

  /**
   * @return the source of this chunk; conceptually a network address
   */
  String getSource();

  /**
   * @param logSource the source of this chunk; conceptually a network address
   */
  void setSource(String logSource);

  /**
   * Reports which stream this chunk was cut from.
   *
   * @return the stream's name, for example a file name
   */
  String getStreamName();

  /**
   * @param streamName the name of the stream this chunk belongs to,
   *        for example a file name
   */
  void setStreamName(String streamName);

  // ---- Format of the data buffer ----

  /**
   * @return the data type, which describes the format of the data buffer
   */
  String getDataType();

  /**
   * @param t the data type, which describes the format of the data buffer
   */
  void setDataType(String t);

  // ---- Payload ----

  /**
   * @return the user data carried by this chunk
   */
  byte[] getData();

  /**
   * @param logEvent the user data to be carried by this chunk
   */
  void setData(byte[] logEvent);

  /**
   * Returns the <b>end</b> offsets of the records held in the buffer.
   *
   * <p>End offsets are stored instead of start offsets because the first
   * start offset is always 0 and therefore carries no information, whereas
   * the last end offset tells how much of the buffer is valid.
   *
   * <p>Precisely: {@code offsets[i]} is the offset, within this chunk, of the
   * last byte of record {@code i}.
   *
   * @return the record end offsets
   */
  int[] getRecordOffsets();

  /**
   * Sets the <b>end</b> offsets of the records held in the buffer, using the
   * same convention as {@link #getRecordOffsets()}: {@code offsets[i]} is the
   * offset, within this chunk, of the last byte of record {@code i}.
   *
   * @param offsets the record end offsets
   */
  void setRecordOffsets(int[] offsets);

  /**
   * Returns the sequence ID of this chunk.
   *
   * <p>The sequence ID is the byte offset of the first byte that is
   * <em>not</em> part of this chunk. This convention was chosen so that
   * subtracting sequence IDs yields a length; likewise,
   * {@code seqID - length} gives the position of the chunk's first byte.
   *
   * @return the byte offset of the first byte not in this chunk
   */
  long getSeqID();

  /**
   * @param l the sequence ID, i.e. the byte offset of the first byte not in
   *        this chunk
   */
  void setSeqID(long l);

  /**
   * Gives access to the adaptor that sent this event. LocalAgent and the
   * Connectors rely on it to deliver acks to the appropriate place.
   *
   * @return the originating Adaptor
   */
  Adaptor getInitiator();

  /**
   * Estimates how large this chunk will be on the wire. The estimate assumes
   * two bytes per metadata character once serialized, which is pessimistic.
   *
   * @return the number of bytes this chunk might occupy once serialized
   */
  int getSerializedSizeEstimate();

  // ---- Tags ----

  /**
   * @return the tags attached to this chunk
   */
  String getTags();

  /**
   * Adds a tag to this chunk.
   *
   * @param tag a comma separated list
   */
  void addTag(String tag);

  /**
   * Looks up the value of one tag, on the assumption that tags are written in
   * the form {@code tagname="val"}.
   *
   * @param tagName the name of the tag to look up
   * @return the tag's value, or {@code null} if not matched
   */
  String getTag(String tagName);

  /**
   * Writes this chunk to the supplied output.
   *
   * @param data the output to write to
   * @throws IOException presumably when writing to {@code data} fails; the
   *         exact conditions are up to the implementation
   */
  void write(DataOutput data) throws IOException;
}