001/* 002 * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 003 * 004 * Project and contact information: http://www.cascading.org/ 005 * 006 * This file is part of the Cascading project. 007 * 008 * Licensed under the Apache License, Version 2.0 (the "License"); 009 * you may not use this file except in compliance with the License. 010 * You may obtain a copy of the License at 011 * 012 * http://www.apache.org/licenses/LICENSE-2.0 013 * 014 * Unless required by applicable law or agreed to in writing, software 015 * distributed under the License is distributed on an "AS IS" BASIS, 016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 017 * See the License for the specific language governing permissions and 018 * limitations under the License. 019 */ 020 021package cascading.operation; 022 023import cascading.flow.FlowProcess; 024import cascading.flow.planner.DeclaresResults; 025import cascading.tuple.Fields; 026 027/** 028 * Interface Operation is the base interface for all functions applied to {@link cascading.tuple.Tuple} streams. 029 * <p/> 030 * Specifically {@link Function}, {@link Filter}, {@link Aggregator}, {@link Buffer}, and {@link Assertion}. 031 * <p/> 032 * Use {@link BaseOperation} for a convenient way to create new Operation types. 033 * 034 * @see cascading.operation.BaseOperation 035 * @see Function 036 * @see Filter 037 * @see Aggregator 038 * @see Buffer 039 * @see Assertion 040 */ 041public interface Operation<Context> extends DeclaresResults 042 { 043 /** Field ANY denotes that a given Operation will take any number of argument values */ 044 int ANY = Integer.MAX_VALUE; 045 046 /** 047 * The prepare method is called immediately before the current Operation instance is put into play processing Tuples. 048 * This method should initialize any resources that can be shutdown or released in the 049 * {@link #cleanup(cascading.flow.FlowProcess, OperationCall)} method. 050 * <p/> 051 * Any resources created should be stored in the {@code Context}, not as instance fields on the class. 052 * <p/> 053 * This method may be called more than once during the life of this instance. But it will never be called multiple times 054 * without a cleanup invocation immediately before subsequent invocations. 055 * <p/> 056 * If the Flow this Operation instance belongs will execute on a remote cluster, this method will be called 057 * cluster side, not client side. 058 * 059 * @param flowProcess 060 * @param operationCall 061 */ 062 void prepare( FlowProcess flowProcess, OperationCall<Context> operationCall ); 063 064 /** 065 * The flush method is called when an Operation that is caching values must empty the cache. It is called before 066 * {@link #cleanup(cascading.flow.FlowProcess, OperationCall)} is invoked. 067 * <p/> 068 * It is safe to cast the {@link cascading.operation.OperationCall} to a {@link FunctionCall}, or equivalent, and 069 * get its {@link cascading.operation.FunctionCall#getOutputCollector()}. 070 * 071 * @param flowProcess 072 * @param operationCall 073 */ 074 void flush( FlowProcess flowProcess, OperationCall<Context> operationCall ); 075 076 /** 077 * The cleanup method is called immediately after the current Operation instance is taken out of play processing Tuples. 078 * This method should shutdown any resources created or initialized during the 079 * {@link #prepare(cascading.flow.FlowProcess, OperationCall)} method. 080 * <p/> 081 * This method may be called more than once during the life of this instance. But it will never be called multiple times 082 * without a prepare invocation before. 083 * <p/> 084 * If the Flow this Operation instance belongs will execute on a remote cluster, this method will be called 085 * cluster side, not client side. 086 * 087 * @param flowProcess 088 * @param operationCall 089 */ 090 void cleanup( FlowProcess flowProcess, OperationCall<Context> operationCall ); 091 092 /** 093 * Returns the fields created by this Operation instance. If this instance is a {@link Filter}, it should always 094 * return {@link Fields#ALL}. 095 * 096 * @return a Fields instance 097 */ 098 Fields getFieldDeclaration(); 099 100 /** 101 * The minimum number of arguments this Operation expects from the calling {@link cascading.pipe.Each} or 102 * {@link cascading.pipe.Every} Operator. 103 * <p/> 104 * Operations should be willing to receive more arguments than expected, but should ignore them if they are unused, 105 * instead of failing. 106 * 107 * @return an int 108 */ 109 int getNumArgs(); 110 111 /** 112 * Returns {@code true} if this Operation instance can safely execute on the same 'record' multiple 113 * times, {@code false} otherwise. 114 * <p/> 115 * That is, this Operation is safe if it has no side-effects, or if it does, they are idempotent. 116 * <p/> 117 * If seeing the same 'record' more than once can cause errors (internally or externally), 118 * this method must return {@code false}. 119 * 120 * @return a boolean 121 */ 122 boolean isSafe(); 123 }