001/*
002 * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
003 *
004 * Project and contact information: http://www.cascading.org/
005 *
006 * This file is part of the Cascading project.
007 *
008 * Licensed under the Apache License, Version 2.0 (the "License");
009 * you may not use this file except in compliance with the License.
010 * You may obtain a copy of the License at
011 *
012 *     http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing, software
015 * distributed under the License is distributed on an "AS IS" BASIS,
016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017 * See the License for the specific language governing permissions and
018 * limitations under the License.
019 */
020
021package cascading.operation;
022
023import cascading.flow.FlowProcess;
024import cascading.flow.planner.DeclaresResults;
025import cascading.tuple.Fields;
026
027/**
028 * Interface Operation is the base interface for all functions applied to {@link cascading.tuple.Tuple} streams.
029 * <p>
030 * Specifically {@link Function}, {@link Filter}, {@link Aggregator}, {@link Buffer}, and {@link Assertion}.
031 * <p>
032 * Use {@link BaseOperation} for a convenient way to create new Operation types.
033 * @param <Context> the type in which an Operation stores resources created during {@link #prepare(cascading.flow.FlowProcess, OperationCall)}
034 * @see cascading.operation.BaseOperation
035 * @see Function
036 * @see Filter
037 * @see Aggregator
038 * @see Buffer
039 * @see Assertion
040 */
041public interface Operation<Context> extends DeclaresResults
042  {
043  /** Field ANY denotes that a given Operation will take any number of argument values; its value is {@link Integer#MAX_VALUE}. */
044  int ANY = Integer.MAX_VALUE;
045
046  /**
047   * The prepare method is called immediately before the current Operation instance is put into play processing Tuples.
048   * This method should initialize any resources that can be shut down or released in the
049   * {@link #cleanup(cascading.flow.FlowProcess, OperationCall)} method.
050   * <p>
051   * Any resources created should be stored in the {@code Context}, not as instance fields on the class.
052   * <p>
053   * This method may be called more than once during the life of this instance, but it will never be called a second
054   * time without a cleanup invocation immediately before the subsequent invocation.
055   * <p>
056   * If the Flow this Operation instance belongs to will execute on a remote cluster, this method will be called
057   * cluster side, not client side.
058   *
059   * @param flowProcess   the {@link FlowProcess} passed in for this invocation
060   * @param operationCall the {@link OperationCall} for this invocation, typed by this Operation's {@code Context}
061   */
062  void prepare( FlowProcess flowProcess, OperationCall<Context> operationCall );
063
064  /**
065   * The flush method is called when an Operation that is caching values must empty the cache. It is called before
066   * {@link #cleanup(cascading.flow.FlowProcess, OperationCall)} is invoked.
067   * <p>
068   * It is safe to cast the {@link cascading.operation.OperationCall} to a {@link FunctionCall}, or equivalent, and
069   * get its {@link cascading.operation.FunctionCall#getOutputCollector()}.
070   *
071   * @param flowProcess   the {@link FlowProcess} passed in for this invocation
072   * @param operationCall the {@link OperationCall} for this invocation, typed by this Operation's {@code Context}
073   */
074  void flush( FlowProcess flowProcess, OperationCall<Context> operationCall );
075
076  /**
077   * The cleanup method is called immediately after the current Operation instance is taken out of play processing Tuples.
078   * This method should shut down any resources created or initialized during the
079   * {@link #prepare(cascading.flow.FlowProcess, OperationCall)} method.
080   * <p>
081   * This method may be called more than once during the life of this instance, but it will never be called multiple times
082   * without a prepare invocation before.
083   * <p>
084   * If the Flow this Operation instance belongs to will execute on a remote cluster, this method will be called
085   * cluster side, not client side.
086   *
087   * @param flowProcess   the {@link FlowProcess} passed in for this invocation
088   * @param operationCall the {@link OperationCall} for this invocation, typed by this Operation's {@code Context}
089   */
090  void cleanup( FlowProcess flowProcess, OperationCall<Context> operationCall );
091
092  /**
093   * Returns the fields created by this Operation instance. If this instance is a {@link Filter}, it should always
094   * return {@link Fields#ALL}.
095   *
096   * @return the {@link Fields} instance describing the fields this Operation creates
097   */
098  Fields getFieldDeclaration();
099
100  /**
101   * The minimum number of arguments this Operation expects from the calling {@link cascading.pipe.Each} or
102   * {@link cascading.pipe.Every} Operator.
103   * <p>
104   * Operations should be willing to receive more arguments than expected, but should ignore them if they are unused,
105   * instead of failing.
106   *
107   * @return the minimum number of expected arguments, as an int
108   */
109  int getNumArgs();
110
111  /**
112   * Returns {@code true} if this Operation instance can safely execute on the same 'record' multiple
113   * times, {@code false} otherwise.
114   * <p>
115   * That is, this Operation is safe if it has no side-effects, or if it does, they are idempotent.
116   * <p>
117   * If seeing the same 'record' more than once can cause errors (internally or externally),
118   * this method must return {@code false}.
119   *
120   * @return {@code true} if executing more than once on the same 'record' is safe, {@code false} otherwise
121   */
122  boolean isSafe();
123  }