public class TextLine extends cascading.scheme.Scheme<java.util.Properties,java.io.InputStream,java.io.OutputStream,java.io.LineNumberReader,java.io.PrintWriter>
TextLine is a Scheme
for plain text files. Files are broken into
lines. Either a line-feed or a carriage-return is used to signal end of line.
By default, this scheme returns a Tuple
with two fields, "num" and "line", where "num"
is the line number for "line".
Many of the constructors take both "sourceFields" and "sinkFields". sourceFields denotes the field names
to be used instead of the names "num" and "line". sinkFields is a selector and is by default Fields.ALL.
Any available field names can be given if only a subset of the incoming fields should be used.
If a Fields
instance having only one field is passed to the constructor as sourceFields, the returned tuples
will simply be the "line" value using the given field name.
Note that TextLine will concatenate all the Tuple values for the selected fields with a TAB delimiter before
writing out the line.
By default, all text is encoded/decoded as UTF-8. This can be changed via the charsetName
constructor
argument.
Modifier and Type | Field and Description |
---|---|
static java.lang.String |
DEFAULT_CHARSET |
static cascading.tuple.Fields |
DEFAULT_SOURCE_FIELDS |
Constructor and Description |
---|
TextLine()
Creates a new TextLine instance that sources "num" and "line" fields, and sinks all incoming fields, where
"num" is the line number of the line in the input file.
|
TextLine(cascading.tuple.Fields sourceFields)
Creates a new TextLine instance.
|
TextLine(cascading.tuple.Fields sourceFields,
cascading.tuple.Fields sinkFields)
Creates a new TextLine instance.
|
TextLine(cascading.tuple.Fields sourceFields,
cascading.tuple.Fields sinkFields,
java.lang.String charsetName)
Creates a new TextLine instance.
|
TextLine(cascading.tuple.Fields sourceFields,
java.lang.String charsetName)
Creates a new TextLine instance.
|
Modifier and Type | Method and Description |
---|---|
java.io.LineNumberReader |
createInput(java.io.InputStream inputStream) |
java.io.PrintWriter |
createOutput(java.io.OutputStream outputStream) |
java.lang.String |
getCharsetName() |
void |
presentSinkFields(cascading.flow.FlowProcess<? extends java.util.Properties> process,
cascading.tap.Tap tap,
cascading.tuple.Fields fields) |
void |
presentSourceFields(cascading.flow.FlowProcess<? extends java.util.Properties> process,
cascading.tap.Tap tap,
cascading.tuple.Fields fields) |
void |
sink(cascading.flow.FlowProcess<? extends java.util.Properties> flowProcess,
cascading.scheme.SinkCall<java.io.PrintWriter,java.io.OutputStream> sinkCall) |
void |
sinkCleanup(cascading.flow.FlowProcess<? extends java.util.Properties> flowProcess,
cascading.scheme.SinkCall<java.io.PrintWriter,java.io.OutputStream> sinkCall) |
void |
sinkConfInit(cascading.flow.FlowProcess<? extends java.util.Properties> flowProcess,
cascading.tap.Tap<java.util.Properties,java.io.InputStream,java.io.OutputStream> tap,
java.util.Properties conf) |
void |
sinkPrepare(cascading.flow.FlowProcess<? extends java.util.Properties> flowProcess,
cascading.scheme.SinkCall<java.io.PrintWriter,java.io.OutputStream> sinkCall) |
boolean |
source(cascading.flow.FlowProcess<? extends java.util.Properties> flowProcess,
cascading.scheme.SourceCall<java.io.LineNumberReader,java.io.InputStream> sourceCall) |
void |
sourceCleanup(cascading.flow.FlowProcess<? extends java.util.Properties> flowProcess,
cascading.scheme.SourceCall<java.io.LineNumberReader,java.io.InputStream> sourceCall) |
void |
sourceConfInit(cascading.flow.FlowProcess<? extends java.util.Properties> flowProcess,
cascading.tap.Tap<java.util.Properties,java.io.InputStream,java.io.OutputStream> tap,
java.util.Properties conf) |
void |
sourcePrepare(cascading.flow.FlowProcess<? extends java.util.Properties> flowProcess,
cascading.scheme.SourceCall<java.io.LineNumberReader,java.io.InputStream> sourceCall) |
void |
sourceRePrepare(cascading.flow.FlowProcess<? extends java.util.Properties> flowProcess,
cascading.scheme.SourceCall<java.io.LineNumberReader,java.io.InputStream> sourceCall) |
protected void |
verify(cascading.tuple.Fields sourceFields) |
equals, getNumSinkParts, getSinkFields, getSourceFields, getTrace, hashCode, isSink, isSource, isSymmetrical, presentSinkFieldsInternal, presentSourceFieldsInternal, retrieveSinkFields, retrieveSourceFields, setNumSinkParts, setSinkFields, setSourceFields, toString
public static final java.lang.String DEFAULT_CHARSET
public static final cascading.tuple.Fields DEFAULT_SOURCE_FIELDS
public TextLine()
@ConstructorProperties(value="sourceFields") public TextLine(cascading.tuple.Fields sourceFields)
sourceFields
- of Fields
@ConstructorProperties(value={"sourceFields","charsetName"}) public TextLine(cascading.tuple.Fields sourceFields, java.lang.String charsetName)
sourceFields
- of Fields
charsetName
- of type String
@ConstructorProperties(value={"sourceFields","sinkFields"}) public TextLine(cascading.tuple.Fields sourceFields, cascading.tuple.Fields sinkFields)
sourceFields
- of Fields
sinkFields
- of Fields
@ConstructorProperties(value={"sourceFields","sinkFields","charsetName"}) public TextLine(cascading.tuple.Fields sourceFields, cascading.tuple.Fields sinkFields, java.lang.String charsetName)
sourceFields
- of Fields
sinkFields
- of Fields
charsetName
- of type String
public java.lang.String getCharsetName()
protected void verify(cascading.tuple.Fields sourceFields)
public java.io.LineNumberReader createInput(java.io.InputStream inputStream)
public java.io.PrintWriter createOutput(java.io.OutputStream outputStream)
public void presentSourceFields(cascading.flow.FlowProcess<? extends java.util.Properties> process, cascading.tap.Tap tap, cascading.tuple.Fields fields)
presentSourceFields
in class cascading.scheme.Scheme<java.util.Properties,java.io.InputStream,java.io.OutputStream,java.io.LineNumberReader,java.io.PrintWriter>
public void presentSinkFields(cascading.flow.FlowProcess<? extends java.util.Properties> process, cascading.tap.Tap tap, cascading.tuple.Fields fields)
presentSinkFields
in class cascading.scheme.Scheme<java.util.Properties,java.io.InputStream,java.io.OutputStream,java.io.LineNumberReader,java.io.PrintWriter>
public void sourceConfInit(cascading.flow.FlowProcess<? extends java.util.Properties> flowProcess, cascading.tap.Tap<java.util.Properties,java.io.InputStream,java.io.OutputStream> tap, java.util.Properties conf)
sourceConfInit
in class cascading.scheme.Scheme<java.util.Properties,java.io.InputStream,java.io.OutputStream,java.io.LineNumberReader,java.io.PrintWriter>
public void sinkConfInit(cascading.flow.FlowProcess<? extends java.util.Properties> flowProcess, cascading.tap.Tap<java.util.Properties,java.io.InputStream,java.io.OutputStream> tap, java.util.Properties conf)
sinkConfInit
in class cascading.scheme.Scheme<java.util.Properties,java.io.InputStream,java.io.OutputStream,java.io.LineNumberReader,java.io.PrintWriter>
public void sourcePrepare(cascading.flow.FlowProcess<? extends java.util.Properties> flowProcess, cascading.scheme.SourceCall<java.io.LineNumberReader,java.io.InputStream> sourceCall) throws java.io.IOException
sourcePrepare
in class cascading.scheme.Scheme<java.util.Properties,java.io.InputStream,java.io.OutputStream,java.io.LineNumberReader,java.io.PrintWriter>
java.io.IOException
public void sourceRePrepare(cascading.flow.FlowProcess<? extends java.util.Properties> flowProcess, cascading.scheme.SourceCall<java.io.LineNumberReader,java.io.InputStream> sourceCall) throws java.io.IOException
sourceRePrepare
in class cascading.scheme.Scheme<java.util.Properties,java.io.InputStream,java.io.OutputStream,java.io.LineNumberReader,java.io.PrintWriter>
java.io.IOException
public boolean source(cascading.flow.FlowProcess<? extends java.util.Properties> flowProcess, cascading.scheme.SourceCall<java.io.LineNumberReader,java.io.InputStream> sourceCall) throws java.io.IOException
source
in class cascading.scheme.Scheme<java.util.Properties,java.io.InputStream,java.io.OutputStream,java.io.LineNumberReader,java.io.PrintWriter>
java.io.IOException
public void sourceCleanup(cascading.flow.FlowProcess<? extends java.util.Properties> flowProcess, cascading.scheme.SourceCall<java.io.LineNumberReader,java.io.InputStream> sourceCall) throws java.io.IOException
sourceCleanup
in class cascading.scheme.Scheme<java.util.Properties,java.io.InputStream,java.io.OutputStream,java.io.LineNumberReader,java.io.PrintWriter>
java.io.IOException
public void sinkPrepare(cascading.flow.FlowProcess<? extends java.util.Properties> flowProcess, cascading.scheme.SinkCall<java.io.PrintWriter,java.io.OutputStream> sinkCall) throws java.io.IOException
sinkPrepare
in class cascading.scheme.Scheme<java.util.Properties,java.io.InputStream,java.io.OutputStream,java.io.LineNumberReader,java.io.PrintWriter>
java.io.IOException
public void sink(cascading.flow.FlowProcess<? extends java.util.Properties> flowProcess, cascading.scheme.SinkCall<java.io.PrintWriter,java.io.OutputStream> sinkCall) throws java.io.IOException
sink
in class cascading.scheme.Scheme<java.util.Properties,java.io.InputStream,java.io.OutputStream,java.io.LineNumberReader,java.io.PrintWriter>
java.io.IOException
public void sinkCleanup(cascading.flow.FlowProcess<? extends java.util.Properties> flowProcess, cascading.scheme.SinkCall<java.io.PrintWriter,java.io.OutputStream> sinkCall) throws java.io.IOException
sinkCleanup
in class cascading.scheme.Scheme<java.util.Properties,java.io.InputStream,java.io.OutputStream,java.io.LineNumberReader,java.io.PrintWriter>
java.io.IOException
Copyright © 2007-2015 Xplenty, Inc. All Rights Reserved.