elasticsearch-hadoop

org.elasticsearch.hadoop.pig
Class EsStorage

java.lang.Object
  extended by org.apache.pig.LoadFunc
      extended by org.elasticsearch.hadoop.pig.EsStorage
All Implemented Interfaces:
LoadMetadata, LoadPushDown, StoreFuncInterface, StoreMetadata

public class EsStorage
extends LoadFunc
implements LoadMetadata, LoadPushDown, StoreFuncInterface, StoreMetadata

Pig storage for reading data from and writing data to an ElasticSearch index. Uses the tuple's implied schema to create the resulting JSON string sent to ElasticSearch.

Typical usage is:

 A = LOAD 'twitter/_search?q=kimchy' USING org.elasticsearch.hadoop.pig.EsStorage();
 
 STORE A INTO '<index>' USING org.elasticsearch.hadoop.pig.EsStorage();
 
The ElasticSearch host/port can be specified through Hadoop properties (see package description) or passed to the EsStorage(String...) constructor.


Nested Class Summary
 
Nested classes/interfaces inherited from interface org.apache.pig.LoadPushDown
LoadPushDown.OperatorSet, LoadPushDown.RequiredField, LoadPushDown.RequiredFieldList, LoadPushDown.RequiredFieldResponse
 
Constructor Summary
EsStorage()
           
EsStorage(String... configuration)
           
 
Method Summary
 void checkSchema(ResourceSchema s)
           
 void cleanupOnFailure(String location, Job job)
           
 void cleanupOnSuccess(String location, Job job)
           
 List<LoadPushDown.OperatorSet> getFeatures()
           
 InputFormat getInputFormat()
           
 Tuple getNext()
           
 OutputFormat<Object,Map<Writable,Writable>> getOutputFormat()
           
 String[] getPartitionKeys(String location, Job job)
           
 ResourceSchema getSchema(String location, Job job)
           
 ResourceStatistics getStatistics(String location, Job job)
           
 void prepareToRead(RecordReader reader, PigSplit split)
           
 void prepareToWrite(RecordWriter writer)
           
 LoadPushDown.RequiredFieldResponse pushProjection(LoadPushDown.RequiredFieldList requiredFieldList)
           
 void putNext(Tuple t)
           
 String relativeToAbsolutePath(String location, Path curDir)
           
 String relToAbsPathForStoreLocation(String location, Path curDir)
           
 void setLocation(String location, Job job)
           
 void setPartitionFilter(Expression partitionFilter)
           
 void setStoreFuncUDFContextSignature(String signature)
           
 void setStoreLocation(String location, Job job)
           
 void setUDFContextSignature(String signature)
           
 void storeSchema(ResourceSchema schema, String location, Job job)
           
 void storeStatistics(ResourceStatistics stats, String location, Job job)
           
 
Methods inherited from class org.apache.pig.LoadFunc
getAbsolutePath, getLoadCaster, getPathStrings, join, warn
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Constructor Detail

EsStorage

public EsStorage()

EsStorage

public EsStorage(String... configuration)
Method Detail

relToAbsPathForStoreLocation

public String relToAbsPathForStoreLocation(String location,
                                           Path curDir)
                                    throws IOException
Specified by:
relToAbsPathForStoreLocation in interface StoreFuncInterface
Throws:
IOException

setStoreFuncUDFContextSignature

public void setStoreFuncUDFContextSignature(String signature)
Specified by:
setStoreFuncUDFContextSignature in interface StoreFuncInterface

checkSchema

public void checkSchema(ResourceSchema s)
                 throws IOException
Specified by:
checkSchema in interface StoreFuncInterface
Throws:
IOException

setStoreLocation

public void setStoreLocation(String location,
                             Job job)
                      throws IOException
Specified by:
setStoreLocation in interface StoreFuncInterface
Throws:
IOException

getOutputFormat

public OutputFormat<Object,Map<Writable,Writable>> getOutputFormat()
                                                            throws IOException
Specified by:
getOutputFormat in interface StoreFuncInterface
Throws:
IOException

prepareToWrite

public void prepareToWrite(RecordWriter writer)
                    throws IOException
Specified by:
prepareToWrite in interface StoreFuncInterface
Throws:
IOException

putNext

public void putNext(Tuple t)
             throws IOException
Specified by:
putNext in interface StoreFuncInterface
Throws:
IOException

cleanupOnFailure

public void cleanupOnFailure(String location,
                             Job job)
                      throws IOException
Specified by:
cleanupOnFailure in interface StoreFuncInterface
Throws:
IOException

cleanupOnSuccess

public void cleanupOnSuccess(String location,
                             Job job)
                      throws IOException
Specified by:
cleanupOnSuccess in interface StoreFuncInterface
Throws:
IOException

storeStatistics

public void storeStatistics(ResourceStatistics stats,
                            String location,
                            Job job)
                     throws IOException
Specified by:
storeStatistics in interface StoreMetadata
Throws:
IOException

storeSchema

public void storeSchema(ResourceSchema schema,
                        String location,
                        Job job)
                 throws IOException
Specified by:
storeSchema in interface StoreMetadata
Throws:
IOException

setLocation

public void setLocation(String location,
                        Job job)
                 throws IOException
Specified by:
setLocation in class LoadFunc
Throws:
IOException

relativeToAbsolutePath

public String relativeToAbsolutePath(String location,
                                     Path curDir)
                              throws IOException
Overrides:
relativeToAbsolutePath in class LoadFunc
Throws:
IOException

getInputFormat

public InputFormat getInputFormat()
                           throws IOException
Specified by:
getInputFormat in class LoadFunc
Throws:
IOException

prepareToRead

public void prepareToRead(RecordReader reader,
                          PigSplit split)
                   throws IOException
Specified by:
prepareToRead in class LoadFunc
Throws:
IOException

getNext

public Tuple getNext()
              throws IOException
Specified by:
getNext in class LoadFunc
Throws:
IOException

getFeatures

public List<LoadPushDown.OperatorSet> getFeatures()
Specified by:
getFeatures in interface LoadPushDown

pushProjection

public LoadPushDown.RequiredFieldResponse pushProjection(LoadPushDown.RequiredFieldList requiredFieldList)
                                                  throws FrontendException
Specified by:
pushProjection in interface LoadPushDown
Throws:
FrontendException

getSchema

public ResourceSchema getSchema(String location,
                                Job job)
                         throws IOException
Specified by:
getSchema in interface LoadMetadata
Throws:
IOException

getStatistics

public ResourceStatistics getStatistics(String location,
                                        Job job)
                                 throws IOException
Specified by:
getStatistics in interface LoadMetadata
Throws:
IOException

getPartitionKeys

public String[] getPartitionKeys(String location,
                                 Job job)
                          throws IOException
Specified by:
getPartitionKeys in interface LoadMetadata
Throws:
IOException

setPartitionFilter

public void setPartitionFilter(Expression partitionFilter)
                        throws IOException
Specified by:
setPartitionFilter in interface LoadMetadata
Throws:
IOException

setUDFContextSignature

public void setUDFContextSignature(String signature)
Overrides:
setUDFContextSignature in class LoadFunc

elasticsearch-hadoop