Code snippet
showing how to call a built-in function as well as a user-defined function (UDF) from
Pig Latin
Pig script calling
a built-in function
-- Load the input; Sample.txt is delimited with '\t'.
A = LOAD '/home/Sample.txt' USING PigStorage('\t') AS (f1:chararray, f2:chararray, f3:chararray);
-- Call the built-in function UPPER on the third field.
B = FOREACH A GENERATE f1, f2, f3, UPPER(f3);
-- Display the contents of B.
DUMP B;
Pig script calling
a UDF
-- Give the path of the jar file that contains the UDF.
REGISTER /home/startsWith.jar;
-- Load the tab-delimited input.
A = LOAD '/home/Sample.txt' USING PigStorage('\t') AS (f1:chararray, f2:chararray, f3:chararray);
-- Call the UDF using its fully qualified class name.
-- NOTE(review): the com.tcs prefix assumes the class is declared in package com.tcs -- confirm against the jar.
B = FOREACH A GENERATE f1, f2, com.tcs.startsWith(f3, 'c');
-- Display the contents of B.
DUMP B;
import java.io.IOException;
import org.apache.pig.FilterFunc;
import org.apache.pig.PigWarning;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.data.Tuple;
/**
 * Pig UDF that tests whether a string starts with a given prefix.
 *
 * <p>The input tuple must hold exactly two fields: the string to test and the
 * prefix. The result follows {@link String#startsWith(String)}, so {@code true}
 * is also returned when the prefix is the empty string or equals the whole
 * string.
 */
public class startsWith extends FilterFunc {

    /**
     * Evaluates {@code input.get(0).startsWith(input.get(1))}.
     *
     * @param input tuple holding the string to test and the prefix, in that order
     * @return {@code true} or {@code false} per {@link String#startsWith(String)};
     *         {@code null} when either field is null or when reading the prefix
     *         raises an {@link ExecException} (a warning is recorded in that case)
     * @throws IOException if the tuple does not contain exactly two fields, or if
     *         reading the first field fails
     */
    @Override
    public Boolean exec(Tuple input) throws IOException {
        if (input.size() != 2) {
            String msg = "startsWith: Only 2 parameters are allowed.";
            throw new IOException(msg);
        }

        // Read the first field once; a null value yields a null result.
        final Object value = input.get(0);
        if (value == null) {
            return null;
        }

        try {
            final Object prefix = input.get(1);
            // A reference comparison is required here: x.equals(null) is never
            // true for a non-null x and throws NullPointerException for a null x.
            if (prefix != null) {
                final String str1 = (String) value;
                final String str2 = (String) prefix;
                return str1.startsWith(str2);
            }
        } catch (ExecException e) {
            warn("Error reading input: " + e.getMessage(),
                    PigWarning.UDF_WARNING_1);
        }
        // Null prefix or unreadable field: no answer can be given.
        return null;
    }
}