Code snippet
showing how to call a built-in as well as a user-defined function (UDF) from
Pig Latin
Pig script calling
a built-in function
-- Load Sample.txt, which is delimited with '\t', as three chararray fields.
A = LOAD '/home/Sample.txt' using PigStorage('\t') AS (f1:chararray, f2:chararray, f3:chararray);
-- Call the built-in function UPPER on f3 and emit it next to the original fields.
B = FOREACH A GENERATE f1, f2, f3, UPPER(f3);
-- Display the contents of B.
DUMP B;
Pig script calling
a UDF
-- Register the jar that contains the UDF; give the path of the jar file.
Register /home/startsWith.jar;
-- Load Sample.txt, which is delimited with '\t', as three chararray fields.
A = LOAD '/home/Sample.txt' using PigStorage('\t') AS (f1:chararray, f2:chararray, f3:chararray);
-- Call the UDF using its fully qualified class name.
B = FOREACH A GENERATE f1, f2, com.tcs.startsWith(f3, 'c');
-- Display the contents of B.
DUMP B;
import java.io.IOException; import org.apache.pig.FilterFunc; import org.apache.pig.PigWarning; import org.apache.pig.backend.executionengine.ExecException; import org.apache.pig.data.Tuple; /** * This UDF is used to Test if string starts with the specified prefix. Note * this function returns true if the character sequence represented by the * argument is a prefix of the character sequence represented by the string. * false otherwise. Note also that true will be returned if the 2nd * argument(prefix) is an empty string or is equal to this String * */ public class startsWith extends FilterFunc { @Override public Boolean exec(Tuple input) throws IOException { if (input.size() != 2) { String msg = "startsWith: Only 2 parameters are allowed."; throw new IOException(msg); } if (input.get(0) == null) { return null; } try { if (!input.get(1).equals(null)) { String str1 = (String) input.get(0); String str2 = (String) input.get(1); return str1.startsWith(str2); } } catch (ExecException e) { warn("Error reading input: " + e.getMessage(), PigWarning.UDF_WARNING_1); } return null; } }