Files
doc-exports/docs/dli/dev/dli_09_0204.html
Hasko, Vladimir cfc48b3aed dli_dev_0104_version
Reviewed-by: Pruthi, Vineet <vineet.pruthi@t-systems.com>
Co-authored-by: Hasko, Vladimir <vladimir.hasko@t-systems.com>
Co-committed-by: Hasko, Vladimir <vladimir.hasko@t-systems.com>
2024-05-06 09:14:57 +00:00

233 lines
23 KiB
HTML

<a name="dli_09_0204"></a><a name="dli_09_0204"></a>
<h1 class="topictitle1">Calling UDTFs in Spark SQL Jobs</h1>
<div id="body0000001200072898"><div class="section" id="dli_09_0204__en-us_topic_0206789796_section20910549205110"><h4 class="sectiontitle">Scenario</h4><p id="dli_09_0204__en-us_topic_0206789796_p1383510563517">You can use Hive User-Defined Table-Generating Functions (UDTFs) to customize table-valued functions. Hive UDTFs are used for one-in-multiple-out data operations. A UDTF reads one row of data and outputs multiple values.</p>
</div>
<div class="section" id="dli_09_0204__section1552203831916"><h4 class="sectiontitle">Constraints</h4><ul id="dli_09_0204__ul19170107122012"><li id="dli_09_0204__li717014702012">To perform UDTF-related operations on DLI, you need to create a SQL queue instead of using the default queue.</li><li id="dli_09_0204__li28920326217">When UDTFs are used by multiple accounts, other users, except the user who creates them, need to be authorized before using the UDTF. The authorization operations are as follows:<p id="dli_09_0204__p146357331211"><a name="dli_09_0204__li28920326217"></a><a name="li28920326217"></a>Log in to the DLI console and choose <strong id="dli_09_0204__b1714094114312">Data Management</strong> &gt; <strong id="dli_09_0204__b014534112314">Package Management</strong>. On the displayed page, select your UDTF Jar package and click <strong id="dli_09_0204__b14145204103113">Manage Permissions</strong> in the <strong id="dli_09_0204__b1914614153114">Operation</strong> column. On the permission management page, click <strong id="dli_09_0204__b41461441143115">Grant Permission</strong> in the upper right corner and select the required permissions.</p>
</li><li id="dli_09_0204__li192997411761">If you use a static class or interface in a UDTF, add a <strong id="dli_09_0204__b13385125022812">try catch</strong> block to catch exceptions. Otherwise, package conflicts may occur.</li></ul>
</div>
<div class="section" id="dli_09_0204__section199842111628"><h4 class="sectiontitle">Environment Preparations</h4><p id="dli_09_0204__p8202163717211">Before you start, set up the development environment.</p>
<div class="tablenoborder"><table cellpadding="4" cellspacing="0" summary="" id="dli_09_0204__table15851625229" frame="border" border="1" rules="all"><caption><b>Table 1 </b>Development environment</caption><thead align="left"><tr id="dli_09_0204__row11859253210"><th align="left" class="cellrowborder" valign="top" width="27.63%" id="mcps1.3.3.3.2.3.1.1"><p id="dli_09_0204__p9852251528">Item</p>
</th>
<th align="left" class="cellrowborder" valign="top" width="72.37%" id="mcps1.3.3.3.2.3.1.2"><p id="dli_09_0204__p8851725529">Description</p>
</th>
</tr>
</thead>
<tbody><tr id="dli_09_0204__row78519251429"><td class="cellrowborder" valign="top" width="27.63%" headers="mcps1.3.3.3.2.3.1.1 "><p id="dli_09_0204__p108522517216">OS</p>
</td>
<td class="cellrowborder" valign="top" width="72.37%" headers="mcps1.3.3.3.2.3.1.2 "><p id="dli_09_0204__p20851825626">Windows 7 or later</p>
</td>
</tr>
<tr id="dli_09_0204__row18851325325"><td class="cellrowborder" valign="top" width="27.63%" headers="mcps1.3.3.3.2.3.1.1 "><p id="dli_09_0204__p1885825624">JDK</p>
</td>
<td class="cellrowborder" valign="top" width="72.37%" headers="mcps1.3.3.3.2.3.1.2 "><p id="dli_09_0204__p8859251424">JDK 1.8.</p>
</td>
</tr>
<tr id="dli_09_0204__row24601502619"><td class="cellrowborder" valign="top" width="27.63%" headers="mcps1.3.3.3.2.3.1.1 "><p id="dli_09_0204__p16497910469">IntelliJ IDEA</p>
</td>
<td class="cellrowborder" valign="top" width="72.37%" headers="mcps1.3.3.3.2.3.1.2 "><p id="dli_09_0204__p84601601562">This tool is used for application development. The tool version must be 2019.1 or another compatible version.</p>
</td>
</tr>
<tr id="dli_09_0204__row53111251665"><td class="cellrowborder" valign="top" width="27.63%" headers="mcps1.3.3.3.2.3.1.1 "><p id="dli_09_0204__p831117511968">Maven</p>
</td>
<td class="cellrowborder" valign="top" width="72.37%" headers="mcps1.3.3.3.2.3.1.2 "><p id="dli_09_0204__p23118511064">Basic configurations of the development environment. Maven is used for project management throughout the lifecycle of software development.</p>
</td>
</tr>
</tbody>
</table>
</div>
</div>
<div class="section" id="dli_09_0204__section54791739112210"><h4 class="sectiontitle">Development Process</h4><div class="p" id="dli_09_0204__p892144112221">The process of developing a UDTF is as follows:<div class="fignone" id="dli_09_0204__fig1631918166341"><span class="figcap"><b>Figure 1 </b>Development process</span><br><span><img id="dli_09_0204__image1931991612342" src="en-us_image_0000001200075414.png"></span></div>
<div class="tablenoborder"><table cellpadding="4" cellspacing="0" summary="" id="dli_09_0204__table1421119391677" frame="border" border="1" rules="all"><caption><b>Table 2 </b>Process description</caption><thead align="left"><tr id="dli_09_0204__row11211153918715"><th align="left" class="cellrowborder" valign="top" width="6.830601092896176%" id="mcps1.3.4.2.2.2.5.1.1"><p id="dli_09_0204__p11573151398">No.</p>
</th>
<th align="left" class="cellrowborder" valign="top" width="23.936377829820454%" id="mcps1.3.4.2.2.2.5.1.2"><p id="dli_09_0204__p8211239475">Phase</p>
</th>
<th align="left" class="cellrowborder" valign="top" width="10.685011709601874%" id="mcps1.3.4.2.2.2.5.1.3"><p id="dli_09_0204__p167011419911">Software Portal</p>
</th>
<th align="left" class="cellrowborder" valign="top" width="58.548009367681495%" id="mcps1.3.4.2.2.2.5.1.4"><p id="dli_09_0204__p1921103911712">Description</p>
</th>
</tr>
</thead>
<tbody><tr id="dli_09_0204__row102114391879"><td class="cellrowborder" valign="top" width="6.830601092896176%" headers="mcps1.3.4.2.2.2.5.1.1 "><p id="dli_09_0204__p65761516918">1</p>
</td>
<td class="cellrowborder" valign="top" width="23.936377829820454%" headers="mcps1.3.4.2.2.2.5.1.2 "><p id="dli_09_0204__p4211133911710">Create a Maven project and configure the POM file.</p>
</td>
<td class="cellrowborder" rowspan="3" valign="top" width="10.685011709601874%" headers="mcps1.3.4.2.2.2.5.1.3 "><p id="dli_09_0204__p81691210101">IntelliJ IDEA</p>
</td>
<td class="cellrowborder" rowspan="3" valign="top" width="58.548009367681495%" headers="mcps1.3.4.2.2.2.5.1.4 "><p id="dli_09_0204__p321103914719"></p>
<p id="dli_09_0204__p152111391671">Write UDTF code by referring to the steps in <a href="#dli_09_0204__en-us_topic_0206789796_section164701187527">Procedure</a>.</p>
<p id="dli_09_0204__p694692512124"></p>
</td>
</tr>
<tr id="dli_09_0204__row1211123914712"><td class="cellrowborder" valign="top" headers="mcps1.3.4.2.2.2.5.1.1 "><p id="dli_09_0204__p55731512916">2</p>
</td>
<td class="cellrowborder" valign="top" headers="mcps1.3.4.2.2.2.5.1.2 "><p id="dli_09_0204__p16211739576">Write UDTF code.</p>
</td>
</tr>
<tr id="dli_09_0204__row79452250121"><td class="cellrowborder" valign="top" headers="mcps1.3.4.2.2.2.5.1.1 "><p id="dli_09_0204__p79461255124">3</p>
</td>
<td class="cellrowborder" valign="top" headers="mcps1.3.4.2.2.2.5.1.2 "><p id="dli_09_0204__p10946172551215">Debug, compile, and pack the code into a Jar package.</p>
</td>
</tr>
<tr id="dli_09_0204__row86521956191210"><td class="cellrowborder" valign="top" width="6.830601092896176%" headers="mcps1.3.4.2.2.2.5.1.1 "><p id="dli_09_0204__p7652456101218">4</p>
</td>
<td class="cellrowborder" valign="top" width="23.936377829820454%" headers="mcps1.3.4.2.2.2.5.1.2 "><p id="dli_09_0204__p10652185691214">Upload the Jar package to OBS.</p>
</td>
<td class="cellrowborder" valign="top" width="10.685011709601874%" headers="mcps1.3.4.2.2.2.5.1.3 "><p id="dli_09_0204__p565211562128">OBS console</p>
</td>
<td class="cellrowborder" valign="top" width="58.548009367681495%" headers="mcps1.3.4.2.2.2.5.1.4 "><p id="dli_09_0204__p1165216565129">Upload the UDTF Jar file to an OBS directory.</p>
</td>
</tr>
<tr id="dli_09_0204__row18133049101414"><td class="cellrowborder" valign="top" width="6.830601092896176%" headers="mcps1.3.4.2.2.2.5.1.1 "><p id="dli_09_0204__p1513384931416">5</p>
</td>
<td class="cellrowborder" valign="top" width="23.936377829820454%" headers="mcps1.3.4.2.2.2.5.1.2 "><p id="dli_09_0204__p17133194971413">Create the UDTF on DLI.</p>
</td>
<td class="cellrowborder" valign="top" width="10.685011709601874%" headers="mcps1.3.4.2.2.2.5.1.3 "><p id="dli_09_0204__p11133449181419">DLI console</p>
</td>
<td class="cellrowborder" valign="top" width="58.548009367681495%" headers="mcps1.3.4.2.2.2.5.1.4 "><p id="dli_09_0204__p107651124156">Create a UDTF on the SQL job management page of the DLI console.</p>
</td>
</tr>
<tr id="dli_09_0204__row9403719162"><td class="cellrowborder" valign="top" width="6.830601092896176%" headers="mcps1.3.4.2.2.2.5.1.1 "><p id="dli_09_0204__p134035191618">6</p>
</td>
<td class="cellrowborder" valign="top" width="23.936377829820454%" headers="mcps1.3.4.2.2.2.5.1.2 "><p id="dli_09_0204__p114038181618">Verify and use the UDTF on DLI.</p>
</td>
<td class="cellrowborder" valign="top" width="10.685011709601874%" headers="mcps1.3.4.2.2.2.5.1.3 "><p id="dli_09_0204__p184101541614">DLI console</p>
</td>
<td class="cellrowborder" valign="top" width="58.548009367681495%" headers="mcps1.3.4.2.2.2.5.1.4 "><p id="dli_09_0204__p17403415169">Use the UDTF in your DLI job.</p>
</td>
</tr>
</tbody>
</table>
</div>
</div>
</div>
<div class="section" id="dli_09_0204__en-us_topic_0206789796_section164701187527"><a name="dli_09_0204__en-us_topic_0206789796_section164701187527"></a><a name="en-us_topic_0206789796_section164701187527"></a><h4 class="sectiontitle">Procedure</h4><ol id="dli_09_0204__en-us_topic_0206789796_ol1580116925614"><li id="dli_09_0204__en-us_topic_0206789796_li2355755226">Create a Maven project and configure the POM file. This step uses IntelliJ IDEA 2020.2 as an example.<ol type="a" id="dli_09_0204__ol38661824151412"><li id="dli_09_0204__li0428142116145">Start IntelliJ IDEA and choose <strong id="dli_09_0204__b220472123518">File</strong> &gt; <strong id="dli_09_0204__b1620419243513">New</strong> &gt; <strong id="dli_09_0204__b7204172163512">Project</strong>.<div class="fignone" id="dli_09_0204__fig1364345165410"><span class="figcap"><b>Figure 2 </b>Creating a project</span><br><span><img id="dli_09_0204__image10643135165415" src="en-us_image_0000001245542509.png"></span></div>
</li><li id="dli_09_0204__li13857332152816">Choose <strong id="dli_09_0204__b23885066474014">Maven</strong>, set <strong id="dli_09_0204__b881609174014">Project SDK</strong> to <strong id="dli_09_0204__b83552336674014">1.8</strong>, and click <strong id="dli_09_0204__b51985072374014">Next</strong>.<div class="fignone" id="dli_09_0204__fig12685314115611"><span class="figcap"><b>Figure 3 </b>Choosing Maven</span><br><span><img id="dli_09_0204__image1026247102814" src="en-us_image_0000001245010109.png"></span></div>
</li><li id="dli_09_0204__li1974116643214">Set the project name, configure the storage path, and click <strong id="dli_09_0204__b195782354274014">Finish</strong>.<div class="fignone" id="dli_09_0204__fig61971323105314"><span class="figcap"><b>Figure 4 </b>Creating a project</span><br><span><img id="dli_09_0204__image977853563211" src="en-us_image_0000001245210469.png"></span></div>
</li><li id="dli_09_0204__li56201025357">Add the following content to the <strong id="dli_09_0204__b131655565574014">pom.xml</strong> file.<pre class="screen" id="dli_09_0204__screen175551817363">&lt;dependencies&gt;
&lt;dependency&gt;
&lt;groupId&gt;org.apache.hive&lt;/groupId&gt;
&lt;artifactId&gt;hive-exec&lt;/artifactId&gt;
&lt;version&gt;1.2.1&lt;/version&gt;
&lt;/dependency&gt;
&lt;/dependencies&gt;</pre>
<div class="p" id="dli_09_0204__p1141017953718"><div class="fignone" id="dli_09_0204__fig258295814571"><span class="figcap"><b>Figure 5 </b>Adding configurations to the POM file</span><br><span><img id="dli_09_0204__image1058295817572" src="en-us_image_0000001245542693.png"></span></div>
</div>
</li><li id="dli_09_0204__li532734873814">Choose <strong id="dli_09_0204__b10616165514244">src</strong> &gt; <strong id="dli_09_0204__b14617755122412">main</strong> and right-click the <strong id="dli_09_0204__b261717555240">java</strong> folder. Choose <strong id="dli_09_0204__b461765572419">New</strong> &gt; <strong id="dli_09_0204__b9617195562415">Package</strong> to create a package and a class file.<div class="fignone" id="dli_09_0204__fig097518319548"><span class="figcap"><b>Figure 6 </b>Creating a package and a class file</span><br><span><img id="dli_09_0204__image12629101314110" src="en-us_image_0000001245011273.png"></span></div>
<p id="dli_09_0204__p242315145436">Set the package name as you need. Then, press <strong id="dli_09_0204__b1216971175015">Enter</strong>.</p>
<p id="dli_09_0204__p14790156134412">Create a Java Class file in the package path. In this example, the Java Class file is <strong id="dli_09_0204__b3367418256">UDTFSplit</strong>.</p>
</li></ol>
</li><li id="dli_09_0204__li162811928115118">Write UDTF code. For sample code, see <a href="#dli_09_0204__en-us_topic_0206789796_section10593204711240">Sample Code</a>.<p id="dli_09_0204__p191561247145412">The UDTF class must inherit <strong id="dli_09_0204__b5297134017354">org.apache.hadoop.hive.ql.udf.generic.GenericUDTF</strong> and implement the <strong id="dli_09_0204__b7812184315352">initialize</strong>, <strong id="dli_09_0204__b1586024443512">process</strong>, and <strong id="dli_09_0204__b9291448163513">close</strong> methods.</p>
<ol type="a" id="dli_09_0204__ol49451957145416"><li id="dli_09_0204__li1380025315545">Call the <strong id="dli_09_0204__b595312147367">initialize</strong> method in the UDTF. This method returns the information about the returned data rows of the UDTF, such as the number and type.</li><li id="dli_09_0204__li613815335583">Call the <strong id="dli_09_0204__b384932353715">process</strong> method to process data. Each time <strong id="dli_09_0204__b112263613720">forward()</strong> is called in the <strong id="dli_09_0204__b1227181713817">process</strong> method, a row is generated.<div class="p" id="dli_09_0204__p18262103414588">If multiple columns are generated, you can put the values in an array and pass the array to the <strong id="dli_09_0204__b7901450183920">forward()</strong> function.<pre class="screen" id="dli_09_0204__screen1534918446597">public void process(Object[] args) throws HiveException {
// TODO Auto-generated method stub
if(args.length == 0){
return;
}
String input = args[0].toString();
if(StringUtils.isEmpty(input)){
return;
}
String[] test = input.split(";");
for (int i = 0; i &lt; test.length; i++) {
try {
<strong id="dli_09_0204__b457921172111">String[] result = test[i].split(":");</strong>
<strong id="dli_09_0204__b17582310213"> forward(result);</strong>
} catch (Exception e) {
continue;
}
}
}</pre>
</div>
</li><li id="dli_09_0204__li1170460556">Call the <strong id="dli_09_0204__b136391719184011">close</strong> method to clean up resources that need to be closed.</li></ol>
</li><li id="dli_09_0204__li12693471628">Use IntelliJ IDEA to compile the code and pack it into the JAR package.<ol type="a" id="dli_09_0204__ol3387191918248"><li id="dli_09_0204__li469121413243">Click <strong id="dli_09_0204__b462213734210">Maven</strong> in the tool bar on the right, and click <strong id="dli_09_0204__b17622147194218">clean</strong> and <strong id="dli_09_0204__b13623117154212">compile</strong> to compile the code.<p id="dli_09_0204__p1190194252510">After the compilation is successful, click <strong id="dli_09_0204__b1499081374218">package</strong>.</p>
<p id="dli_09_0204__p7583182817281">The generated JAR package is stored in the <strong id="dli_09_0204__b1481738184320">target</strong> directory. In this example, <strong id="dli_09_0204__b1038054074311">MyUDTF-1.0-SNAPSHOT.jar</strong> is stored in <strong id="dli_09_0204__b1638034017435">D:\MyUDTF\target</strong>.</p>
</li></ol>
</li><li id="dli_09_0204__li124681258143211">Log in to the OBS console and upload the file to the OBS path.<div class="note" id="dli_09_0204__note1529413575330"><img src="public_sys-resources/note_3.0-en-us.png"><span class="notetitle"> </span><div class="notebody"><p id="dli_09_0204__p429435710335">The region of the OBS bucket to which the Jar package is uploaded must be the same as the region of the DLI queue. Cross-region operations are not allowed.</p>
</div></div>
</li><li id="dli_09_0204__li17620361980">(Optional) Upload the file to DLI for package management.<ol type="a" id="dli_09_0204__ol126811502141"><li id="dli_09_0204__li146162591139">Log in to the DLI management console and choose <strong id="dli_09_0204__b196401444442">Data Management</strong> &gt; <strong id="dli_09_0204__b15646149449">Package Management</strong>.</li><li id="dli_09_0204__li12933172414145">On the <strong id="dli_09_0204__b113104256674014">Package Management</strong> page, click <strong id="dli_09_0204__b71653142574014">Create</strong> in the upper right corner.</li><li id="dli_09_0204__li8291571157">In the <strong id="dli_09_0204__b5057709474014">Create Package</strong> dialog, set the following parameters:<ol class="substepthirdol" id="dli_09_0204__ol15544204152016"><li id="dli_09_0204__li773463514165"><strong id="dli_09_0204__b153651978074014">Type</strong>: Select <strong id="dli_09_0204__b137342616274014">JAR</strong>.</li><li id="dli_09_0204__li1860664711190"><strong id="dli_09_0204__b54385167974014">OBS Path</strong>: Specify the OBS path for storing the package.</li><li id="dli_09_0204__li182111619162020">Set <strong id="dli_09_0204__b52630364674014">Group</strong> and <strong id="dli_09_0204__b182519162174014">Group Name</strong> as required for package identification and management.</li></ol>
</li><li id="dli_09_0204__li19882201371513">Click <strong id="dli_09_0204__b73643173974014">OK</strong>.</li></ol>
</li><li id="dli_09_0204__en-us_topic_0206789796_li9516133616203"><a name="dli_09_0204__en-us_topic_0206789796_li9516133616203"></a><a name="en-us_topic_0206789796_li9516133616203"></a>Create the UDTF on DLI.<ol type="a" id="dli_09_0204__ol10758142517377"><li id="dli_09_0204__li128611920173710">Log in to the DLI console and choose <strong id="dli_09_0204__b1842591813448">SQL Editor</strong>. Set <strong id="dli_09_0204__b143116183443">Engine</strong> to <strong id="dli_09_0204__b343112188444">spark</strong>, and select the created SQL queue and database.</li><li id="dli_09_0204__li114046144815">In the SQL editing area, enter the following statement, which specifies the OBS path of the uploaded JAR file, to create a UDTF and click <strong id="dli_09_0204__b169211019173517">Execute</strong>.<pre class="screen" id="dli_09_0204__screen9585108193711">CREATE FUNCTION mytestsplit AS 'com.demo.UDTFSplit' using jar 'obs://dli-test-obs01/MyUDTF-1.0-SNAPSHOT.jar';</pre>
</li></ol>
</li><li id="dli_09_0204__li1547203712127">Restart the original SQL queue for the added function to take effect.<ol type="a" id="dli_09_0204__ol6915156171419"><li id="dli_09_0204__li195347431411">Log in to the DLI management console and choose <strong id="dli_09_0204__b2378324113515">Resources</strong> &gt; <strong id="dli_09_0204__b1537852414352">Queue Management</strong> from the navigation pane. In the <strong id="dli_09_0204__b193781024103510">Operation</strong> column of the SQL queue job, click <strong id="dli_09_0204__b16378192416354">Restart</strong>.</li><li id="dli_09_0204__li770731015168">In the <strong id="dli_09_0204__b5292105618457">Restart</strong> dialog box, click <strong id="dli_09_0204__b329295624510">OK</strong>.</li></ol>
</li><li id="dli_09_0204__en-us_topic_0206789796_li816783552118">Verify and use the UDTF on DLI.<p id="dli_09_0204__en-us_topic_0206789796_p1064914469213"><a name="dli_09_0204__en-us_topic_0206789796_li816783552118"></a><a name="en-us_topic_0206789796_li816783552118"></a>Use the UDTF created in <a href="#dli_09_0204__en-us_topic_0206789796_li9516133616203">6</a> in the SELECT statement as follows:</p>
<pre class="screen" id="dli_09_0204__screen5786134210256">select mytestsplit('abc:123\;efd:567\;utf:890');</pre>
<p id="dli_09_0204__p043282031610"></p>
</li><li id="dli_09_0204__en-us_topic_0206789796_li7751241152315">(Optional) Delete the UDTF.<p id="dli_09_0204__en-us_topic_0206789796_p2654842182313"><a name="dli_09_0204__en-us_topic_0206789796_li7751241152315"></a><a name="en-us_topic_0206789796_li7751241152315"></a>If this function is no longer used, run the following statement to delete the function:</p>
<pre class="screen" id="dli_09_0204__screen58694547259">Drop FUNCTION mytestsplit;</pre>
</li></ol>
</div>
<div class="section" id="dli_09_0204__en-us_topic_0206789796_section10593204711240"><a name="dli_09_0204__en-us_topic_0206789796_section10593204711240"></a><a name="en-us_topic_0206789796_section10593204711240"></a><h4 class="sectiontitle">Sample Code</h4><p id="dli_09_0204__en-us_topic_0206789796_p849427102520">The complete <strong id="dli_09_0204__b7546153194718">UDTFSplit.java</strong> code is as follows:</p>
<pre class="screen" id="dli_09_0204__en-us_topic_0206789796_screen122633163018">import java.util.ArrayList;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
/**
 * Sample Hive UDTF that turns one delimited string into multiple two-column rows.
 *
 * The input is split on ";" into entries, and each entry is split on ":" before it is
 * forwarded as one output row. For example, "abc:123;efd:567" is forwarded as the rows
 * (abc, 123) and (efd, 567).
 */
public class UDTFSplit extends GenericUDTF {

    /**
     * This sample holds no resources, so there is nothing to release here.
     *
     * @throws HiveException declared by the overridden method; never thrown by this sample
     */
    @Override
    public void close() throws HiveException {
        // Intentionally empty.
    }

    /**
     * Splits the first argument into entries and forwards each entry as one row.
     *
     * @param args arguments of the function call; only the first one is read
     * @throws HiveException declared by the overridden method
     */
    @Override
    public void process(Object[] args) throws HiveException {
        if(args.length == 0){
            return;
        }
        String input = args[0].toString();
        if(StringUtils.isEmpty(input)){
            return;
        }
        // ";" separates the entries; ":" separates the columns inside one entry.
        String[] test = input.split(";");
        for (int i = 0; i &lt; test.length; i++) {
            try {
                String[] result = test[i].split(":");
                // The array passed to forward() carries the column values of one row.
                forward(result);
            } catch (Exception e) {
                // Best effort: skip an entry that cannot be forwarded and go on with the next one.
                continue;
            }
        }
    }

    /**
     * Validates the call arguments and declares the output schema, which consists of the
     * two string columns col1 and col2.
     *
     * @param args inspectors describing the arguments of the function call
     * @return struct inspector describing the rows that process forwards
     * @throws UDFArgumentException if the call does not pass exactly one primitive argument
     */
    @Override
    public StructObjectInspector initialize(ObjectInspector[] args) throws UDFArgumentException {
        if (args.length != 1) {
            throw new UDFArgumentLengthException("UDTFSplit takes only one argument");
        }
        if (args[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
            throw new UDFArgumentException("UDTFSplit takes string as a parameter");
        }
        ArrayList&lt;String&gt; fieldNames = new ArrayList&lt;String&gt;();
        ArrayList&lt;ObjectInspector&gt; fieldOIs = new ArrayList&lt;ObjectInspector&gt;();
        fieldNames.add("col1");
        fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        fieldNames.add("col2");
        fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
    }
}</pre>
</div>
</div>
<div>
<div class="familylinks">
<div class="parentlink"><strong>Parent topic:</strong> <a href="dli_09_0120.html">SQL Jobs</a></div>
</div>
</div>