forked from docs/doc-exports
Reviewed-by: Pruthi, Vineet <vineet.pruthi@t-systems.com> Co-authored-by: Su, Xiaomeng <suxiaomeng1@huawei.com> Co-committed-by: Su, Xiaomeng <suxiaomeng1@huawei.com>
222 lines
16 KiB
HTML
222 lines
16 KiB
HTML
<a name="dli_08_15024"></a><a name="dli_08_15024"></a>
|
|
|
|
<h1 class="topictitle1">ORC</h1>
|
|
<div id="body0000001780860885"><div class="section" id="dli_08_15024__section2697143424312"><h4 class="sectiontitle">Function</h4><p id="dli_08_15024__p6670341124310">The Apache ORC format allows you to read and write ORC data. For details, see <a href="https://nightlies.apache.org/flink/flink-docs-release-1.15/docs/connectors/table/formats/orc/" target="_blank" rel="noopener noreferrer">ORC Format</a>.</p>
|
|
</div>
|
|
<div class="section" id="dli_08_15024__section122491371116"><h4 class="sectiontitle">Supported Connectors</h4><ul id="dli_08_15024__ul188074312166"><li id="dli_08_15024__li14357112884017">FileSystem</li></ul>
|
|
</div>
|
|
<div class="section" id="dli_08_15024__section1126520512444"><h4 class="sectiontitle">Parameter Description</h4>
|
|
<div class="tablenoborder"><table cellpadding="4" cellspacing="0" summary="" id="dli_08_15024__table51831049681" frame="border" border="1" rules="all"><caption><b>Table 1 </b>Parameters</caption><thead align="left"><tr id="dli_08_15024__row11832491881"><th align="left" class="cellrowborder" valign="top" width="20%" id="mcps1.3.3.2.2.6.1.1"><p id="dli_08_15024__p10183349181">Parameter</p>
|
|
</th>
|
|
<th align="left" class="cellrowborder" valign="top" width="20%" id="mcps1.3.3.2.2.6.1.2"><p id="dli_08_15024__p31834491182">Mandatory</p>
|
|
</th>
|
|
<th align="left" class="cellrowborder" valign="top" width="20%" id="mcps1.3.3.2.2.6.1.3"><p id="dli_08_15024__p1518317494810">Default Value</p>
|
|
</th>
|
|
<th align="left" class="cellrowborder" valign="top" width="20%" id="mcps1.3.3.2.2.6.1.4"><p id="dli_08_15024__p191834491584">Data Type</p>
|
|
</th>
|
|
<th align="left" class="cellrowborder" valign="top" width="20%" id="mcps1.3.3.2.2.6.1.5"><p id="dli_08_15024__p91839491485">Description</p>
|
|
</th>
|
|
</tr>
|
|
</thead>
|
|
<tbody><tr id="dli_08_15024__row3183849987"><td class="cellrowborder" valign="top" width="20%" headers="mcps1.3.3.2.2.6.1.1 "><p id="dli_08_15024__p94431926691">format</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="20%" headers="mcps1.3.3.2.2.6.1.2 "><p id="dli_08_15024__p161837491988">Yes</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="20%" headers="mcps1.3.3.2.2.6.1.3 "><p id="dli_08_15024__p11832491884">None</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="20%" headers="mcps1.3.3.2.2.6.1.4 "><p id="dli_08_15024__p131838495813">String</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="20%" headers="mcps1.3.3.2.2.6.1.5 "><p id="dli_08_15024__p41836492815">Format to use. Set this parameter to <strong id="dli_08_15024__b417663211206">orc</strong>.</p>
|
|
</td>
|
|
</tr>
|
|
</tbody>
|
|
</table>
|
|
</div>
|
|
<p id="dli_08_15024__p1052712584813">The ORC format also supports the table properties described in <a href="https://orc.apache.org/docs/hive-config.html#table-properties" target="_blank" rel="noopener noreferrer">Table properties</a>. For example, you can configure <strong id="dli_08_15024__b13677161781714">orc.compress=SNAPPY</strong> to enable Snappy compression.</p>
|
|
</div>
|
|
<div class="section" id="dli_08_15024__section1247201119581"><h4 class="sectiontitle">Data Type Mapping</h4><p id="dli_08_15024__p17882517105810">ORC format type mapping is compatible with Apache Hive. The following table lists the type mapping from Flink type to ORC type.</p>
|
|
|
|
<div class="tablenoborder"><table cellpadding="4" cellspacing="0" summary="" id="dli_08_15024__table663194019588" frame="border" border="1" rules="all"><caption><b>Table 2 </b>Data type mapping</caption><thead align="left"><tr id="dli_08_15024__row12631164025815"><th align="left" class="cellrowborder" valign="top" width="33.33333333333333%" id="mcps1.3.4.3.2.4.1.1"><p id="dli_08_15024__p46311940115810">Flink SQL Type</p>
|
|
</th>
|
|
<th align="left" class="cellrowborder" valign="top" width="33.33333333333333%" id="mcps1.3.4.3.2.4.1.2"><p id="dli_08_15024__p15631194065817">ORC Physical Type</p>
|
|
</th>
|
|
<th align="left" class="cellrowborder" valign="top" width="33.33333333333333%" id="mcps1.3.4.3.2.4.1.3"><p id="dli_08_15024__p163194065812">ORC Logical Type</p>
|
|
</th>
|
|
</tr>
|
|
</thead>
|
|
<tbody><tr id="dli_08_15024__row26311408588"><td class="cellrowborder" valign="top" width="33.33333333333333%" headers="mcps1.3.4.3.2.4.1.1 "><p id="dli_08_15024__p9631174075812">CHAR</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="33.33333333333333%" headers="mcps1.3.4.3.2.4.1.2 "><p id="dli_08_15024__p136311040195810">bytes</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="33.33333333333333%" headers="mcps1.3.4.3.2.4.1.3 "><p id="dli_08_15024__p1663114407585">CHAR</p>
|
|
</td>
|
|
</tr>
|
|
<tr id="dli_08_15024__row1631144075816"><td class="cellrowborder" valign="top" width="33.33333333333333%" headers="mcps1.3.4.3.2.4.1.1 "><p id="dli_08_15024__p11631140185819">VARCHAR</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="33.33333333333333%" headers="mcps1.3.4.3.2.4.1.2 "><p id="dli_08_15024__p0631184055811">bytes</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="33.33333333333333%" headers="mcps1.3.4.3.2.4.1.3 "><p id="dli_08_15024__p15631204011583">VARCHAR</p>
|
|
</td>
|
|
</tr>
|
|
<tr id="dli_08_15024__row7631164045814"><td class="cellrowborder" valign="top" width="33.33333333333333%" headers="mcps1.3.4.3.2.4.1.1 "><p id="dli_08_15024__p13631114035815">STRING</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="33.33333333333333%" headers="mcps1.3.4.3.2.4.1.2 "><p id="dli_08_15024__p1663174015585">bytes</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="33.33333333333333%" headers="mcps1.3.4.3.2.4.1.3 "><p id="dli_08_15024__p20631134085814">STRING</p>
|
|
</td>
|
|
</tr>
|
|
<tr id="dli_08_15024__row176319409584"><td class="cellrowborder" valign="top" width="33.33333333333333%" headers="mcps1.3.4.3.2.4.1.1 "><p id="dli_08_15024__p06317404582">BOOLEAN</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="33.33333333333333%" headers="mcps1.3.4.3.2.4.1.2 "><p id="dli_08_15024__p8631124018586">long</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="33.33333333333333%" headers="mcps1.3.4.3.2.4.1.3 "><p id="dli_08_15024__p6631840145816">BOOLEAN</p>
|
|
</td>
|
|
</tr>
|
|
<tr id="dli_08_15024__row186311340135812"><td class="cellrowborder" valign="top" width="33.33333333333333%" headers="mcps1.3.4.3.2.4.1.1 "><p id="dli_08_15024__p14631134045820">BYTES</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="33.33333333333333%" headers="mcps1.3.4.3.2.4.1.2 "><p id="dli_08_15024__p136311640145820">bytes</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="33.33333333333333%" headers="mcps1.3.4.3.2.4.1.3 "><p id="dli_08_15024__p176311740165817">BINARY</p>
|
|
</td>
|
|
</tr>
|
|
<tr id="dli_08_15024__row9631104075820"><td class="cellrowborder" valign="top" width="33.33333333333333%" headers="mcps1.3.4.3.2.4.1.1 "><p id="dli_08_15024__p166310408584">DECIMAL</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="33.33333333333333%" headers="mcps1.3.4.3.2.4.1.2 "><p id="dli_08_15024__p1263164015812">decimal</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="33.33333333333333%" headers="mcps1.3.4.3.2.4.1.3 "><p id="dli_08_15024__p2631540205811">DECIMAL</p>
|
|
</td>
|
|
</tr>
|
|
<tr id="dli_08_15024__row9631124005811"><td class="cellrowborder" valign="top" width="33.33333333333333%" headers="mcps1.3.4.3.2.4.1.1 "><p id="dli_08_15024__p46311040195818">TINYINT</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="33.33333333333333%" headers="mcps1.3.4.3.2.4.1.2 "><p id="dli_08_15024__p2063194015581">long</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="33.33333333333333%" headers="mcps1.3.4.3.2.4.1.3 "><p id="dli_08_15024__p15631040185817">BYTE</p>
|
|
</td>
|
|
</tr>
|
|
<tr id="dli_08_15024__row19631140165813"><td class="cellrowborder" valign="top" width="33.33333333333333%" headers="mcps1.3.4.3.2.4.1.1 "><p id="dli_08_15024__p166322040105817">SMALLINT</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="33.33333333333333%" headers="mcps1.3.4.3.2.4.1.2 "><p id="dli_08_15024__p1863214408580">long</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="33.33333333333333%" headers="mcps1.3.4.3.2.4.1.3 "><p id="dli_08_15024__p663215401586">SHORT</p>
|
|
</td>
|
|
</tr>
|
|
<tr id="dli_08_15024__row563224015589"><td class="cellrowborder" valign="top" width="33.33333333333333%" headers="mcps1.3.4.3.2.4.1.1 "><p id="dli_08_15024__p3632104095816">INT</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="33.33333333333333%" headers="mcps1.3.4.3.2.4.1.2 "><p id="dli_08_15024__p163214407589">long</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="33.33333333333333%" headers="mcps1.3.4.3.2.4.1.3 "><p id="dli_08_15024__p9632124055817">INT</p>
|
|
</td>
|
|
</tr>
|
|
<tr id="dli_08_15024__row16323402588"><td class="cellrowborder" valign="top" width="33.33333333333333%" headers="mcps1.3.4.3.2.4.1.1 "><p id="dli_08_15024__p1263220403588">BIGINT</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="33.33333333333333%" headers="mcps1.3.4.3.2.4.1.2 "><p id="dli_08_15024__p163214019581">long</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="33.33333333333333%" headers="mcps1.3.4.3.2.4.1.3 "><p id="dli_08_15024__p2063216403585">LONG</p>
|
|
</td>
|
|
</tr>
|
|
<tr id="dli_08_15024__row1363244075810"><td class="cellrowborder" valign="top" width="33.33333333333333%" headers="mcps1.3.4.3.2.4.1.1 "><p id="dli_08_15024__p126321240165811">FLOAT</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="33.33333333333333%" headers="mcps1.3.4.3.2.4.1.2 "><p id="dli_08_15024__p1163211404584">double</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="33.33333333333333%" headers="mcps1.3.4.3.2.4.1.3 "><p id="dli_08_15024__p186321340145819">FLOAT</p>
|
|
</td>
|
|
</tr>
|
|
<tr id="dli_08_15024__row76321040185810"><td class="cellrowborder" valign="top" width="33.33333333333333%" headers="mcps1.3.4.3.2.4.1.1 "><p id="dli_08_15024__p17632540155810">DOUBLE</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="33.33333333333333%" headers="mcps1.3.4.3.2.4.1.2 "><p id="dli_08_15024__p13632340175814">double</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="33.33333333333333%" headers="mcps1.3.4.3.2.4.1.3 "><p id="dli_08_15024__p463215402587">DOUBLE</p>
|
|
</td>
|
|
</tr>
|
|
<tr id="dli_08_15024__row19632204012585"><td class="cellrowborder" valign="top" width="33.33333333333333%" headers="mcps1.3.4.3.2.4.1.1 "><p id="dli_08_15024__p76321440175819">DATE</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="33.33333333333333%" headers="mcps1.3.4.3.2.4.1.2 "><p id="dli_08_15024__p56321340125810">long</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="33.33333333333333%" headers="mcps1.3.4.3.2.4.1.3 "><p id="dli_08_15024__p463204085810">DATE</p>
|
|
</td>
|
|
</tr>
|
|
<tr id="dli_08_15024__row18632134017585"><td class="cellrowborder" valign="top" width="33.33333333333333%" headers="mcps1.3.4.3.2.4.1.1 "><p id="dli_08_15024__p7632640205820">TIMESTAMP</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="33.33333333333333%" headers="mcps1.3.4.3.2.4.1.2 "><p id="dli_08_15024__p15632440205813">timestamp</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="33.33333333333333%" headers="mcps1.3.4.3.2.4.1.3 "><p id="dli_08_15024__p1632134019580">TIMESTAMP</p>
|
|
</td>
|
|
</tr>
|
|
<tr id="dli_08_15024__row563214402580"><td class="cellrowborder" valign="top" width="33.33333333333333%" headers="mcps1.3.4.3.2.4.1.1 "><p id="dli_08_15024__p13632104015583">ARRAY</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="33.33333333333333%" headers="mcps1.3.4.3.2.4.1.2 "><p id="dli_08_15024__p16632104095819">-</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="33.33333333333333%" headers="mcps1.3.4.3.2.4.1.3 "><p id="dli_08_15024__p12632124010588">LIST</p>
|
|
</td>
|
|
</tr>
|
|
<tr id="dli_08_15024__row186321040145814"><td class="cellrowborder" valign="top" width="33.33333333333333%" headers="mcps1.3.4.3.2.4.1.1 "><p id="dli_08_15024__p863274011583">MAP</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="33.33333333333333%" headers="mcps1.3.4.3.2.4.1.2 "><p id="dli_08_15024__p1163212408584">-</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="33.33333333333333%" headers="mcps1.3.4.3.2.4.1.3 "><p id="dli_08_15024__p463220408584">MAP</p>
|
|
</td>
|
|
</tr>
|
|
<tr id="dli_08_15024__row1963294010585"><td class="cellrowborder" valign="top" width="33.33333333333333%" headers="mcps1.3.4.3.2.4.1.1 "><p id="dli_08_15024__p1632124011586">ROW</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="33.33333333333333%" headers="mcps1.3.4.3.2.4.1.2 "><p id="dli_08_15024__p2632194011581">-</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="33.33333333333333%" headers="mcps1.3.4.3.2.4.1.3 "><p id="dli_08_15024__p2632240115815">STRUCT</p>
|
|
</td>
|
|
</tr>
|
|
</tbody>
|
|
</table>
|
|
</div>
|
|
</div>
|
|
<div class="section" id="dli_08_15024__section457955774517"><h4 class="sectiontitle">Example</h4><p id="dli_08_15024__p15881132116016">Use Kafka to send data and write the data to OBS as ORC files through the FileSystem connector.</p>
|
|
<ol id="dli_08_15024__ol840395722311"><li id="dli_08_15024__li04031578234"><span>Create a datasource connection for the communication with the VPC and subnet where Kafka is located and bind the connection to the queue. Set a security group and inbound rule to allow access of the queue and test the connectivity of the queue using the Kafka IP address. For example, locate a general-purpose queue where the job runs and choose <strong id="dli_08_15024__b121805181142624">More</strong> > <strong id="dli_08_15024__b144664762542624">Test Address Connectivity</strong> in the <strong id="dli_08_15024__b13838900442624">Operation</strong> column. If the connection is successful, the datasource is bound to the queue. Otherwise, the binding fails.</span></li><li id="dli_08_15024__li1599913011242"><span>Create a Flink OpenSource SQL job and enable checkpointing. Copy the following statement and submit the job:</span><p><pre class="screen" id="dli_08_15024__screen299960162418">CREATE TABLE kafkaSource (
|
|
order_id string,
|
|
order_channel string,
|
|
order_time string,
|
|
pay_amount double,
|
|
real_pay double,
|
|
pay_time string,
|
|
user_id string,
|
|
user_name string,
|
|
area_id string
|
|
) WITH (
|
|
'connector' = 'kafka',
|
|
'topic-pattern' = '<em id="dli_08_15024__i202647434911"><strong id="dli_08_15024__b16264194311914">kafkaTopic</strong></em>',
|
|
'properties.bootstrap.servers' = '<em id="dli_08_15024__i44191052793"><strong id="dli_08_15024__b144196521996">KafkaAddress1:KafkaPort,KafkaAddress2:KafkaPort</strong></em>',
|
|
'properties.group.id' = '<em id="dli_08_15024__i171985713915"><strong id="dli_08_15024__b55924560916">GroupId</strong></em>',
|
|
'scan.startup.mode' = 'latest-offset',
|
|
'format' = 'csv'
|
|
);
|
|
|
|
|
|
CREATE TABLE sink (
|
|
order_id string,
|
|
order_channel string,
|
|
order_time string,
|
|
pay_amount double,
|
|
real_pay double,
|
|
pay_time string,
|
|
user_id string,
|
|
user_name string,
|
|
area_id string
|
|
) WITH (
|
|
'connector' = 'filesystem',
|
|
'format' = 'orc',
|
|
'path' = 'obs://xx'
|
|
);
|
|
insert into sink select * from kafkaSource; </pre>
|
|
</p></li><li id="dli_08_15024__li1511420343241"><span>Insert the following data into the source Kafka topic:</span><p><pre class="screen" id="dli_08_15024__screen107391221112410">202103251505050001,appshop,2021-03-25 15:05:05,500.00,400.00,2021-03-25 15:10:00,0003,Cindy,330108
|
|
|
|
202103241606060001,appShop,2021-03-24 16:06:06,200.00,180.00,2021-03-24 16:10:06,0001,Alice,330106</pre>
|
|
</p></li><li id="dli_08_15024__li4353143193117"><span>Read the ORC file in the OBS path configured in the sink table. The data results are as follows:</span><p><pre class="screen" id="dli_08_15024__screen14251955184812">202103251505050001, appshop, 2021-03-25 15:05:05, 500.0, 400.0, 2021-03-25 15:10:00, 0003, Cindy, 330108
|
|
|
|
202103241606060001, appShop, 2021-03-24 16:06:06, 200.0, 180.0, 2021-03-24 16:10:06, 0001, Alice, 330106</pre>
|
|
</p></li></ol>
|
|
</div>
|
|
</div>
|
|
<div>
|
|
<div class="familylinks">
|
|
<div class="parentlink"><strong>Parent topic:</strong> <a href="dli_08_15014.html">Formats</a></div>
|
|
</div>
|
|
</div>
|
|
|