forked from docs/doc-exports
Reviewed-by: Pruthi, Vineet <vineet.pruthi@t-systems.com> Co-authored-by: Hasko, Vladimir <vladimir.hasko@t-systems.com> Co-committed-by: Hasko, Vladimir <vladimir.hasko@t-systems.com>
264 lines
28 KiB
HTML
264 lines
28 KiB
HTML
<a name="dli_09_0114"></a><a name="dli_09_0114"></a>
|
|
|
|
<h1 class="topictitle1">Scala Example Code</h1>
|
|
<div id="body8662426"><div class="section" id="dli_09_0114__section15381055114517"><h4 class="sectiontitle">Development Description</h4><p id="dli_09_0114__en-us_topic_0204096844_p492312464537">Mongo can be accessed only through enhanced datasource connections. </p>
|
|
<div class="note" id="dli_09_0114__note12343132893511"><img src="public_sys-resources/note_3.0-en-us.png"><span class="notetitle"> </span><div class="notebody"><p id="dli_09_0114__p1734422863515">DDS is compatible with the MongoDB protocol.</p>
|
|
</div></div>
|
|
<p id="dli_09_0114__p6629155314372">An enhanced datasource connection has been created on the DLI management console and bound to a queue in packages. </p>
|
|
<div class="note" id="dli_09_0114__note1358715714155"><img src="public_sys-resources/note_3.0-en-us.png"><span class="notetitle"> </span><div class="notebody"><p id="dli_09_0114__p692572617287">Hard-coded or plaintext passwords pose significant security risks. To ensure security, encrypt your passwords, store them in configuration files or environment variables, and decrypt them when needed.</p>
|
|
</div></div>
|
|
<ul id="dli_09_0114__ul132716113464"><li id="dli_09_0114__li108933114461">Constructing dependency information and creating a Spark session<ol id="dli_09_0114__en-us_topic_0204096844_ol433013261137"><li id="dli_09_0114__en-us_topic_0204096844_li1825623917170">Import dependencies.<div class="p" id="dli_09_0114__en-us_topic_0204096844_p0101458105614"><a name="dli_09_0114__en-us_topic_0204096844_li1825623917170"></a><a name="en-us_topic_0204096844_li1825623917170"></a>Maven dependency involved<div class="codecoloring" codetype="Scala" id="dli_09_0114__en-us_topic_0204096844_screen5760163172012"><div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span class="normal">1</span>
|
|
<span class="normal">2</span>
|
|
<span class="normal">3</span>
|
|
<span class="normal">4</span>
|
|
<span class="normal">5</span></pre></div></td><td class="code"><div><pre><span></span><span class="o"><</span><span class="n">dependency</span><span class="o">></span>
|
|
<span class="w"> </span><span class="o"><</span><span class="n">groupId</span><span class="o">></span><span class="n">org</span><span class="p">.</span><span class="n">apache</span><span class="p">.</span><span class="n">spark</span><span class="o"></</span><span class="n">groupId</span><span class="o">></span>
|
|
<span class="w"> </span><span class="o"><</span><span class="n">artifactId</span><span class="o">></span><span class="n">spark</span><span class="o">-</span><span class="n">sql_2</span><span class="mf">.11</span><span class="o"></</span><span class="n">artifactId</span><span class="o">></span>
|
|
<span class="w"> </span><span class="o"><</span><span class="n">version</span><span class="o">></span><span class="mf">2.3.2</span><span class="o"></</span><span class="n">version</span><span class="o">></span>
|
|
<span class="o"></</span><span class="n">dependency</span><span class="o">></span>
|
|
</pre></div></td></tr></table></div>
|
|
|
|
</div>
|
|
</div>
|
|
<div class="p" id="dli_09_0114__en-us_topic_0204096844_p9835524175614">Import dependency packages.<pre class="screen" id="dli_09_0114__en-us_topic_0204096844_screen2329162571618">import org.apache.spark.sql.{Row, SaveMode, SparkSession}
|
|
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}</pre>
|
|
</div>
|
|
<div class="p" id="dli_09_0114__en-us_topic_0204096844_p32261820125710">Create a session.<pre class="screen" id="dli_09_0114__en-us_topic_0204096844_screen2610193835716">val sparkSession = SparkSession.builder().appName("datasource-mongo").getOrCreate()</pre>
|
|
</div>
|
|
</li></ol>
|
|
</li><li id="dli_09_0114__li128491810164712">Connecting to data sources through SQL APIs<ol id="dli_09_0114__en-us_topic_0204096844_ol14881939125919"><li id="dli_09_0114__en-us_topic_0204096844_li208833912591">Create a table to connect to a Mongo data source.<pre class="screen" id="dli_09_0114__en-us_topic_0204096844_screen189271321503">sparkSession.sql(
|
|
"create table test_dds(id string, name string, age int) using mongo options(
|
|
'url' = '192.168.4.62:8635,192.168.5.134:8635/test?authSource=admin',
|
|
'uri' = 'mongodb://<em id="dli_09_0114__i161001911114619">username</em>:<em id="dli_09_0114__i1982841213461">pwd</em>@<em id="dli_09_0114__i13955514144612">host</em>:8635/<em id="dli_09_0114__i1653231724610">db</em>',
|
|
'database' = 'test',
|
|
'collection' = 'test',
|
|
'user' = 'rwuser',
|
|
'password' = '######')")</pre>
|
|
|
|
<div class="tablenoborder"><a name="dli_09_0114__en-us_topic_0204096844_table2072415395012"></a><a name="en-us_topic_0204096844_table2072415395012"></a><table cellpadding="4" cellspacing="0" summary="" id="dli_09_0114__en-us_topic_0204096844_table2072415395012" frame="border" border="1" rules="all"><caption><b>Table 1 </b>Parameters for creating a table</caption><thead align="left"><tr id="dli_09_0114__en-us_topic_0204096844_row07251391503"><th align="left" class="cellrowborder" valign="top" width="14.39%" id="mcps1.3.1.6.2.1.1.2.2.3.1.1"><p id="dli_09_0114__en-us_topic_0204096844_p19725153917011">Parameter</p>
|
|
</th>
|
|
<th align="left" class="cellrowborder" valign="top" width="85.61%" id="mcps1.3.1.6.2.1.1.2.2.3.1.2"><p id="dli_09_0114__en-us_topic_0204096844_p844412549115">Description</p>
|
|
</th>
|
|
</tr>
|
|
</thead>
|
|
<tbody><tr id="dli_09_0114__en-us_topic_0204096844_row187251339506"><td class="cellrowborder" valign="top" width="14.39%" headers="mcps1.3.1.6.2.1.1.2.2.3.1.1 "><p id="dli_09_0114__en-us_topic_0204096844_p177258397019">url</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="85.61%" headers="mcps1.3.1.6.2.1.1.2.2.3.1.2 "><ul id="dli_09_0114__en-us_topic_0204096844_ul147219105237"><li id="dli_09_0114__en-us_topic_0204096844_li1172141019233">URL format:<p id="dli_09_0114__en-us_topic_0204096844_p186884176012"><a name="dli_09_0114__en-us_topic_0204096844_li1172141019233"></a><a name="en-us_topic_0204096844_li1172141019233"></a>"IP:PORT[,IP:PORT]/[DATABASE][.COLLECTION][AUTH_PROPERTIES]"</p>
|
|
<p id="dli_09_0114__en-us_topic_0204096844_p1545918141017">Example:</p>
|
|
<pre class="screen" id="dli_09_0114__screen1660919262415">"192.168.4.62:8635/test?authSource=admin"</pre>
|
|
</li><li id="dli_09_0114__en-us_topic_0204096844_li16954192662310">The URL needs to be obtained from the Mongo (DDS) connection address.<p id="dli_09_0114__en-us_topic_0204096844_p14527162612114"><a name="dli_09_0114__en-us_topic_0204096844_li16954192662310"></a><a name="en-us_topic_0204096844_li16954192662310"></a>The obtained Mongo connection address is in the following format: <strong id="dli_09_0114__en-us_topic_0204096844_b5156124714118"><em id="dli_09_0114__i8421627924">Protocol header</em>://<em id="dli_09_0114__i228114301022">Username</em>:<em id="dli_09_0114__i17485331826">Password</em>@<em id="dli_09_0114__i1670110361622">Connection address</em>:<em id="dli_09_0114__i17184415211">Port number</em>/<em id="dli_09_0114__i850354517211">Database name</em>?authSource=admin</strong></p>
|
|
<p id="dli_09_0114__en-us_topic_0204096844_p125272266117">Example:</p>
|
|
<pre class="screen" id="dli_09_0114__screen1982523714611">mongodb://rwuser:****@192.168.4.62:8635,192.168.5.134:8635/test?authSource=admin</pre>
|
|
</li></ul>
|
|
</td>
|
|
</tr>
|
|
<tr id="dli_09_0114__en-us_topic_0204096844_row1725139605"><td class="cellrowborder" valign="top" width="14.39%" headers="mcps1.3.1.6.2.1.1.2.2.3.1.1 "><p id="dli_09_0114__p16716123191211">uri</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="85.61%" headers="mcps1.3.1.6.2.1.1.2.2.3.1.2 "><p id="dli_09_0114__p14801103031712">URI format: <strong id="dli_09_0114__b48644992016">mongodb://username:pwd@host:8635/db</strong></p>
|
|
<p id="dli_09_0114__p378852921912">Set the following parameters to the actual values:</p>
|
|
<ul id="dli_09_0114__ul27974505198"><li id="dli_09_0114__li12797155015192"><strong id="dli_09_0114__b917143112019">username</strong>: username used for creating the Mongo (DDS) database</li><li id="dli_09_0114__li973614919200"><strong id="dli_09_0114__b589473216246">pwd</strong>: password of the username for the Mongo (DDS) database</li><li id="dli_09_0114__li9871329162012"><strong id="dli_09_0114__b19371423192511">host</strong>: IP address of the Mongo (DDS) database instance</li><li id="dli_09_0114__li9368130114020"><strong id="dli_09_0114__b118402012619">db</strong>: name of the created Mongo (DDS) database</li></ul>
|
|
</td>
|
|
</tr>
|
|
<tr id="dli_09_0114__row1576121861212"><td class="cellrowborder" valign="top" width="14.39%" headers="mcps1.3.1.6.2.1.1.2.2.3.1.1 "><p id="dli_09_0114__p19551821121217">database</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="85.61%" headers="mcps1.3.1.6.2.1.1.2.2.3.1.2 "><p id="dli_09_0114__p11551112111218">DDS database name. If the database name is specified in the URL, the database name in the URL does not take effect.</p>
|
|
</td>
|
|
</tr>
|
|
<tr id="dli_09_0114__en-us_topic_0204096844_row1172515391019"><td class="cellrowborder" valign="top" width="14.39%" headers="mcps1.3.1.6.2.1.1.2.2.3.1.1 "><p id="dli_09_0114__en-us_topic_0204096844_p420214716115">collection</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="85.61%" headers="mcps1.3.1.6.2.1.1.2.2.3.1.2 "><p id="dli_09_0114__en-us_topic_0204096844_en-us_topic_0142907229_en-us_topic_0114776213_en-us_topic_0103157088_p570649132164">Collection name in the DDS. If the collection is specified in the URL, the collection in the URL does not take effect.</p>
|
|
<div class="note" id="dli_09_0114__en-us_topic_0204096844_note26469541236"><span class="notetitle"> NOTE: </span><div class="notebody"><p id="dli_09_0114__en-us_topic_0204096844_p764615417313">If a collection already exists in DDS, you do not need to specify schema information when creating a table. DLI automatically generates schema information based on data in the collection.</p>
|
|
</div></div>
|
|
</td>
|
|
</tr>
|
|
<tr id="dli_09_0114__en-us_topic_0204096844_row1972513391708"><td class="cellrowborder" valign="top" width="14.39%" headers="mcps1.3.1.6.2.1.1.2.2.3.1.1 "><p id="dli_09_0114__en-us_topic_0204096844_p120217119">user</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="85.61%" headers="mcps1.3.1.6.2.1.1.2.2.3.1.2 "><p id="dli_09_0114__en-us_topic_0204096844_en-us_topic_0142907229_en-us_topic_0114776213_en-us_topic_0103157088_p401695862167">Username for accessing the DDS cluster.</p>
|
|
</td>
|
|
</tr>
|
|
<tr id="dli_09_0114__en-us_topic_0204096844_row672512392019"><td class="cellrowborder" valign="top" width="14.39%" headers="mcps1.3.1.6.2.1.1.2.2.3.1.1 "><p id="dli_09_0114__en-us_topic_0204096844_p197254391802">password</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="85.61%" headers="mcps1.3.1.6.2.1.1.2.2.3.1.2 "><p id="dli_09_0114__en-us_topic_0204096844_p26038205218">Password for accessing the DDS cluster.</p>
|
|
</td>
|
|
</tr>
|
|
</tbody>
|
|
</table>
|
|
</div>
|
|
</li><li id="dli_09_0114__en-us_topic_0204096844_li19602436314">Insert data.<pre class="screen" id="dli_09_0114__en-us_topic_0204096844_screen1232843913318">sparkSession.sql("insert into test_dds values('3', 'Ann',23)")</pre>
|
|
</li><li id="dli_09_0114__en-us_topic_0204096844_li51787203419">Query data.<pre class="screen" id="dli_09_0114__en-us_topic_0204096844_screen1626019499512">sparkSession.sql("select * from test_dds").show()</pre>
|
|
</li></ol>
|
|
</li><li id="dli_09_0114__li2104144764818">Connecting to data sources through DataFrame APIs<ol id="dli_09_0114__en-us_topic_0204096844_ol81572119429"><li id="dli_09_0114__en-us_topic_0204096844_li2926141813917">Set connection parameters.<pre class="screen" id="dli_09_0114__en-us_topic_0204096844_screen17561391694">val url = "192.168.4.62:8635,192.168.5.134:8635/test?authSource=admin"
|
|
val uri = "mongodb://username:pwd@host:8635/db"
|
|
val user = "rwuser"
|
|
val database = "test"
|
|
val collection = "test"
|
|
val password = "######"</pre>
|
|
</li><li id="dli_09_0114__en-us_topic_0204096844_li159162206521">Construct a schema.<div class="codecoloring" codetype="Scala" id="dli_09_0114__en-us_topic_0204096844_screen255614411440"><div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span class="normal">1</span></pre></div></td><td class="code"><div><pre><span></span><span class="kd">val</span><span class="w"> </span><span class="n">schema</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="nc">StructType</span><span class="p">(</span><span class="nc">List</span><span class="p">(</span><span class="nc">StructField</span><span class="p">(</span><span class="s">"id"</span><span class="p">,</span><span class="w"> </span><span class="nc">StringType</span><span class="p">),</span><span class="w"> </span><span class="nc">StructField</span><span class="p">(</span><span class="s">"name"</span><span class="p">,</span><span class="w"> </span><span class="nc">StringType</span><span class="p">),</span><span class="w"> </span><span class="nc">StructField</span><span class="p">(</span><span class="s">"age"</span><span class="p">,</span><span class="w"> </span><span class="nc">IntegerType</span><span class="p">)))</span>
|
|
</pre></div></td></tr></table></div>
|
|
|
|
</div>
|
|
</li><li id="dli_09_0114__en-us_topic_0204096844_li98014102417">Construct a DataFrame.<pre class="screen" id="dli_09_0114__en-us_topic_0204096844_screen84411952102411">val rdd = spark.sparkContext.parallelize(Seq(Row("1", "John", 23), Row("2", "Bob", 32)))
|
|
val dataFrame = spark.createDataFrame(rdd, schema)</pre>
|
|
</li><li id="dli_09_0114__en-us_topic_0204096844_li6847933185312">Import data to Mongo.<div class="codecoloring" codetype="Scala" id="dli_09_0114__en-us_topic_0204096844_screen1550013195536"><div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span class="normal">1</span>
|
|
<span class="normal">2</span>
|
|
<span class="normal">3</span>
|
|
<span class="normal">4</span>
|
|
<span class="normal">5</span>
|
|
<span class="normal">6</span>
|
|
<span class="normal">7</span>
|
|
<span class="normal">8</span>
|
|
<span class="normal">9</span></pre></div></td><td class="code"><div><pre><span></span><span class="n">dataFrame</span><span class="p">.</span><span class="n">write</span><span class="p">.</span><span class="n">format</span><span class="p">(</span><span class="s">"mongo"</span><span class="p">)</span>
|
|
<span class="w"> </span><span class="p">.</span><span class="n">option</span><span class="p">(</span><span class="s">"url"</span><span class="p">,</span><span class="w"> </span><span class="n">url</span><span class="p">)</span>
|
|
<span class="w"> </span><span class="p">.</span><span class="n">option</span><span class="p">(</span><span class="s">"uri"</span><span class="p">,</span><span class="w"> </span><span class="n">uri</span><span class="p">)</span>
|
|
<span class="w"> </span><span class="p">.</span><span class="n">option</span><span class="p">(</span><span class="s">"database"</span><span class="p">,</span><span class="w"> </span><span class="n">database</span><span class="p">)</span>
|
|
<span class="w"> </span><span class="p">.</span><span class="n">option</span><span class="p">(</span><span class="s">"collection"</span><span class="p">,</span><span class="w"> </span><span class="n">collection</span><span class="p">)</span>
|
|
<span class="w"> </span><span class="p">.</span><span class="n">option</span><span class="p">(</span><span class="s">"user"</span><span class="p">,</span><span class="w"> </span><span class="n">user</span><span class="p">)</span>
|
|
<span class="w"> </span><span class="p">.</span><span class="n">option</span><span class="p">(</span><span class="s">"password"</span><span class="p">,</span><span class="w"> </span><span class="n">password</span><span class="p">)</span>
|
|
<span class="w"> </span><span class="p">.</span><span class="n">mode</span><span class="p">(</span><span class="nc">SaveMode</span><span class="p">.</span><span class="nc">Overwrite</span><span class="p">)</span>
|
|
<span class="w"> </span><span class="p">.</span><span class="n">save</span><span class="p">()</span>
|
|
</pre></div></td></tr></table></div>
|
|
|
|
</div>
|
|
<div class="note" id="dli_09_0114__en-us_topic_0204096844_note3841161165514"><img src="public_sys-resources/note_3.0-en-us.png"><span class="notetitle"> </span><div class="notebody"><p id="dli_09_0114__en-us_topic_0204096844_p27502013132415">The options of <strong id="dli_09_0114__b51391457102913">mode</strong> are <strong id="dli_09_0114__b1714455715299">Overwrite</strong>, <strong id="dli_09_0114__b1614514577291">Append</strong>, <strong id="dli_09_0114__b214518576297">ErrorIfExists</strong>, and <strong id="dli_09_0114__b71455579293">Ignore</strong>.</p>
|
|
</div></div>
|
|
</li><li id="dli_09_0114__en-us_topic_0204096844_li6157513420">Read data from Mongo.<div class="codecoloring" codetype="Scala" id="dli_09_0114__en-us_topic_0204096844_screen5984155015578"><div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span class="normal">1</span>
|
|
<span class="normal">2</span>
|
|
<span class="normal">3</span>
|
|
<span class="normal">4</span>
|
|
<span class="normal">5</span>
|
|
<span class="normal">6</span>
|
|
<span class="normal">7</span>
|
|
<span class="normal">8</span></pre></div></td><td class="code"><div><pre><span></span><span class="kd">val</span><span class="w"> </span><span class="n">jdbcDF</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">spark</span><span class="p">.</span><span class="n">read</span><span class="p">.</span><span class="n">format</span><span class="p">(</span><span class="s">"mongo"</span><span class="p">).</span><span class="n">schema</span><span class="p">(</span><span class="n">schema</span><span class="p">)</span>
|
|
<span class="w"> </span><span class="p">.</span><span class="n">option</span><span class="p">(</span><span class="s">"url"</span><span class="p">,</span><span class="w"> </span><span class="n">url</span><span class="p">)</span>
|
|
<span class="w"> </span><span class="p">.</span><span class="n">option</span><span class="p">(</span><span class="s">"uri"</span><span class="p">,</span><span class="w"> </span><span class="n">uri</span><span class="p">)</span>
|
|
<span class="w"> </span><span class="p">.</span><span class="n">option</span><span class="p">(</span><span class="s">"database"</span><span class="p">,</span><span class="w"> </span><span class="n">database</span><span class="p">)</span>
|
|
<span class="w"> </span><span class="p">.</span><span class="n">option</span><span class="p">(</span><span class="s">"collection"</span><span class="p">,</span><span class="w"> </span><span class="n">collection</span><span class="p">)</span>
|
|
<span class="w"> </span><span class="p">.</span><span class="n">option</span><span class="p">(</span><span class="s">"user"</span><span class="p">,</span><span class="w"> </span><span class="n">user</span><span class="p">)</span>
|
|
<span class="w"> </span><span class="p">.</span><span class="n">option</span><span class="p">(</span><span class="s">"password"</span><span class="p">,</span><span class="w"> </span><span class="n">password</span><span class="p">)</span>
|
|
<span class="w"> </span><span class="p">.</span><span class="n">load</span><span class="p">()</span>
|
|
</pre></div></td></tr></table></div>
|
|
|
|
</div>
|
|
<p id="dli_09_0114__en-us_topic_0204096844_p1779813536284">Operation result</p>
|
|
<p id="dli_09_0114__en-us_topic_0204096844_p4889173621518"><span><img id="dli_09_0114__en-us_topic_0204096844_image138881636161515" src="en-us_image_0223996997.png"></span></p>
|
|
</li></ol>
|
|
</li><li id="dli_09_0114__li1521231694910">Submitting a Spark job<ol id="dli_09_0114__ol9213121510524"><li id="dli_09_0114__li1692416144334">Generate a JAR package based on the code and upload the package to DLI.<p id="dli_09_0114__dli_09_0063_p1749619513385"><a name="dli_09_0114__li1692416144334"></a><a name="li1692416144334"></a></p>
|
|
<p id="dli_09_0114__dli_09_0063_p114961151385"></p>
|
|
</li><li id="dli_09_0114__li128919367314">In the Spark job editor, select the corresponding dependency module and execute the Spark job.<p id="dli_09_0114__p114630400319"><a name="dli_09_0114__li128919367314"></a><a name="li128919367314"></a></p>
|
|
<div class="p" id="dli_09_0114__p11533837153112"><div class="note" id="dli_09_0114__en-us_topic_0204096844_note1435543551919"><img src="public_sys-resources/note_3.0-en-us.png"><span class="notetitle"> </span><div class="notebody"><ul id="dli_09_0114__en-us_topic_0204096844_ul17825285811"><li id="dli_09_0114__en-us_topic_0197738142_li58215295819">If the Spark version is 2.3.2 (will be offline soon) or 2.4.5, set <strong id="dli_09_0114__b569133913595">Module</strong> to <strong id="dli_09_0114__b1069123914595">sys.datasource.mongo</strong> when you submit a job.</li><li id="dli_09_0114__li6624653171317">If the Spark version is 3.1.1, you do not need to select a module. Instead, configure the following <strong id="dli_09_0114__b446654419598">Spark parameters (--conf)</strong>:<p id="dli_09_0114__p1520611118290">spark.driver.extraClassPath=/usr/share/extension/dli/spark-jar/datasource/mongo/*</p>
|
|
<p id="dli_09_0114__p182061411152917">spark.executor.extraClassPath=/usr/share/extension/dli/spark-jar/datasource/mongo/*</p>
|
|
</li></ul>
|
|
</div></div>
|
|
</div>
|
|
</li></ol>
|
|
</li></ul>
|
|
</div>
|
|
<div class="section" id="dli_09_0114__section20960328175315"><h4 class="sectiontitle">Complete Example Code</h4><ul id="dli_09_0114__ul152197495530"><li id="dli_09_0114__li421904935316">Maven dependency<div class="codecoloring" codetype="Scala" id="dli_09_0114__en-us_topic_0204096845_screen5760163172012"><div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span class="normal">1</span>
|
|
<span class="normal">2</span>
|
|
<span class="normal">3</span>
|
|
<span class="normal">4</span>
|
|
<span class="normal">5</span></pre></div></td><td class="code"><div><pre><span></span><span class="o"><</span><span class="n">dependency</span><span class="o">></span>
|
|
<span class="w"> </span><span class="o"><</span><span class="n">groupId</span><span class="o">></span><span class="n">org</span><span class="p">.</span><span class="n">apache</span><span class="p">.</span><span class="n">spark</span><span class="o"></</span><span class="n">groupId</span><span class="o">></span>
|
|
<span class="w"> </span><span class="o"><</span><span class="n">artifactId</span><span class="o">></span><span class="n">spark</span><span class="o">-</span><span class="n">sql_2</span><span class="mf">.11</span><span class="o"></</span><span class="n">artifactId</span><span class="o">></span>
|
|
<span class="w"> </span><span class="o"><</span><span class="n">version</span><span class="o">></span><span class="mf">2.3.2</span><span class="o"></</span><span class="n">version</span><span class="o">></span>
|
|
<span class="o"></</span><span class="n">dependency</span><span class="o">></span>
|
|
</pre></div></td></tr></table></div>
|
|
|
|
</div>
|
|
</li><li id="dli_09_0114__li15561111215546">Connecting to data sources through SQL APIs<div class="codecoloring" codetype="Scala" id="dli_09_0114__en-us_topic_0204096845_screen144461426184015"><div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span class="normal"> 1</span>
|
|
<span class="normal"> 2</span>
|
|
<span class="normal"> 3</span>
|
|
<span class="normal"> 4</span>
|
|
<span class="normal"> 5</span>
|
|
<span class="normal"> 6</span>
|
|
<span class="normal"> 7</span>
|
|
<span class="normal"> 8</span>
|
|
<span class="normal"> 9</span>
|
|
<span class="normal">10</span>
|
|
<span class="normal">11</span>
|
|
<span class="normal">12</span>
|
|
<span class="normal">13</span>
|
|
<span class="normal">14</span>
|
|
<span class="normal">15</span>
|
|
<span class="normal">16</span>
|
|
<span class="normal">17</span>
|
|
<span class="normal">18</span></pre></div></td><td class="code"><div><pre><span></span><span class="k">import</span><span class="w"> </span><span class="nn">org</span><span class="p">.</span><span class="nn">apache</span><span class="p">.</span><span class="nn">spark</span><span class="p">.</span><span class="nn">sql</span><span class="p">.</span><span class="nc">SparkSession</span>
|
|
|
|
<span class="k">object</span><span class="w"> </span><span class="nc">TestMongoSql</span><span class="w"> </span><span class="p">{</span>
|
|
<span class="w"> </span><span class="k">def</span><span class="w"> </span><span class="nf">main</span><span class="p">(</span><span class="n">args</span><span class="p">:</span><span class="w"> </span><span class="nc">Array</span><span class="p">[</span><span class="nc">String</span><span class="p">]):</span><span class="w"> </span><span class="nc">Unit</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">{</span>
|
|
<span class="w"> </span><span class="kd">val</span><span class="w"> </span><span class="n">sparkSession</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="nc">SparkSession</span><span class="p">.</span><span class="n">builder</span><span class="p">().</span><span class="n">getOrCreate</span><span class="p">()</span>
|
|
<span class="w"> </span><span class="n">sparkSession</span><span class="p">.</span><span class="n">sql</span><span class="p">(</span>
|
|
<span class="w"> </span><span class="s">"create table test_dds(id string, name string, age int) using mongo options(</span>
|
|
<span class="s"> 'url' = '192.168.4.62:8635,192.168.5.134:8635/test?authSource=admin',</span>
|
|
<span class="s"> 'uri' = 'mongodb://username:pwd@host:8635/db',</span>
|
|
<span class="s"> 'database' = 'test',</span>
|
|
<span class="s"> 'collection' = 'test',</span>
|
|
<span class="s"> 'user' = 'rwuser',</span>
|
|
<span class="s"> 'password' = '######')"</span><span class="p">)</span>
|
|
<span class="w"> </span><span class="n">sparkSession</span><span class="p">.</span><span class="n">sql</span><span class="p">(</span><span class="s">"insert into test_dds values('3', 'Ann',23)"</span><span class="p">)</span>
|
|
<span class="w"> </span><span class="n">sparkSession</span><span class="p">.</span><span class="n">sql</span><span class="p">(</span><span class="s">"select * from test_dds"</span><span class="p">).</span><span class="n">show</span><span class="p">()</span>
|
|
<span class="w"> </span><span class="n">sparkSession</span><span class="p">.</span><span class="n">close</span><span class="p">()</span>
|
|
<span class="w"> </span><span class="p">}</span>
|
|
<span class="p">}</span>
|
|
</pre></div></td></tr></table></div>
|
|
|
|
</div>
|
|
</li><li id="dli_09_0114__li65851432115416">Connecting to data sources through DataFrame APIs<pre class="screen" id="dli_09_0114__en-us_topic_0204096845_screen19921165017368">import org.apache.spark.sql.{Row, SaveMode, SparkSession}
|
|
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
|
|
|
|
object Test_Mongo_SparkSql {
|
|
def main(args: Array[String]): Unit = {
|
|
// Create a SparkSession session.
|
|
val spark = SparkSession.builder().appName("mongodbTest").getOrCreate()
|
|
|
|
// Set the connection configuration parameters.
|
|
val url = "192.168.4.62:8635,192.168.5.134:8635/test?authSource=admin"
|
|
val uri = "mongodb://username:pwd@host:8635/db"
|
|
val user = "rwuser"
|
|
val database = "test"
|
|
val collection = "test"
|
|
val password = "######"
|
|
|
|
// Setting up the schema
|
|
val schema = StructType(List(StructField("id", StringType), StructField("name", StringType), StructField("age", IntegerType)))
|
|
|
|
// Setting up the DataFrame
|
|
val rdd = spark.sparkContext.parallelize(Seq(Row("1", "John", 23), Row("2", "Bob", 32)))
|
|
val dataFrame = spark.createDataFrame(rdd, schema)
|
|
|
|
|
|
// Write data to mongo
|
|
dataFrame.write.format("mongo")
|
|
.option("url", url)
|
|
.option("uri", uri)
|
|
.option("database", database)
|
|
.option("collection", collection)
|
|
.option("user", user)
|
|
.option("password", password)
|
|
.mode(SaveMode.Overwrite)
|
|
.save()
|
|
|
|
// Reading data from mongo
|
|
val jdbcDF = spark.read.format("mongo").schema(schema)
|
|
.option("url", url)
|
|
.option("uri", uri)
|
|
.option("database", database)
|
|
.option("collection", collection)
|
|
.option("user", user)
|
|
.option("password", password)
|
|
.load()
|
|
jdbcDF.show()
|
|
|
|
spark.close()
|
|
}
|
|
}</pre>
|
|
</li></ul>
|
|
</div>
|
|
</div>
|
|
<div>
|
|
<div class="familylinks">
|
|
<div class="parentlink"><strong>Parent topic:</strong> <a href="dli_09_0113.html">Connecting to Mongo</a></div>
|
|
</div>
|
|
</div>
|
|
|