22 lines
940 B
Python
22 lines
940 B
Python
import os
|
|
|
|
# Point this process at the JDK and the YARN client configuration.
# These are assigned ahead of the hdfs/pyspark imports that follow in this
# file; presumably so the Spark launcher inherits them -- TODO confirm.
# Plain assignment is deliberate: any value already present in the
# environment is overridden.
os.environ["JAVA_HOME"] = "/opt/modules/jdk1.8.0_212"
os.environ["YARN_CONF_DIR"] = "/usr/hdp/2.6.3.0-235/hadoop-yarn/etc/hadoop/"
|
|
|
|
from hdfs import InsecureClient
|
|
from pyspark.sql import SparkSession
|
|
|
|
# Module-level HDFS client: specifies the remote address and the user name
# that requests are issued as.
hdfs_client = InsecureClient(
    url='http://192.168.100.100:50070',
    user='spark',
)
|
|
|
|
|
|
def spark_app(
    app_name: str,
    *,
    hdp_version: str = "2.6.3.0-235",
    jdbc_driver_path: str = (
        "/usr/hdp/2.6.3.0-235/spark2/jars/mysql-connector-java-5.1.40-bin.jar"
    ),
) -> SparkSession:
    """Build a SparkSession that runs on YARN with a JDBC driver jar attached.

    Args:
        app_name: Application name given to the session builder.
        hdp_version: HDP stack version. It is passed to both the driver and
            the YARN application master as ``-Dhdp.version=<hdp_version>``.
        jdbc_driver_path: Path to the JDBC connector jar. It is registered
            under ``spark.jars`` and also placed on the driver's extra
            classpath.

    Returns:
        The session produced by ``SparkSession.builder...getOrCreate()``.

    Note:
        The defaults reproduce the values that were previously hard-coded,
        so ``spark_app(name)`` configures the session exactly as before.
        NOTE(review): ``getOrCreate()`` may hand back an already-active
        session; confirm whether these options still take effect then.
    """
    # The same JVM system property is needed by the driver and by the YARN AM.
    hdp_java_option = "-Dhdp.version=" + hdp_version

    return (
        SparkSession.builder.master("yarn")
        # The YARN timeline service is switched off for this session.
        .config("spark.hadoop.yarn.timeline-service.enabled", "false")
        .config("spark.driver.extraJavaOptions", hdp_java_option)
        .config("spark.yarn.am.extraJavaOptions", hdp_java_option)
        # The connector jar is both shipped with the job and put on the
        # driver classpath.
        .config("spark.jars", jdbc_driver_path)
        .config("spark.driver.extraClassPath", jdbc_driver_path)
        .appName(app_name)
        .getOrCreate()
    )
|