python_weather/spark/client.py
2024-06-01 12:39:58 +08:00

22 lines
940 B
Python

import os
# Cluster-specific locations of the JDK and the YARN client configuration.
# NOTE(review): presumably assigned ahead of the pyspark import so that they
# are already in the process environment before any JVM is launched - confirm.
os.environ["JAVA_HOME"] = "/opt/modules/jdk1.8.0_212"
os.environ["YARN_CONF_DIR"] = "/usr/hdp/2.6.3.0-235/hadoop-yarn/etc/hadoop/"
from hdfs import InsecureClient
from pyspark.sql import SparkSession
# Module-level HDFS client, created once at import time and available to any
# code that imports this module.
hdfs_client = InsecureClient(url='http://192.168.100.100:50070', user='spark') # remote address and user name to connect with
def spark_app(
    app_name: str,
    *,
    jdbc_driver_path: str = "/usr/hdp/2.6.3.0-235/spark2/jars/mysql-connector-java-5.1.40-bin.jar",
    hdp_version: str = "2.6.3.0-235",
) -> SparkSession:
    """Return a SparkSession on the YARN master, named ``app_name``.

    The session is obtained with ``getOrCreate()``, so an already-active
    session is reused rather than replaced.

    Args:
        app_name: Application name given to the Spark session.
        jdbc_driver_path: Path of the JDBC driver jar. It is registered both
            in ``spark.jars`` and on the driver's extra class path. Defaults
            to the MySQL connector bundled with the HDP Spark2 installation.
        hdp_version: HDP stack version, forwarded to the driver and the YARN
            application master as ``-Dhdp.version=<version>``.

    Returns:
        The configured (or reused) ``SparkSession``.
    """
    # The same JVM system property goes to both the driver and the YARN AM.
    hdp_java_option = "-Dhdp.version=" + hdp_version
    return (
        SparkSession.builder.master("yarn")
        # Turn the YARN timeline-service client off for this application.
        .config("spark.hadoop.yarn.timeline-service.enabled", "false")
        .config("spark.driver.extraJavaOptions", hdp_java_option)
        .config("spark.yarn.am.extraJavaOptions", hdp_java_option)
        # Register the JDBC jar with the job and expose it to the driver JVM.
        .config("spark.jars", jdbc_driver_path)
        .config("spark.driver.extraClassPath", jdbc_driver_path)
        .appName(app_name)
        .getOrCreate()
    )