"""Bootstrap helpers for a YARN-backed SparkSession and an HDFS client on HDP 2.6.3."""

import os

# Hadoop/Spark tooling reads these environment variables when pyspark is
# imported, so they must be set before the pyspark import below.
os.environ["JAVA_HOME"] = "/opt/modules/jdk1.8.0_212"
os.environ["YARN_CONF_DIR"] = "/usr/hdp/2.6.3.0-235/hadoop-yarn/etc/hadoop/"

from hdfs import InsecureClient
from pyspark.sql import SparkSession

# WebHDFS client: remote NameNode address and the user to act as.
hdfs_client = InsecureClient(url='http://192.168.100.100:50070', user='spark')


def spark_app(app_name: str) -> SparkSession:
    """Create (or reuse) a SparkSession running on YARN.

    Args:
        app_name: Application name shown in the YARN / Spark UIs.

    Returns:
        A SparkSession with the MySQL JDBC driver shipped and on the driver
        classpath, the HDP version stamp passed to the driver and AM JVMs,
        and the YARN timeline service disabled.
    """
    jdbc_driver_path = "/usr/hdp/2.6.3.0-235/spark2/jars/mysql-connector-java-5.1.40-bin.jar"
    hdp_version = "-Dhdp.version=2.6.3.0-235"
    return (
        SparkSession.builder.master("yarn")
        # NOTE(review): disabling the ATS timeline client is presumably the
        # usual HDP workaround for timeline-service startup failures — confirm.
        .config("spark.hadoop.yarn.timeline-service.enabled", "false")
        # HDP configs reference ${hdp.version}; define it explicitly for the
        # driver and YARN ApplicationMaster JVMs.
        .config("spark.driver.extraJavaOptions", hdp_version)
        .config("spark.yarn.am.extraJavaOptions", hdp_version)
        # Ship the MySQL connector jar and expose it on the driver classpath.
        .config("spark.jars", jdbc_driver_path)
        .config("spark.driver.extraClassPath", jdbc_driver_path)
        .appName(app_name)
        .getOrCreate()
    )