# Toy training set: each row is (label, feature1, feature2).
data = [
    (1.0, 0.1, 0.2),
    (0.0, 0.4, 0.5),
    (1.0, 0.2, 0.3),
    (0.0, 0.3, 0.4),
]
columns = ["label", "feature1", "feature2"]
# NOTE(review): `spark` is not defined in this snippet; presumably an active
# SparkSession created earlier -- confirm.
df = spark.createDataFrame(data, columns)

# Build the feature vector: merge feature1 and feature2 into one "features" column.
assembler = VectorAssembler(inputCols=["feature1", "feature2"], outputCol="features")
df = assembler.transform(df)
# NOTE(review): `df` now already contains the "features" column. If the pipeline
# built below also lists `assembler` as a stage, fitting it on this `df` is
# expected to fail because the output column already exists -- confirm against
# the full source and either drop this eager transform or leave `assembler`
# out of the pipeline stages.

# Create the logistic regression model.
lr = LogisticRegression(featuresCol="features", labelCol="label")

# Create the pipeline.
# NOTE(review): the original text is cut off at this point; the remainder of the
# statement is not visible and must be restored from the full source. The
# truncated fragment is preserved verbatim below.
# pipeline = Pipeli..