Python-Pyspark对Column使用UDF.md

发表于 2022-08-11 | 更新于 2022-09-11 | 分类于 编程语言 , Python | 阅读次数:

Python代码如下:

```python
# -*- coding: utf-8 -*-
# @Time : 2022-06-17

from pyspark.sql import Column
from pyspark.sql.types import IntegerType
from pyspark.sql import SparkSession
from pyspark.sql import functions as F


def count_add_val(col, val):
    def add(cell):
        return cell + val

    count_col = F.count(col)
    return F.udf(add, IntegerType())(count_col)


def count_add(col: Column) -> Column:
    def add(cell):
        return cell + 1

    count_col = F.count(col)
    return F.udf(add, IntegerType())(count_col)


def count_base(col: Column) -> Column:
    return F.count(col)


if __name__ == "__main__":
    spark = SparkSession.builder.getOrCreate()
    df = spark.createDataFrame([("A", 1), ("B", 2), ("B", 4)], ("name", "age"))
    df.show()

    df.select(count_base(df.age)).show()
    df.groupby(df.name).agg(count_base(df.name)).show()

    df.select(count_add(df.age)).show()
    df.groupby(df.name).agg(count_add(df.name)).show()

    df.select(count_add_val(df.age, val=3)).show()
    df.groupby(df.name).agg(count_add_val(df.name, val=3)).show()
```

运行的结果为:

```
>>> df.show()
+----+---+
|name|age|
+----+---+
|   A|  1|
|   B|  2|
|   B|  4|
+----+---+

>>> df.select(count_base(df.age)).show()
+----------+
|count(age)|
+----------+
|         3|
+----------+

>>> df.groupby(df.name).agg(count_base(df.name)).show()
+----+-----------+
|name|count(name)|
+----+-----------+
|   B|          2|
|   A|          1|
+----+-----------+

>>> df.select(count_add(df.age)).show()
+---------------+
|add(count(age))|
+---------------+
|              4|
+---------------+

>>> df.groupby(df.name).agg(count_add(df.name)).show()
+----+----------------+
|name|add(count(name))|
+----+----------------+
|   B|               3|
|   A|               2|
+----+----------------+

>>> df.select(count_add_val(df.age, val=3)).show()
+---------------+
|add(count(age))|
+---------------+
|              6|
+---------------+

>>> df.groupby(df.name).agg(count_add_val(df.name, val=3)).show()
+----+----------------+
|name|add(count(name))|
+----+----------------+
|   B|               5|
|   A|               4|
+----+----------------+
```

Thanks for rewarding 打赏 微信支付 支付宝