Untitled
unknown
plain_text
2 years ago
696 B
32
Indexable
import pyspark
from pyspark.sql import SparkSession
from pyspark.sql.window import Window
import pyspark.sql.functions as F
# Spark session running on the local master for this DataFrame exercise.
spark = (
    SparkSession.builder
    .master("local")
    .appName("Learning DataFrames")
    .getOrCreate()
)

# Read the JSON events stored under the date=2022-05-01 directory.
events = spark.read.json("/user/master/data/events/date=2022-05-01")

# One window per value of event.message_from.
# NOTE(review): the window is ordered by the same column it is partitioned
# by, so every row inside a partition ties on the sort key and the row order
# seen by lag() is not pinned down by this spec -- confirm whether a
# timestamp/sequence column was intended here.
window = Window.partitionBy('event.message_from').orderBy("event.message_from")

# lag_7 holds the event.message_to value found 7 rows earlier in the same
# window; rows that have no such predecessor are dropped by the null filter
# below.
dfWithLag = events.withColumn(
    "lag_7",
    F.lag("event.message_to", 7).over(window),
)

# Print 10 untruncated (message_from, lag_7) rows, sorted by
# event.message_to in descending order.
(
    dfWithLag
    .select('event.message_from', 'lag_7')
    .where(dfWithLag["lag_7"].isNotNull())
    .orderBy(F.col('event.message_to').desc())
    .show(10, truncate=False)
)
# key: tl0Bn1k6Aw
Leave a Comment