Skip to content
Snippets Groups Projects
Commit a2d701c9 authored by Stefan Knauff's avatar Stefan Knauff
Browse files

apply .collect(streaming=self.streaming)

parent 3187d940
Branches main
No related tags found
No related merge requests found
......@@ -46,7 +46,7 @@ class Xgta:
(pl.col('isretweet') == False).sum().alias('is_original_tweet'),
])
.sort('postdate')
.collect(streaming=True)
.collect(streaming=self.streaming)
.upsample(time_column="postdate", every="1d")
.with_columns(
pl.col(['all_tweets','is_retweet','is_original_tweet']).fill_null(strategy="zero")
......@@ -133,7 +133,7 @@ class Xgta:
# 'quoted_status_id': ('xgta_user_id', 'quoted_status_xgta_user_id'),
}
ids_cached = {table: self.ids[table].collect(streaming=True).lazy() for table in {map[i][0][:-3] for i in [i for i in map if i in df.columns]}}
ids_cached = {table: self.ids[table].collect(streaming=self.streaming).lazy() for table in {map[i][0][:-3] for i in [i for i in map if i in df.columns]}}
for i in [i for i in map if i in df.columns]:
df = (
......@@ -141,7 +141,7 @@ class Xgta:
)
if not was_lazy:
df = df.collect(streaming=True)
df = df.collect(streaming=self.streaming)
return df
......@@ -162,8 +162,8 @@ class Xgta:
)
q = (
q.collect(streaming=True)
.join(self.df_tweets_per_day.collect(streaming=True), on='postdate',how='left')
q.collect(streaming=self.streaming)
.join(self.df_tweets_per_day.collect(streaming=self.streaming), on='postdate',how='left')
.with_columns([(pl.col(f'search_term:{i}') / pl.col(i)).alias(f'percent:{i}') for i in ['all_tweets','is_retweet','is_original_tweet']])
.upsample(time_column="postdate", every="1d")
.with_columns(
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment