diff --git a/lib/xgta.py b/lib/xgta.py index ecf73337ccbd700167468b1158e307f278678ac4..74a25e815344a6be3a944c037cff801ee90f9567 100644 --- a/lib/xgta.py +++ b/lib/xgta.py @@ -46,7 +46,7 @@ class Xgta: (pl.col('isretweet') == False).sum().alias('is_original_tweet'), ]) .sort('postdate') - .collect(streaming=True) + .collect(streaming=self.streaming) .upsample(time_column="postdate", every="1d") .with_columns( pl.col(['all_tweets','is_retweet','is_original_tweet']).fill_null(strategy="zero") @@ -133,7 +133,7 @@ class Xgta: # 'quoted_status_id': ('xgta_user_id', 'quoted_status_xgta_user_id'), } - ids_cached = {table: self.ids[table].collect(streaming=True).lazy() for table in {map[i][0][:-3] for i in [i for i in map if i in df.columns]}} + ids_cached = {table: self.ids[table].collect(streaming=self.streaming).lazy() for table in {map[i][0][:-3] for i in [i for i in map if i in df.columns]}} for i in [i for i in map if i in df.columns]: df = ( @@ -141,7 +141,7 @@ class Xgta: ) if not was_lazy: - df = df.collect(streaming=True) + df = df.collect(streaming=self.streaming) return df @@ -162,8 +162,8 @@ class Xgta: ) q = ( - q.collect(streaming=True) - .join(self.df_tweets_per_day.collect(streaming=True), on='postdate',how='left') + q.collect(streaming=self.streaming) + .join(self.df_tweets_per_day.collect(streaming=self.streaming), on='postdate',how='left') .with_columns([(pl.col(f'search_term:{i}') / pl.col(i)).alias(f'percent:{i}') for i in ['all_tweets','is_retweet','is_original_tweet']]) .upsample(time_column="postdate", every="1d") .with_columns(