Untitled
unknown
plain_text
2 years ago
994 B
3
Indexable
"""Collect every Minnesota user-tweets parquet file from S3 into one CSV.

The script globs the tweet files under the ``electionsandtwitter`` bucket,
reads each one with awswrangler, concatenates the frames and writes the
result to ``all_tweets_raw.csv`` in the current working directory.

Credentials are taken from the ``AWS_ACCESS_KEY_ID`` and
``AWS_SECRET_ACCESS_KEY`` environment variables. They must never be
committed to source; any key pair that was previously pasted into this
file should be treated as leaked and rotated.
"""
import os

import awswrangler as wr
import boto3
import pandas as pd
import s3fs
from tqdm import tqdm

# Read from the environment instead of hard-coding secrets. When a variable
# is unset the value is None and is passed through unchanged to s3fs/boto3.
s3_key = os.environ.get("AWS_ACCESS_KEY_ID")
s3_secret = os.environ.get("AWS_SECRET_ACCESS_KEY")
s3_storage_options = {"key": s3_key, "secret": s3_secret}

fs = s3fs.S3FileSystem(key=s3_key, secret=s3_secret)
s3_session = boto3.Session(
    aws_access_key_id=s3_key,
    aws_secret_access_key=s3_secret,
    region_name="us-east-2",
)

tweets_files_paths = fs.glob(
    "s3://electionsandtwitter/Addressable Insights/Data Files/"
    "Twitter_Endpoint/Statewise/Minnesota/Tweets/Users Tweets/**/tweets_*"
)
if not tweets_files_paths:
    # pd.concat raises an unhelpful "No objects to concatenate" on an empty
    # list; fail early with a message that points at the real cause.
    raise SystemExit("No tweet files matched the S3 glob pattern; nothing to export.")

all_tweets = []
for file_path in tqdm(tweets_files_paths):
    # The scheme is prepended because the globbed paths are used without it
    # (presumably fs.glob returns "bucket/key" style paths - confirm).
    sample_tweets = wr.s3.read_parquet(
        path="s3://" + file_path.strip(),
        boto3_session=s3_session,
    )
    all_tweets.append(sample_tweets)

df = pd.concat(all_tweets)
df.to_csv("all_tweets_raw.csv", index=False)
Editor is loading...