# Untitled
#
# avatar
# unknown
# plain_text
# 2 years ago
# 994 B
# 3
# Indexable
import os

import awswrangler as wr
import boto3
import pandas as pd
import s3fs
from tqdm import tqdm
# Download every per-user tweets parquet file for Minnesota from S3, stack
# them into a single DataFrame and dump the result to a local CSV file.

# Credentials come from the environment so that secrets never live in source
# control. A missing variable raises KeyError immediately, before any S3 call.
# NOTE: the key pair that used to be hard-coded here has been exposed and
# should be rotated.
s3_key = os.environ["AWS_ACCESS_KEY_ID"]
s3_secret = os.environ["AWS_SECRET_ACCESS_KEY"]
s3_storage_options = {"key": s3_key, "secret": s3_secret}

# s3fs is used only for globbing; the actual reads go through awswrangler,
# which needs a boto3 session.
fs = s3fs.S3FileSystem(key=s3_key, secret=s3_secret)
s3_session = boto3.Session(
    aws_access_key_id=s3_key,
    aws_secret_access_key=s3_secret,
    region_name="us-east-2",
)

tweets_files_paths = fs.glob(
    "s3://electionsandtwitter/Addressable Insights/Data Files/"
    "Twitter_Endpoint/Statewise/Minnesota/Tweets/Users Tweets/**/tweets_*"
)

# Fail with a clear message instead of pandas' generic
# "No objects to concatenate" when the glob matches nothing.
if not tweets_files_paths:
    raise FileNotFoundError("No tweets_* files matched the S3 glob pattern")

# The "s3://" scheme is prepended to each globbed path before it is handed to
# awswrangler (presumably because s3fs returns paths without the scheme).
all_tweets = [
    wr.s3.read_parquet(
        path="s3://" + tweets_path.strip(),
        boto3_session=s3_session,
    )
    for tweets_path in tqdm(tweets_files_paths)
]

df = pd.concat(all_tweets)
df.to_csv("all_tweets_raw.csv", index=False)
# Editor is loading...