# Untitled
# unknown
# plain_text
# 3 years ago
# 994 B
# 7
# Indexable
import os

import awswrangler as wr
import boto3
import pandas as pd
import s3fs
from tqdm import tqdm
# SECURITY NOTE: an AWS access key and secret were previously hardcoded at this
# point in the file. Anything committed or pasted publicly must be treated as
# compromised: rotate that key pair. Credentials are now read from the
# environment instead of living in source.

# Region used for the boto3 session that awswrangler reads through.
AWS_REGION = "us-east-2"

# Glob (evaluated by s3fs) matching every per-user tweets file for Minnesota.
TWEETS_GLOB = (
    "s3://electionsandtwitter/Addressable Insights/Data Files/"
    "Twitter_Endpoint/Statewise/Minnesota/Tweets/Users Tweets/**/tweets_*"
)

# Local file the combined tweets are written to.
OUTPUT_CSV = "all_tweets_raw.csv"


def load_credentials():
    """Return ``(access_key, secret_key)`` read from the environment.

    Reads ``AWS_ACCESS_KEY_ID`` and ``AWS_SECRET_ACCESS_KEY``. Either value is
    ``None`` when the variable is unset; the values are passed straight through
    to ``s3fs.S3FileSystem`` and ``boto3.Session``.
    NOTE(review): with ``None`` both libraries are expected to fall back to
    their default credential lookup (profile, instance role, ...) - confirm
    against the installed versions.
    """
    return (
        os.environ.get("AWS_ACCESS_KEY_ID"),
        os.environ.get("AWS_SECRET_ACCESS_KEY"),
    )


def to_s3_uri(path):
    """Return *path* (as yielded by ``fs.glob``) as an ``s3://`` URI.

    Surrounding whitespace is stripped, matching the original script; the
    ``s3://`` scheme is prepended unconditionally.
    """
    return "s3://" + path.strip()


def combine_frames(frames):
    """Concatenate *frames* into one DataFrame.

    Raises:
        ValueError: if *frames* is empty, with a message naming the glob that
            matched nothing (``pd.concat`` alone would raise a generic
            "No objects to concatenate").
    """
    if not frames:
        raise ValueError(f"No tweet files matched {TWEETS_GLOB!r}")
    return pd.concat(frames)


def main():
    """Read every matching parquet file from S3 and write one combined CSV."""
    s3_key, s3_secret = load_credentials()

    # s3fs is used only for listing; awswrangler does the parquet reads
    # through the boto3 session.
    fs = s3fs.S3FileSystem(key=s3_key, secret=s3_secret)
    s3_session = boto3.Session(
        aws_access_key_id=s3_key,
        aws_secret_access_key=s3_secret,
        region_name=AWS_REGION,
    )

    tweets_files_paths = fs.glob(TWEETS_GLOB)

    all_tweets = [
        wr.s3.read_parquet(
            path=to_s3_uri(file_path),
            boto3_session=s3_session,
        )
        for file_path in tqdm(tweets_files_paths)
    ]

    df = combine_frames(all_tweets)
    df.to_csv(OUTPUT_CSV, index=False)


if __name__ == "__main__":
    main()