import tweepy
import time
import pandas as pd

# Twitter API credentials. "add_key" is a placeholder that has to be replaced
# before running. Only bearer_token is passed to the client below; the other
# four values are not used anywhere in the visible code.
consumer_key = "add_key"
consumer_secret = "add_key"
access_key = "add_key"
access_secret = "add_key"
bearer_token = "add_key"

# Shared API client for every search in this script, created with
# wait_on_rate_limit=True.
client = tweepy.Client(bearer_token, wait_on_rate_limit=True)
# Collect every result page of the search into raw_tweets, running the same
# query once per start time (all runs share the same end time).
# NOTE(review): the second and third windows are sub-ranges of the first, so
# their tweets are presumably collected more than once -- this looks like
# re-run notebook cells; confirm whether the duplicates are wanted.
raw_tweets = []
for window_start in (
    '2022-02-28T00:00:00Z',
    '2022-08-24T08:23:52Z',
    '2022-09-22T06:14:34Z',
):
    for response in tweepy.Paginator(
        client.search_all_tweets,
        query='"ukraine krieg" -is:retweet lang:de',
        tweet_fields=['created_at', 'geo', 'text'],
        start_time=window_start,
        end_time='2022-11-30T00:00:00Z',
    ):
        # Pause one second before storing each page
        # (presumably to stay under a per-second request limit -- confirm).
        time.sleep(1)
        raw_tweets.append(response)

# Notebook leftover: the value is discarded when this runs as a script.
len(raw_tweets)

# Container for the pages of the next search window.
raw_tweets2 = []
# Page through the search for 2022-02-28 .. 2022-08-24 and keep every page in
# raw_tweets2 (the list is created right before this loop).
for response in tweepy.Paginator(
    client.search_all_tweets,
    query='"ukraine krieg" -is:retweet lang:de',
    tweet_fields=['created_at', 'geo', 'text'],
    start_time='2022-02-28T00:00:00Z',
    end_time='2022-08-24T08:23:52Z',
):
    time.sleep(1)  # one-second pause per page
    raw_tweets2.append(response)

# Container for the pages of the next search window.
raw_tweets3 = []
# Page through the search for 2022-02-28 .. 2022-06-15 and keep every page in
# raw_tweets3 (the list is created right before this loop).
for response in tweepy.Paginator(
    client.search_all_tweets,
    query='"ukraine krieg" -is:retweet lang:de',
    tweet_fields=['created_at', 'geo', 'text'],
    start_time='2022-02-28T00:00:00Z',
    end_time='2022-06-15T20:53:28Z',
):
    time.sleep(1)  # one-second pause per page
    raw_tweets3.append(response)

# Container for the pages of the next search window.
raw_tweets4 = []
# Page through the search for 2022-02-28 .. 2022-03-04 and keep every page in
# raw_tweets4 (the list is created right before this loop).
for response in tweepy.Paginator(
    client.search_all_tweets,
    query='"ukraine krieg" -is:retweet lang:de',
    tweet_fields=['created_at', 'geo', 'text'],
    start_time='2022-02-28T00:00:00Z',
    end_time='2022-03-04T05:46:41Z',
):
    time.sleep(1)  # one-second pause per page
    raw_tweets4.append(response)

# Container for the pages of the next search window.
raw_tweets5 = []
# Page through the search for 2022-02-24 .. 2022-03-01 and keep every page in
# raw_tweets5 (the list is created right before this loop).
for response in tweepy.Paginator(
    client.search_all_tweets,
    query='"ukraine krieg" -is:retweet lang:de',
    tweet_fields=['created_at', 'geo', 'text'],
    start_time='2022-02-24T23:59:59Z',
    end_time='2022-03-01T08:36:47Z',
):
    time.sleep(1)  # one-second pause per page
    raw_tweets5.append(response)

# Row buffer for the tweets collected in raw_tweets.
result = []
user_dict = {}  # never filled in the visible code

# Flatten every response page in raw_tweets into one dictionary per tweet,
# appended to result (the list is created right before this block).
for response in raw_tweets:
    # tweepy leaves response.data as None when a page carries no tweets;
    # treat that as an empty page instead of failing on iteration.
    for tweet in response.data or []:
        result.append({
            'text': tweet.text,
            'created_at': tweet.created_at,
            'geo': tweet.geo,
        })

# Change this list of dictionaries into a dataframe.
# NOTE(review): unlike the later frames, df is not written to CSV in the
# visible code.
df = pd.DataFrame(result)

# Row buffer for the next batch of tweets.
result2 = []
user_dict2 = {}  # never filled in the visible code

# Flatten every response page in raw_tweets2 into one dictionary per tweet,
# appended to result2 (the list is created right before this block).
for response in raw_tweets2:
    # tweepy leaves response.data as None when a page carries no tweets;
    # treat that as an empty page instead of failing on iteration.
    for tweet in response.data or []:
        result2.append({
            'text': tweet.text,
            'created_at': tweet.created_at,
            'geo': tweet.geo,
        })

# Change this list of dictionaries into a dataframe and save it.
df2 = pd.DataFrame(result2)
df2.to_csv('uk_tag4.csv', encoding='utf-8')

# Notebook leftover: the value is discarded when this runs as a script.
len(raw_tweets)

# Row buffer for the next batch of tweets.
result3 = []
user_dict3 = {}  # never filled in the visible code

# Flatten every response page in raw_tweets3 into one dictionary per tweet,
# appended to result3 (the list is created right before this block).
for response in raw_tweets3:
    # tweepy leaves response.data as None when a page carries no tweets;
    # treat that as an empty page instead of failing on iteration.
    for tweet in response.data or []:
        result3.append({
            'text': tweet.text,
            'created_at': tweet.created_at,
            'geo': tweet.geo,
        })

# Change this list of dictionaries into a dataframe and save it.
df3 = pd.DataFrame(result3)
df3.to_csv('uk_tag5.csv', encoding='utf-8')

# Row buffer for the next batch of tweets.
result4 = []
user_dict4 = {}  # never filled in the visible code

# Flatten every response page in raw_tweets4 into one dictionary per tweet,
# appended to result4 (the list is created right before this block).
for response in raw_tweets4:
    # tweepy leaves response.data as None when a page carries no tweets;
    # treat that as an empty page instead of failing on iteration.
    for tweet in response.data or []:
        result4.append({
            'text': tweet.text,
            'created_at': tweet.created_at,
            'geo': tweet.geo,
        })

# Change this list of dictionaries into a dataframe and save it.
df4 = pd.DataFrame(result4)
df4.to_csv('uk_tag6.csv', encoding='utf-8')

# Row buffer for the next batch of tweets.
result5 = []
user_dict5 = {}  # never filled in the visible code

# Flatten every response page in raw_tweets5 into one dictionary per tweet,
# appended to result5 (the list is created right before this block).
for response in raw_tweets5:
    # tweepy leaves response.data as None when a page carries no tweets;
    # treat that as an empty page instead of failing on iteration.
    for tweet in response.data or []:
        result5.append({
            'text': tweet.text,
            'created_at': tweet.created_at,
            'geo': tweet.geo,
        })

# Change this list of dictionaries into a dataframe and save it.
df5 = pd.DataFrame(result5)
df5.to_csv('uk_tag7.csv', encoding='utf-8')

# Container for the pages of the next search window.
raw_tweets6 = []
# Page through the search for 2022-02-24 .. 2022-02-25 and keep every page in
# raw_tweets6 (the list is created right before this loop).
for response in tweepy.Paginator(
    client.search_all_tweets,
    query='"ukraine krieg" -is:retweet lang:de',
    tweet_fields=['created_at', 'geo', 'text'],
    start_time='2022-02-24T00:00:00Z',
    end_time='2022-02-25T00:00:58Z',
):
    time.sleep(1)  # one-second pause per page
    raw_tweets6.append(response)

# Row buffer for the tweets collected in raw_tweets6.
result6 = []
user_dict6 = {}  # never filled in the visible code

# Flatten every response page in raw_tweets6 into one dictionary per tweet,
# appended to result6 (the list is created right before this block).
for response in raw_tweets6:
    # tweepy leaves response.data as None when a page carries no tweets;
    # treat that as an empty page instead of failing on iteration.
    for tweet in response.data or []:
        result6.append({
            'text': tweet.text,
            'created_at': tweet.created_at,
            'geo': tweet.geo,
        })

# Change this list of dictionaries into a dataframe and save it.
df6 = pd.DataFrame(result6)
df6.to_csv('uk_tag8.csv', encoding='utf-8')

# Container for the pages of the next search window.
raw_tweets7 = []
# Page through the search for 2022-08-24 .. 2022-09-22 and keep every page in
# raw_tweets7 (the list is created right before this loop).
for response in tweepy.Paginator(
    client.search_all_tweets,
    query='"ukraine krieg" -is:retweet lang:de',
    tweet_fields=['created_at', 'geo', 'text'],
    start_time='2022-08-24T08:23:16Z',
    end_time='2022-09-22T06:14:34Z',
):
    time.sleep(1)  # one-second pause per page
    raw_tweets7.append(response)

# Row buffer for the tweets collected in raw_tweets7.
result7 = []
user_dict7 = {}  # never filled in the visible code

# Flatten every response page in raw_tweets7 into one dictionary per tweet,
# appended to result7 (the list is created right before this block).
for response in raw_tweets7:
    # tweepy leaves response.data as None when a page carries no tweets;
    # treat that as an empty page instead of failing on iteration.
    for tweet in response.data or []:
        result7.append({
            'text': tweet.text,
            'created_at': tweet.created_at,
            'geo': tweet.geo,
        })

# Change this list of dictionaries into a dataframe and save it.
df7 = pd.DataFrame(result7)
df7.to_csv('uk_tag2.csv', encoding='utf-8')

# Container for the pages of the next search window.
raw_tweets8 = []
# Page through the search for 2022-11-29 .. 2022-11-30 and keep every page in
# raw_tweets8 (the list is created right before this loop).
for response in tweepy.Paginator(
    client.search_all_tweets,
    query='"ukraine krieg" -is:retweet lang:de',
    tweet_fields=['created_at', 'geo', 'text'],
    start_time='2022-11-29T23:57:32Z',
    end_time='2022-11-30T23:59:59Z',
):
    time.sleep(1)  # one-second pause per page
    raw_tweets8.append(response)

# Row buffer for the tweets collected in raw_tweets8.
result8 = []
user_dict8 = {}  # never filled in the visible code

# Flatten every response page in raw_tweets8 into one dictionary per tweet,
# appended to result8 (the list is created right before this block).
for response in raw_tweets8:
    # tweepy leaves response.data as None when a page carries no tweets;
    # treat that as an empty page instead of failing on iteration.
    for tweet in response.data or []:
        result8.append({
            'text': tweet.text,
            'created_at': tweet.created_at,
            'geo': tweet.geo,
        })

# Change this list of dictionaries into a dataframe and save it.
df8 = pd.DataFrame(result8)
df8.to_csv('uk_tag1.csv', encoding='utf-8')

# Added from a different notebook: the example preparation and analysis of
# the first week.
import nltk
import germansentiment
from nltk.probability import FreqDist
from textblob_de import TextBlobDE
import csv
import string
import matplotlib.pyplot as plt
from wordcloud import WordCloud
from nltk.tokenize import sent_tokenize
nltk.download('punkt')
from textblob_de import TextBlobDE as TextBlob

# Load the first-week export as one raw string (header, separators and all).
# NOTE(review): encoding is set explicitly on the assumption that
# first_week.csv was written as UTF-8 like the to_csv exports in this file --
# confirm.
with open('/Users/lisa/Final_Project_geopolitcs/first_week.csv', 'r',
          encoding='utf-8') as csv_datei:
    # The reader is created but never iterated; the text comes from read().
    reader = csv.reader(csv_datei, delimiter=',')
    text = csv_datei.read()

# Sentence tokens (not used further in the visible code).
token_text = sent_tokenize(text)

# Whitespace-separated tokens and their lower-cased counterparts.
words = text.split()
lowercase_words = [w.lower() for w in words]
print(len(lowercase_words))

# Deletion table for remove_punc. The character set includes the space and
# the backslash; the backslash is written as "\\" because "\," is not a valid
# escape sequence in a normal string literal.
_PUNC_TABLE = str.maketrans('', '', '''!()-[]{};:'"\\, <>./?@#$%^&*_~''')


def remove_punc(string):
    """Return *string* with every punctuation character removed.

    The characters dropped are exactly those listed in ``_PUNC_TABLE``;
    everything else is kept in its original order.

    Args:
        string: The text to clean.

    Returns:
        A new string without the listed characters (possibly empty).
    """
    # One pass over the string instead of one replace() call per character.
    return string.translate(_PUNC_TABLE)
# Punctuation-free version of every lower-cased token.
lowercase_words_clean = [remove_punc(i) for i in lowercase_words]

# Replace links and user mentions with fixed placeholder tokens.
# remove_punc strips "@", so a mention can only be recognised on the token as
# it was BEFORE cleaning; testing the cleaned token (as before) never matched.
# NOTE(review): tokens that consisted only of punctuation are kept here as
# empty strings -- confirm whether they should be dropped.
tweet_words = []
for raw_word, word in zip(lowercase_words, lowercase_words_clean):
    if word.startswith('http'):
        word = "http"
    elif raw_word.startswith('@'):
        word = '@user'
    tweet_words.append(word)

nltk.download('stopwords')
from nltk.corpus import stopwords

# NOTE: this rebinds `stopwords` from the imported nltk.corpus object to the
# plain German word list; the filtering code below relies on that name.
stopwords = stopwords.words('german')

# Additional, topic-specific tokens to exclude from the analysis.
# NOTE(review): remove_punc deletes "-", so the entries "ukraine-krieg" and
# "-" cannot match a cleaned token -- confirm whether that is intended.
other_stopwords = ["ukraine", "ukraine-krieg", "mehr", "-", "via", "+++", "ukrainekrieg", "krieg", "tonline", "@user", "http"]
print(other_stopwords)

# Receives the tokens that survive the German stop-word filter.
words_withoutstop = []
# Keep only tokens that are not German stop words. The set is built once so
# each membership test does not scan the whole stop-word list.
german_stopwords = set(stopwords)
words_withoutstop.extend(
    token for token in tweet_words if token not in german_stopwords
)

# Plot the 10 most frequent remaining tokens and report the size reduction.
fdist = FreqDist(words_withoutstop)
fdist.plot(10)
print(len(words), len(words_withoutstop))

# Receives the tokens that survive the topic-specific filter.
words_withoutstop2 = []
# Second pass: also drop the topic-specific tokens listed in other_stopwords.
excluded_tokens = set(other_stopwords)
words_withoutstop2.extend(
    token for token in words_withoutstop if token not in excluded_tokens
)

# Plot the 10 most frequent remaining tokens and report the size reduction.
fdist2 = FreqDist(words_withoutstop2)
fdist2.plot(10)
print(len(words_withoutstop), len(words_withoutstop2))

# Accumulates the filtered tokens as one space-separated string.
string_firstweek = ''
# Append every filtered token, each preceded by a single space (so the text
# starts with a space). join() builds the string in one go instead of
# re-creating it for every token.
string_firstweek += ''.join(' ' + token for token in words_withoutstop2)

# Render the word cloud, show it, and save it as a PNG.
wc = WordCloud(
    scale=3,
    colormap='Paired',
    background_color='white',
)
wc.generate(string_firstweek)
plt.imshow(wc)
wc.to_file('wordcloud_firstweek.png')
plt.axis("off")

# Sentiment of the whole first-week text, printed and kept in sentiment_mw.
blob = TextBlob(string_firstweek)
print(blob.sentiment)
sentiment_mw = blob.sentiment