1.文本文件转 json 格式
读取 txt 文件中的 tweets 文本(每行一条 JSON 字符串),用 json.loads 将其解析为 Python 对象(字典),之后既可以用 json.dumps 格式化打印输出,也可以从中提取详细信息
代码:
import json
import os
# Load line-delimited tweet JSON from the first .txt file of a folder and
# pretty-print the first successfully parsed tweet.
folderpath = r"D:\Twitter Data\Data\test"
files = os.listdir(folderpath)
# The file is opened below by its bare name, so make the folder the cwd.
os.chdir(folderpath)

# get the first txt file: os.listdir() returns entries in arbitrary order and
# may contain non-txt entries, so filter by extension and sort for determinism
txt_files = sorted(name for name in files if name.lower().endswith(".txt"))
if not txt_files:
    raise FileNotFoundError("no .txt file found in " + folderpath)
tweets_data_path = txt_files[0]

# store the parsed tweets here (one Python object per valid JSON line)
tweets_data = []
# "with" closes the file even if reading fails; the explicit encoding avoids
# decoding the tweet text with the platform's locale code page.
with open(tweets_data_path, "r", encoding="utf-8") as tweets_file:
    for line in tweets_file:
        line = line.strip()
        if not line:
            # blank separator lines carry no tweet
            continue
        try:
            tweet = json.loads(line)
        except ValueError:
            # json.JSONDecodeError subclasses ValueError: skip malformed
            # lines only, instead of swallowing every exception
            continue
        tweets_data.append(tweet)

# print the first tweet as JSON with indentation
if tweets_data:
    print(json.dumps(tweets_data[0], indent=4))
else:
    print("no valid JSON tweet found in " + tweets_data_path)
输出:
{
"created_at": "Tue Jun 25 20:44:34 +0000 2019",
"id": 1143621025550049280,
"id_str": "1143621025550049280",
"text": "Australia beat the Poms overnight \ud83d\ude01\ud83c\udfcf\ud83c\udde6\ud83c\uddfa\ud83c\udff4\udb40\udc67\udb40\udc62\udb40\udc65\udb40\udc6e\udb40\udc67\udb40\udc7f #AUSvENG #CmonAussie #CWC19",
"source": "<a href=\"http://twitter.com/download/iphone\" rel=\"nofollow\">Twitter for iPhone</a>",
"truncated": false,
"in_reply_to_status_id": null,
"in_reply_to_status_id_str": null,
"in_reply_to_user_id": null,
"in_reply_to_user_id_str": null,
"in_reply_to_screen_name": null,
"user": {
"id": 252426781,
"id_str": "252426781",
"name": "Willy Aitch",
"screen_name": "WillyAitch",
"location": "Melbourne, Victoria",
"url": null,
"description": "September 2017 to February 2018, was the greatest 5 months ever. Richmond \ud83d\udc2f\ud83d\udc2f\ud83d\udc2fwon the 2017 AFL Premiership! Philadelphia Eagles \ud83e\udd85\ud83e\udd85\ud83e\udd85 won Super Bowl LII",
"translator_type": "none",
"protected": false,
"verified": false,
"followers_count": 417,
"friends_count": 1061,
"listed_count": 15,
"favourites_count": 18852,
"statuses_count": 17796,
"created_at": "Tue Feb 15 04:55:59 +0000 2011",
"utc_offset": null,
"time_zone": null,
"geo_enabled": true,
"lang": null,
"contributors_enabled": false,
"is_translator": false,
"profile_background_color": "C0DEED",
"profile_background_image_url": "http://abs.twimg.com/images/themes/theme1/bg.png",
"profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme1/bg.png",
"profile_background_tile": false,
"profile_link_color": "1DA1F2",
"profile_sidebar_border_color": "C0DEED",
"profile_sidebar_fill_color": "DDEEF6",
"profile_text_color": "333333",
"profile_use_background_image": true,
"profile_image_url": "http://pbs.twimg.com/profile_images/1112669591342211072/rnbV0dCK_normal.jpg",
"profile_image_url_https": "https://pbs.twimg.com/profile_images/1112669591342211072/rnbV0dCK_normal.jpg",
"profile_banner_url": "https://pbs.twimg.com/profile_banners/252426781/1522377977",
"default_profile": true,
"default_profile_image": false,
"following": null,
"follow_request_sent": null,
"notifications": null
},
"geo": null,
"coordinates": null,
"place": {
"id": "01864a8a64df9dc4",
"url": "https://api.twitter.com/1.1/geo/id/01864a8a64df9dc4.json",
"place_type": "city",
"name": "Melbourne",
"full_name": "Melbourne, Victoria",
"country_code": "AU",
"country": "Australia",
"bounding_box": {
"type": "Polygon",
"coordinates": [
[
[
144.593742,
-38.433859
],
[
144.593742,
-37.511274
],
[
145.512529,
-37.511274
],
[
145.512529,
-38.433859
]
]
]
},
"attributes": {}
},
"contributors": null,
"is_quote_status": false,
"quote_count": 0,
"reply_count": 0,
"retweet_count": 0,
"favorite_count": 0,
"entities": {
"hashtags": [
{
"text": "AUSvENG",
"indices": [
46,
54
]
},
{
"text": "CmonAussie",
"indices": [
55,
66
]
},
{
"text": "CWC19",
"indices": [
67,
73
]
}
],
"urls": [],
"user_mentions": [],
"symbols": []
},
"favorited": false,
"retweeted": false,
"filter_level": "low",
"lang": "en",
"timestamp_ms": "1561495474599"
}
【439】Tweets processing by Python
原文:https://www.cnblogs.com/alex-bn-lee/p/11576335.html