002.08 新聞搜索 PySimpleGUI + News API
主题: 002.08 新闻搜索 PySimpleGUI + News API
建檔日期: 2019/12/15
更新日期: None
相关软件信息:
Win 10 | Python 3.7.2 | PySimpleGUI 4.11.0 | newsapi 0.1.1 |
说明: 所有内容欢迎引用, 只需注明来源及作者, 本文内容如有错误或用词不当, 敬请指正.
002.08 新闻搜索 PySimpleGUI + News API
最近看了一个提供超过30,000新闻来源的包, 为了方便自己搜索实时新闻, 写了一个简单的软件, 可以查询一个月以内(免费用户的限制)的相关新闻及其简要说明, 并可进一步到原新闻来源查看完整的新闻内容.
1. 软件内容:
- 可选择语言类别, 目前只提供阿拉伯文,中文,荷兰文,英文,法语,德语,希伯来语,意大利语,北萨米语,挪威语,葡萄牙语,俄语,西班牙语,瑞典语.
- 可选择起始日期到结束日期.
- 在文章标题和正文中搜索的关键字或短语。
- 这里支持高级搜索:
- 用双引号(“)括住短语以精确匹配。
- 在必须出现的单词或短语前加上+符号。 例如:+比特币
- 在不能出现的单词前加上-符号。 例如:-bitcoin
- 可以使用AND / OR / NOT关键字,
- 可选地将这些内容用括号分组, 例如:crypto AND (ethereum OR litecoin) NOT bitcoin。
- 日期:免费用户只能选择不超过一个月的日期
- 速度:网页数据加载的page_size越大,速度越慢。现在将其设置为100(最大),不要着急,请稍等片刻。
您可以将其更改为较小的数字,例如20。
- URL:单击每个新闻的标题以浏览源URL。
2. 主要包PySimpleGUI以及newsapi的简单说明
PySimpleGUI部份:
建立窗口基本如下:

import PySimpleGUI as sg

layout = [[第一行元素(..., key='key1'), ....],
          [第二行元素(..., key='key2'), ....],
          ....,
          [第N行元素(..., key='keyN'), .....]]
window = sg.Window('标题', layout=layout, ....其他参数)
while True:
    event, values = window.read()
    if event == None:
        break
    if event == 'key1':
        do something
    if event == 'key2':
        do something
window.close()
- 元素基本上类似tkinter的部件, 为了便于使用, 只会有一些简单必要的参数, 所以如果有特殊要求, 那就是另一回事了.
- 窗口布局以layout来表示, 有些元素还可以再建layout
- ‘Key’用来在事件产生时, 代表元素(tkinter中称为部件, 主要是避免混淆)
- 所有事件以window.read()读取
newsapi部份:
from newsapi import NewsApiClient

newsapi = NewsApiClient(api_key='1a8f46f807c44af9b261fae6ae659963')
top_headlines = newsapi.get_top_headlines(q='bitcoin',
                                          sources='bbc-news,the-verge',
                                          category='business',
                                          language='en',
                                          country='us')
all_articles = newsapi.get_everything(q='bitcoin',
                                      sources='bbc-news,the-verge',
                                      domains='bbc.co.uk,techcrunch.com',
                                      from_param='2017-12-01',
                                      to='2017-12-12',
                                      language='en',
                                      sort_by='relevancy',
                                      page=2)
sources = newsapi.get_sources()
- 建立客户端类 NewsApiClient()
- 使用仅有的三个方法: get_top_headlines(), get_everything() 以及 get_sources()
- get_top_headlines():提供实时的头条新闻和重要新闻.
- get_everything(): 搜索来自30,000多个大型和小型新闻来源和博客的数百万篇文章
- newsapi.get_sources(): 可用于跟踪可用的发布者,并且可以将其直接传递给用户。
3. 输出画面
4. 代码
注意: 代码中有一行my_api_key = ‘xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx’, 这是授权码, 可以到newsapi网站上申请取得.
#!/usr/bin/python
'''
Search worldwide news with PySimpleGUI code & news API
Get breaking news headlines, and search for articles from over 30,000 news
sources and blogs with news API. News API is a simple and easy-to-use API
that returns JSON metadata for headlines and articles live all over the web
right now.
'''
import PySimpleGUI as sg
from tkinter import font as FONT
from newsapi import NewsApiClient
from PIL import Image
from io import BytesIO
import requests
import _thread
import webbrowser
import datetime
import dateutil.relativedelta
import base64
import ctypes
import os
class News():
    '''
    News class: capture news through newsapi and load each article's photo
    from its source web site.

    Creating an instance runs the search immediately (see read()), so
    construction performs network I/O and may show a popup on failure.

    Attributes set by __init__:
        date     -- creation timestamp formatted as YYYYmmddHHMMSS
        coding   -- the string 'utf-8'
        text     -- the search query passed by the caller
        stop     -- when True, image() draws nothing
        raw_data -- list of article dicts as returned by newsapi
        length   -- number of articles
        data     -- cleaned article dicts (see convert())
        width, height, base64 -- per-article thumbnail size and PNG data
        photo    -- ids of the figures drawn by image()
        where    -- current vertical scroll offset of the canvas content
    '''

    # (old, new) pairs applied to every article field by convert().
    _REPLACEMENTS = (('<b>', ''), ('</b>', ''), ('\n', ' '), ('’', "'"),
                     ('”', "'"), ('“', "'"))

    def __init__(self, text):
        self.date = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
        self.coding = 'utf-8'
        self.text = text
        self.stop = False
        self.raw_data = self.read()
        self.length = len(self.raw_data)
        self.data = self.convert()
        self.width = [0] * self.length
        self.height = [0] * self.length
        self.base64 = [0] * self.length
        self.photo = []
        self.where = 0

    def convert(self):
        '''
        Convert the raw article dicts into the structure used by the GUI.

        Every field keeps its key. The nested 'source' dict is replaced by
        its 'name', None becomes '', and the markup/new-line/curly-quote
        substitutions listed in _REPLACEMENTS are applied to the text.

        Returns a list with one cleaned dict per article ([] if none).
        '''
        result = []
        for article in self.raw_data:
            item = {}
            for key, value in article.items():
                if key == 'source':
                    value = article['source']['name']
                if value is None:
                    value = ''
                for old, new in self._REPLACEMENTS:
                    value = value.replace(old, new)
                item[key] = value
            result.append(item)
        return result

    def read(self):
        '''
        Query newsapi for articles matching self.text.

        Uses the module-level settings my_api_key, Language, default,
        page_size, start and stop.  On any failure a popup is shown and an
        empty list is returned.

        Returns the list found under the 'articles' key of the response.
        '''
        try:
            newsapi = NewsApiClient(api_key=my_api_key)
            result = newsapi.get_everything(
                q=self.text,
                language=Language[default],
                page_size=page_size,
                from_param=start, to=stop)
        except Exception:
            # Best effort: report the problem to the user instead of crashing
            sg.popup('Server link failed !')
            return []
        if result['status'] != 'ok':
            sg.popup('Server link failed !')
            return []
        return result['articles']

    def update(self):
        '''Start one thread per article to download and draw its photo.'''
        for i in range(self.length):
            # self.image(i)  # Slow, but safe
            _thread.start_new_thread(self.image, (i,))  # Quick, but bug

    def image(self, i):
        '''
        Download the photo of article i and draw it on the global canvas
        `draw`; an 'X' is drawn instead when no photo could be loaded.

        Nothing is drawn when self.stop is set, either before or after the
        download.
        '''
        if self.stop:
            return
        loaded = self.load(i)   # load photo from web site by URL
        # The download can take a while; honour a stop request made meanwhile
        if self.stop:
            return
        offset = i*(gap*3.5+im_h)
        if not loaded:
            # Subtract self.where, as the image branch does, so that the
            # placeholder lands in the right slot after scrolling
            draw.DrawText('X',
                (gap*2+int(im_w/2),
                 canv_h-gap*2-int(im_h/2)-offset-self.where),
                color='white', font=font)
            return
        ids = draw.DrawImage(data=self.base64[i],
            location=(gap*2+(im_w-self.width[i])/2,
                canv_h-gap*2.5-(im_h-self.height[i])/2-offset-self.where))
        # Record the figure on this instance, not on the global `news`
        self.photo.append(ids)

    def load(self, i):
        '''
        Download the photo of article i, resize it to fit inside
        im_w x im_h (keeping the aspect ratio) and store it PNG/base64
        encoded in self.base64[i]; the size goes to self.width[i] and
        self.height[i].

        Returns True on success, False otherwise.
        '''
        url = self.data[i]['urlToImage']
        if url == '':
            return False
        try:
            # A timeout keeps a dead server from blocking the thread forever
            response = requests.get(url, timeout=10)
            if response.status_code != requests.codes.ok:
                return False
            im = Image.open(BytesIO(response.content))
        except Exception:
            print('Failed: request/status code/open', url)
            return False
        if im.width == 0 or im.height == 0:
            return False
        im = im.convert(mode='RGBA')
        if im.width*ratio >= im.height:
            # Relatively wider than the slot: width is the limiting side
            size = im_w, max(1, int(im.height*im_w/im.width))
        else:
            size = max(1, int(im.width*im_h/im.height)), im_h
        self.width[i], self.height[i] = size
        im = im.resize(size, resample=Image.LANCZOS)
        buffered = BytesIO()
        im.save(buffered, format="PNG")
        self.base64[i] = base64.b64encode(buffered.getvalue())
        return True
def wheel(event):
    '''
    Mouse wheel event handler: scroll the news list on the canvas.

    event -- the event object passed by the '<MouseWheel>' binding; only
             its `delta` attribute is read.

    The scroll offset is kept in news.where, which stays between `limit`
    (bottom of the list reached) and 0 (top of the list).  All figures on
    the global canvas `draw` are moved by the distance actually scrolled.
    '''
    delta = int(event.delta/2)
    # Lowest allowed offset.  Clamp to 0 so that a list shorter than the
    # canvas cannot be scrolled at all (an unclamped limit would be
    # positive and make the content jump).
    limit = min(0, canv_h - total_length)
    target = max(limit, min(0, news.where + delta))
    moved = target - news.where
    news.where = target
    draw.Move(0, -moved)
def split(txt):
    '''
    Split text into the pieces used for word wrapping.

    txt -- the string to split; leading/trailing whitespace is ignored.

    Returns a list in which
      * each run of characters with code point below 256 is one item,
      * each space, '\\n' or '\\r' becomes one ' ' item,
      * every other character (e.g. a CJK character) is an item of its own.
    An empty or whitespace-only string gives [].
    '''
    txt = txt.strip()
    if txt == '':
        return []
    result = []
    word = ''
    for ch in txt:
        if ch in (' ', '\n', '\r'):
            if word != '':
                result.append(word)
            result.append(' ')
            word = ''
        elif ord(ch) < 256:
            # Same character set as the module-level ASCII table
            # (code points 0-255), without the linear list search
            word += ch
        else:
            if word != '':
                result.append(word)
            result.append(ch)
            word = ''
    if word != '':
        result.append(word)
    return result
def wrap(txt, dist, lines_limit):
    '''
    Wrap a string by inserting '\\n' so that no line is wider than `dist`.

    txt         -- text to wrap
    dist        -- maximum line width, in the unit returned by s.measure()
    lines_limit -- maximum number of lines; text beyond it is dropped

    Widths are measured with the module-level font object `s`; the text is
    broken only between the pieces returned by split().

    Returns the wrapped string.  An empty `txt` gives '' (this path used to
    return a tuple, unlike every other path).
    '''
    if txt == '':
        return ''
    fitted = ''     # longest start of the current line known to fit
    current = ''    # current line including the piece under test
    result = ''
    lines = 0       # number of completed lines
    for piece in split(txt):
        current += piece
        if s.measure(current) > dist:
            # The piece does not fit: close the line and start the next
            # one with it (a space is not carried over to the new line)
            result += fitted + '\n'
            lines += 1
            current = fitted = '' if piece == ' ' else piece
        else:
            fitted = current
        if lines == lines_limit:
            fitted = ''
            break
    if fitted != '':
        result += fitted
    return result
def Layout():
    '''
    Build the top row of the main window.

    The row holds, from left to right: the language label and selector,
    the two calendar buttons for the start and stop dates, the key-words
    label and the key-words input field.

    Returns the layout as a list containing that single row.
    '''
    language_label = sg.Text('Language', font=font, pad=((40,0),0))
    language_combo = sg.Combo(
        values=language, default_value=default, size=(20,1),
        enable_events=True, key='Combo', readonly=True, font=font)
    # Each calendar button is its own target, so the picked date is shown
    # as the button text
    start_button = sg.CalendarButton(
        start, size=(12,1), target='date1', key='date1', format=date_fmt,
        font=font)
    stop_button = sg.CalendarButton(
        stop, size=(12,1), target='date2', key='date2', format=date_fmt,
        font=font)
    keywords_label = sg.Text('Key Words', font=font, pad=((5,0),0))
    keywords_input = sg.InputText(
        size=(50,1), font=font, pad=((10,0),0), do_not_clear=True,
        focus=True)
    toolbar = [language_label, language_combo, start_button, stop_button,
               keywords_label, keywords_input]
    return [toolbar]
def update_window():
    '''
    Build a new window showing the articles of the global `news`.

    Sets the globals `s` (font used by wrap() to measure text), `draw`
    (the Graph element of the new window) and `total_length` (height of
    the whole article list).

    Returns the new window, or None (after a popup) when `news` holds no
    articles.
    '''
    global draw, total_length
    global s
    s = FONT.Font(family='Segoe', size=16)
    if news.length == 0:
        sg.popup('No news found or server failed')
        return None
    total_length = (3.5*gap+im_h)*news.length+gap
    layout = Layout() + [[sg.Graph(canvas_size=(canv_w, canv_h), key='Graph',
        graph_bottom_left=(0,0), graph_top_right=(win_w, win_h),
        enable_events=True)]]
    window = sg.Window('News Center', layout=layout, finalize=True,
        return_keyboard_events=True)
    draw = window['Graph']
    for i in range(news.length):
        # Title line: running number plus the title, limited to title_h lines
        title = wrap(str(i+1)+'. '+news.data[i]['title'], title_w, title_h)
        # Wrap description by desc_w; compare by value, not identity
        if news.data[i]['description'] == '':
            desc = 'No description...'
        else:
            desc = wrap(news.data[i]['description'], desc_w, desc_h)
        offset = i*(gap*3.5+im_h)
        # Frame around the whole article
        draw.DrawRectangle((gap, canv_h-gap-offset),
            (canv_w-gap, canv_h-gap*3.5-im_h-offset), line_color='grey',
            line_width=1)
        # Green bar behind the title
        draw.DrawRectangle((gap*2, canv_h-offset-gap+16),
            (canv_w-gap*2, canv_h-offset-gap-16), line_color='green',
            fill_color='green')
        draw.DrawText(title, (gap*2+12, canv_h-int(gap/2)-offset),
            color='white', font=font, text_location='n'+'w')
        # Description goes to the right of the space kept for the photo
        draw.DrawText(desc, (gap*3+im_w, canv_h-gap*2.5-offset),
            color='white', font=font, text_location='n'+'w')
    window['Graph'].Widget.bind('<MouseWheel>', wheel)
    return window
# ---------------------------------------------------------------------------
# Module-level settings and the initial (help) window.
# Everything defined here is read as a global by the News class and by the
# functions wheel(), wrap(), Layout() and update_window().
# ---------------------------------------------------------------------------
# NOTE(review): ctypes.windll exists only on Windows - confirm before running
# this script on another platform.
ctypes.windll.user32.SetProcessDPIAware() # Set unit of GUI to pixels
# Languages selectable by a free user: display name -> code passed to newsapi
Language = {'Arabic':'ar', 'Chinese':'zh', 'Dutch':'nl', 'English':'en',
'French':'fr', 'German':'de', 'Hebrew':'he', 'Italian':'it',
'Northern Sami':'se', 'Norwegian':'no', 'Portuguese':'pt',
'Russian':'ru', 'Spanish':'es', 'Swedish':'sv'}
# Sorted display names, used as the values of the language selector
language = list(Language.keys())
language.sort()
# Characters with code points 0-255; split() keeps runs of them together
ASCII = [chr(i) for i in range(256)]
# Font used for all GUI elements and canvas text
font = 'Segoe 16'
pad = 20
# Currently selected language (a key of Language)
default = 'English'
# Search period: from one month ago (start) until today (stop), as strings
date_fmt = '%Y-%m-%d'
now = datetime.datetime.now()
stop = now.strftime(date_fmt)
start = (now + dateutil.relativedelta.relativedelta(months=-1))
start = start.strftime(date_fmt)
# Earliest date accepted by the event loop when the user picks new dates
month = start
page_size = 100 # 100 Max, the larger page_size is, the slower the search
# Window / canvas size and the size of the photo slot of each article
win_w = 1620
win_h = 720
im_w = 326
im_h = 145
# Height/width ratio of the photo slot, used when resizing photos
ratio = im_h/im_w
canv_w = win_w
canv_h = win_h
# Basic spacing unit between the drawn parts of an article
gap = 25
# Width limit and maximum number of lines for titles and descriptions
title_w = canv_w - 4*gap - 12
title_h = 1
desc_w = canv_w - 5*gap - im_w
desc_h = 5
# You can get your API-Key on https://newsapi.org/register
my_api_key = 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'
# Help text shown on the canvas until the first search
# (note: this name shadows the builtin help())
help = '''
Keywords or phrases to search for in the article title and body.
Advanced search is supported here:
► Surround phrases with quotes (") for exact match.
► Prepend words or phrases that must appear with a + symbol. Eg: +bitcoin
► Prepend words that must not appear with a - symbol. Eg: -bitcoin
► Alternatively you can use the AND / OR / NOT keywords,
and optionally group these with parenthesis.
Eg: crypto AND (ethereum OR litecoin) NOT bitcoin.
Date: Should be selected not more than one month before now for free user.
Speed: It will be more slower as higer page_size for web data load.
Now it is set to 100 (Max), not hurry, just wait a moment.
You can change it to smaller number, like 20.
URL: Click on title of each news to browse source URL.
'''
sg.change_look_and_feel('DarkBrown2')
# Initial window: the input row plus a canvas that only shows the help text.
# The canvas of this window does not enable events.
layout = Layout() + [[sg.Graph(canvas_size=(canv_w, canv_h), key='Graph',
graph_bottom_left=(0,0), graph_top_right=(win_w, win_h))]]
window = sg.Window('News Center', layout=layout, finalize=True,
return_keyboard_events=True)
# Here `draw` receives the value returned by DrawText; update_window()
# rebinds it to the Graph element of the result window
draw = window['Graph'].DrawText(help, (canv_w/2, canv_h/2),
color='white', font=font)
# Main event loop.  `news` holds the result of the search currently shown;
# it stays None until the first successful search.
news = None
while True:
    event, values = window.read()
    # Window Close
    if event is None:
        break
    # Search Starting by Enter key pressed
    if event == '\r':
        if len(values[0]) != 0:
            # Update date information, free user limited in 1-month news
            new_start = window['date1'].GetText()
            new_stop = window['date2'].GetText()
            start = new_start if new_start >= month else start
            stop = new_stop if new_stop >= month else stop
            if stop < start:
                start, stop = stop, start
            # update_window() reads the global `news`, so the new result has
            # to be bound to that name before the call
            previous, news = news, News(values[0])
            if previous is not None and news.length:
                # The old result is about to be replaced: tell its photo
                # threads to stop so they do not draw on the new canvas
                previous.stop = True
            window1 = update_window()
            if window1 is None:
                # Nothing found: the old window stays, so keep its result
                # usable (clickable titles, scrolling)
                news = previous
            else:
                window.close()
                window = window1
                news.update()
    if event == 'Graph' and news is not None:
        # News link clicked, transfer to web browser
        # Distance of the click from the top of the (scrolled) list
        dist = (canv_h-values['Graph'][1]-news.where)
        # Vertical distance from the centre of the title bar of that article
        off = dist % (3.5*gap+im_h) - gap
        index = int(dist / (3.5*gap+im_h))
        if ((-16<=off<=16) and (2*gap<=values['Graph'][0]<=canv_w-2*gap)
                and (index < news.length)):
            webbrowser.open(news.data[index]['url'])
    if event == 'Combo':
        # Set default value to selection
        default = values['Combo']
window.close()
本作品采用《CC 协议》,转载必须注明作者和本文链接