import jieba

# Sample sentence to segment; it mixes digits, Chinese text and a space.
word = "2012年我創立了一間公司 來教女孩寫程式"

# Full mode (cut_all=True); the tokens are joined with "/ " for display.
seg_list = jieba.cut(word, cut_all=True)
full_mode_text = "/ ".join(seg_list)
print(f"Full Mode: {full_mode_text}")

# Search-engine mode, displayed the same way.
seg_list = jieba.cut_for_search(word)
search_mode_text = "/ ".join(seg_list)
print(f"Search Mode: {search_mode_text}")
Building prefix dict from the default dictionary ... Loading model from cache /var/folders/54/vtjvmkpn2fq48gjfxh1xl3hr0000gn/T/jieba.cache Loading model cost 1.257 seconds. Prefix dict has been built succesfully.
Full Mode: 2012/ 年/ 我/ 創/ 立/ 了/ 一/ 間/ 公司/ / / 來/ 教女/ 女孩/ 寫/ 程式 Search Mode: 2012/ 年/ 我/ 創立/ 了/ 一間/ 公司/ / 來教/ 女孩/ 寫/ 程式
試試看,怎麼使用 jieba 把切出來的詞標註詞性
LINE Notify 登入
=> 個人頁面
=> 發行權杖
=> 選擇 「透過1對1聊天接收LINE Notify的通知」或是任何已存在群組
=> 發行
=> 複製權杖
權杖只會出現一次,請記得複製、保存好
import requests

# Access token issued on the LINE Notify site (placeholder; replace with yours).
token = "你的權杖"
msg = "用 Python 發 LINE Notify 通知"
url = "https://notify-api.line.me/api/notify"
headers = {
    "Authorization": "Bearer " + token,
    "Content-Type": "application/x-www-form-urlencoded",
}
payload = {'message': msg}
# Send the message as the form-encoded request body (data=) so the request
# matches the Content-Type declared above; params= would place the message
# in the URL query string and leave the body empty.
r = requests.post(url, headers=headers, data=payload)
我們先點開來看看這個連結 http://dcard.tw/_api/forums ,
可以看到 Dcard 所有的看板(看板列表)
json.loads(字串):json格式字串 -> python字典型態
json.dumps(字典):python字典型態 -> json格式字串
輔助工具:Json Parser Online
import json

import requests

url = "https://www.dcard.tw/_api/forums"
# The returned Response object carries the headers as well as the body.
rep = requests.get(url)
# The body of the response, as text.
html = rep.text
# Parse the JSON text, then show the name of the first entry.
json_data = json.loads(html)
first_forum = json_data[0]
print(first_forum['name'])
午夜實驗室
這個是現在網站常用的開發方式:
後端單純將資料傳出,前端再負責呈現畫面,
因此找到後端跟前端溝通的網址,就可以只要處理資料,
不需要理會網頁畫面看到的圖片配色版型...etc.
這個網址我們常稱為「API」。
看板列表 | http://dcard.tw/_api/forums |
看板文章列表 | http://dcard.tw/_api/forums/{看板名稱}/posts |
文章內文 | http://dcard.tw/_api/posts/{文章編號} |
文章內引用連結 | http://dcard.tw/_api/posts/{文章編號}/links |
文章內留言 | http://dcard.tw/_api/posts/{文章編號}/comments |
# Create (or overwrite) test1.txt in the current directory and write "Hello".
# The with-statement closes the file even if the write raises.
with open("test1.txt", "w") as fo:
    fo.write("Hello")
# Non-text (binary) files need "b" appended to the open() mode.
import requests

img_src = "http://4.bp.blogspot.com/-6HCy6DZdqX4/U_3dySRjKPI/AAAAAAAAclI/5e4V6d7t56E/s1600/Photos4.jpg"
img_response = requests.get(img_src)
# .content is the response body as raw bytes, which is why the file is
# opened in binary mode below.
img = img_response.content
# The with-statement closes the file even if the write raises.
with open("image.jpg", "wb") as fo:
    fo.write(img)
# Copy image.jpg to image_copy.jpg by reading all of its bytes and writing
# them back out. Both handles are closed by the with-statement, even when
# the read or the write raises.
with open("image.jpg", "rb") as fi, open("image_copy.jpg", "wb") as fo:
    content = fi.read()
    fo.write(content)
### Directory handling
import os
import shutil

# Show every entry in the current working directory.
print(os.listdir())

# For each entry print: is-file flag, is-directory flag, entry name.
for name in os.listdir():
    print(os.path.isfile(name), os.path.isdir(name), name)

dir_name = "my_dir"
# Toggle: create the directory when it is not listed, otherwise remove it
# together with everything inside it.
if dir_name not in os.listdir():
    os.makedirs(dir_name)
else:
    shutil.rmtree(dir_name)

# Show the listing again so the change is visible.
print(os.listdir())
['02_conditional.slides.html', '.DS_Store', 'test1.txt', '02_conditional.ipynb', 'images', '04_loop.ipynb', '07_v2_applications.ipynb', '01_basicObject_IO.slides.html', '07_v2_applications.slides.html', '06_onlinejudge_doc.ipynb', '07_applications.slides.html', 'image_copy.jpg', '08_resources.ipynb', '08_resources.slides.html', '01_basicObject_IO.ipynb', '05_dictionary.ipynb', '07_applications.ipynb', '03_list.slides.html', '00_intro.ipynb', '06_onlinejudge_doc.slides.html', 'photo_dir', '00_intro.slides.html', '03_list.ipynb', '.ipynb_checkpoints', '04_loop.slides.html', 'reveal.js', '05_dictionary.slides.html', 'image.jpg'] True False 02_conditional.slides.html True False .DS_Store True False test1.txt True False 02_conditional.ipynb False True images True False 04_loop.ipynb True False 07_v2_applications.ipynb True False 01_basicObject_IO.slides.html True False 07_v2_applications.slides.html True False 06_onlinejudge_doc.ipynb True False 07_applications.slides.html True False image_copy.jpg True False 08_resources.ipynb True False 08_resources.slides.html True False 01_basicObject_IO.ipynb True False 05_dictionary.ipynb True False 07_applications.ipynb True False 03_list.slides.html True False 00_intro.ipynb True False 06_onlinejudge_doc.slides.html False True photo_dir True False 00_intro.slides.html True False 03_list.ipynb False True .ipynb_checkpoints True False 04_loop.slides.html False True reveal.js True False 05_dictionary.slides.html True False image.jpg ['02_conditional.slides.html', '.DS_Store', 'test1.txt', '02_conditional.ipynb', 'images', '04_loop.ipynb', '07_v2_applications.ipynb', '01_basicObject_IO.slides.html', '07_v2_applications.slides.html', '06_onlinejudge_doc.ipynb', '07_applications.slides.html', 'image_copy.jpg', '08_resources.ipynb', '08_resources.slides.html', '01_basicObject_IO.ipynb', '05_dictionary.ipynb', '07_applications.ipynb', '03_list.slides.html', '00_intro.ipynb', '06_onlinejudge_doc.slides.html', 'photo_dir', 
'00_intro.slides.html', '03_list.ipynb', '.ipynb_checkpoints', '04_loop.slides.html', 'reveal.js', '05_dictionary.slides.html', 'my_dir', 'image.jpg']
### File operations
import os
import shutil

filename1 = "test1.txt"
filename2 = "test2.txt"
filename3 = "test3.txt"
filename4 = "test4.txt"
filename5 = "test5.txt"
# Files that are removed again at the end of this cell.
del_list = [filename3, filename4]
print(del_list)

# test1.txt is expected to have been created by an earlier step; when it is
# missing, the copy and move steps are skipped.
if filename1 in os.listdir():
    # Make three copies of the source file ...
    for copy_name in (filename2, filename3, filename4):
        shutil.copyfile(filename1, copy_name)
    # ... then rename the first copy.
    shutil.move(filename2, filename5)

# Remove the files named in del_list, if they are present.
for name in os.listdir():
    if name in del_list:
        os.remove(name)
['test3.txt', 'test4.txt']
from PIL import Image

# Process the picture with PIL's Image module.
# image.jpg is expected to have been created by an earlier step.
image = Image.open("image.jpg")
width, height = (int(side) for side in image.size)
print(width, height)

# Resize: thumbnail() changes `image` itself, so the size is read again below.
image.thumbnail((800, 800))
image.save("image_resize.jpg", 'JPEG', quality=90)
width, height = (int(side) for side in image.size)
print(width, height)
1600 1067 800 533
import os

import requests

### Fetch the page content
url = "https://www.dcard.tw/f/pet/p/232135806"
response = requests.get(url)
html = response.text

### Create the output directory if it is not already listed
dir_name = "photo_dir"
if dir_name not in os.listdir():
    os.makedirs(dir_name)

### Extract the image URLs
# Splitting on "<img" yields one chunk per image tag; each chunk is cut at
# the first "/>" so only that tag's attributes are inspected.
for temp in html.split("<img"):
    line = temp.split("/>")[0]
    if "src=" not in line:
        continue
    # Normalise single quotes to double quotes, then take the text between
    # the last src=" and the next double quote.
    img_src = line.replace("\'", "\"").split("src=\"")[-1].split("\"")[0]
    # Only URLs containing one of these extensions are downloaded.
    if not any(ext in img_src for ext in (".jpeg", ".jpg", ".JPG", ".png")):
        continue

    ### Download the image
    img_response = requests.get(img_src)
    img_binary = img_response.content

    ### Write the image file, named after the last path segment of the URL
    filename = img_src.split("/")[-1]
    filepath = "{}/{}".format(dir_name, filename)
    # The with-statement closes the file even if the write raises.
    with open(filepath, "wb") as fo:
        fo.write(img_binary)