吳佳鑫
Social observer, social entrepreneur, education innovator, gamer, media worker, and cook: an ordinary person sharing with you the big and small things of everyday life, as seen and heard.
Know a little about everything, and life gets a little more colorful.
pip install beautifulsoup4
pip install requests
pip install lxml
pip install html5lib
import requests
from bs4 import BeautifulSoup

url = "http://127.0.0.1/py"
response = requests.get(url)
html_doc = response.text
soup = BeautifulSoup(html_doc, 'html.parser')
print(soup.prettify())
import requests
from bs4 import BeautifulSoup

with open('index.htm') as f:
    html_doc = f.read()
soup = BeautifulSoup(html_doc, 'html.parser')
print(soup.prettify())
print(soup.head.title.string)
print(soup.find_all('a'))
for link in soup.find_all('a'):
    print(link.get('href'))
print(soup.find(id="a1"))
print(soup.get_text())
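The lines above all assume an existing soup object. Here is a minimal, self-contained sketch that runs the same calls end to end; the HTML string and the a1 id are made up purely for illustration:

from bs4 import BeautifulSoup

# A made-up document, just to exercise the calls above
html_doc = """
<html><head><title>Demo page</title></head>
<body>
  <p id="a1">first paragraph</p>
  <a href="https://example.com/one">one</a>
  <a href="https://example.com/two">two</a>
</body></html>
"""
soup = BeautifulSoup(html_doc, 'html.parser')

print(soup.head.title.string)      # Demo page
print(soup.find_all('a'))          # both <a> tags as a list
for link in soup.find_all('a'):
    print(link.get('href'))        # each href attribute
print(soup.find(id="a1"))          # the tag whose id is "a1"
print(soup.get_text())             # all text with the tags stripped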
pip install selenium
# coding: utf-8
from bs4 import BeautifulSoup
from selenium import webdriver
import datetime

# Central Weather Bureau (Ministry of Transportation and Communications) - Taichung City
url = 'https://www.cwb.gov.tw/V8/C/W/OBS_County.html?ID=66'

# Start the simulated browser
driver = webdriver.Chrome()
# Load the page
driver.get(url)
# Use lxml as the parser
soup = BeautifulSoup(driver.page_source, features='lxml')

# <tbody id='stations'>
tbody = soup.find('tbody', {'id': 'stations'})
# All <tr> tags inside the <tbody>
trs = tbody.find_all('tr')
# print(trs)

# Get the current year with datetime
year = str(datetime.datetime.now().year)

# For each <tr> in the list
for tr in trs:
    # The <th> tag inside the <tr>
    th = tr.th
    # Text of the next element ('儀器故障' means "instrument malfunction" on the page)
    name = th.next_element.text
    if not name == '儀器故障':
        date = th.nextSibling.text
        tds = tr.find_all('td')
        # print(len(tds))
        if len(tds) > 2:
            temp = tds[1].text
        else:
            temp = '-'
        print(name, temp, date)
    else:
        print(name, '儀器故障')

# Close the simulated browser
driver.quit()
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By

url = "https://mis.taifex.com.tw/futures/RegularSession/EquityIndices/FuturesDomestic"

# Start the simulated browser
driver = webdriver.Chrome()
# Load the page
driver.get(url)

# btn = driver.find_element(By.CLASS_NAME, 'btn')
btn = driver.find_element(By.TAG_NAME, 'button')
btn.click()

# Use lxml as the parser
soup = BeautifulSoup(driver.page_source, features='lxml')
# print(soup)
print(soup.prettify())

table = soup.find('tbody')
print(table)

# Close the simulated browser
driver.quit()
btn = driver.find_element_by_class_name("btn")
btn = driver.find_element(By.CLASS_NAME, 'btn')
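find_element_by_class_name is the Selenium 3 style and has been removed in Selenium 4; find_element with a By strategy is the current form. A rough sketch of the other common locator strategies, reusing the taifex page above; the selectors themselves are guesses about that page's markup, not verified:

from selenium import webdriver
from selenium.webdriver.common.by import By

driver = webdriver.Chrome()
driver.get("https://mis.taifex.com.tw/futures/RegularSession/EquityIndices/FuturesDomestic")

# The same kind of lookup, expressed with different strategies (assumed selectors)
btn = driver.find_element(By.TAG_NAME, 'button')
btn = driver.find_element(By.CLASS_NAME, 'btn')
btn = driver.find_element(By.CSS_SELECTOR, 'button.btn')
# find_elements (plural) returns every match instead of only the first
buttons = driver.find_elements(By.TAG_NAME, 'button')
print(len(buttons))

driver.quit()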
<tbody></tbody>
# Package for downloading the data
import requests as r
# Packages for data processing
import pandas as pd
from bs4 import BeautifulSoup

url = "https://mis.taifex.com.tw/futures/api/getQuoteList"
payload = {"MarketType": "0", "SymbolType": "F", "KindID": "1", "CID": "TXF",
           "ExpireMonth": "", "RowSize": "全部", "PageNo": "", "SortColumn": "", "AscDesc": "A"}
res = r.post(url, json=payload)
data = res.json()

df = pd.DataFrame(data['RtData']['QuoteList'])
df = df[["DispCName", "Status", "CBidPrice1", "CBidSize1", "CAskPrice1", "CAskSize1",
         "CLastPrice", "CDiff", "CAmpRate", "CTotalVolume", "COpenPrice", "CHighPrice",
         "CLowPrice", "CRefPrice", "CTime"]]
df.columns = ['商品', '狀態', '買進', '買量', '賣出', '賣量', '成交價', '漲跌', '振幅%',
              '成交量', '開盤', '最高', '最低', '參考價', '時間']
df.to_csv('futures_regular_trading.csv')
print(df)
pip install -U scikit-learn
pip install seaborn
import sklearn
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

iris = sns.load_dataset('iris')
print(iris.head())

sns.set()
sns.pairplot(iris, hue='species', height=3)
plt.show()
from sklearn import tree

features = [[155, 1], [160, 0], [165, 1], [172, 0]]
labels = [1, 0, 1, 0]

clf = tree.DecisionTreeClassifier()     # classifier
clf = clf.fit(features, labels)         # fit the classification model
wantPredict = clf.predict([[158, 1]])   # predict a new sample
if wantPredict == [1]:
    print('This is man')
elif wantPredict == [0]:
    print('This is woman')
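To see what rule the tree actually learned from these four samples, a short follow-up sketch can print the fitted tree as text; the feature names below are placeholders chosen for illustration, not anything the original code defines:

from sklearn import tree

features = [[155, 1], [160, 0], [165, 1], [172, 0]]
labels = [1, 0, 1, 0]

clf = tree.DecisionTreeClassifier()
clf = clf.fit(features, labels)

# Print the learned decision rules as plain text
print(tree.export_text(clf, feature_names=['feature_0', 'feature_1']))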
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
import numpy as np

# Load the dataset
iris = datasets.load_iris()
# Define the features and labels
iris_data = iris.data
iris_label = iris.target
print(iris_data[0:3])

# Split the data into training and test sets
train_data, test_data, train_label, test_label = train_test_split(iris_data, iris_label, test_size=0.2)

# Classify with KNeighbors
knn = KNeighborsClassifier()
# Train on the training data
knn.fit(train_data, train_label)
# Predict the test data
print(knn.predict(test_data))
# Compare against the true labels
print(test_label)
from sklearn.linear_model import LinearRegression
import numpy as np

# Build the data
x = np.array([1, 2, 3, 4, 5])
y = np.array([5, 7, 9, 11, 13])

# Build the model
reg = LinearRegression()
# Train the model
reg.fit(x.reshape(-1, 1), y)
# Predict
print(reg.predict([[6.0]]))
y = 2x + 3
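The training points above follow the relation y = 2x + 3 exactly, so the fitted model should recover roughly that slope and intercept. A quick check on the same data:

from sklearn.linear_model import LinearRegression
import numpy as np

x = np.array([1, 2, 3, 4, 5])
y = np.array([5, 7, 9, 11, 13])

reg = LinearRegression()
reg.fit(x.reshape(-1, 1), y)

# The learned slope and intercept should be close to 2 and 3
print(reg.coef_)        # expected: [2.]
print(reg.intercept_)   # expected: 3.0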
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
from sklearn import datasets
import numpy as np

# Build the data
x, y = datasets.make_regression(n_samples=200, n_features=1, n_targets=1, noise=10)
plt.scatter(x, y, linewidths=0.1)
plt.show()

# Build the model
reg = LinearRegression()
# Train the model
reg.fit(x, y)

predict = reg.predict(x[:200, :])
plt.plot(x, predict, c="red")
plt.scatter(x, y)
plt.show()

# Predict
print(reg.predict(np.array([6]).reshape(1, -1)))
print(reg.predict([[6]]))
from sklearn.svm import SVC

# Build the data
x = [[1, 2], [3, 4], [5, 6], [7, 8]]
y = [0, 1, 1, 0]

# Build the model
clf = SVC()
# Train the model
clf.fit(x, y)
# Predict
print(clf.predict([[6, 7]]))
from sklearn import preprocessing  # feature standardization
import numpy as np
from sklearn.model_selection import train_test_split  # cross_validation in old versions; model_selection in new ones
from sklearn.datasets import make_classification
from sklearn.svm import SVC
import matplotlib.pyplot as plt

# Generate 300 samples with 2 features; random_state makes the generated dataset the same on every run
x, y = make_classification(n_samples=300, n_features=2, n_redundant=0, n_informative=2,
                           random_state=3, scale=100, n_clusters_per_class=1)
plt.scatter(x[:, 0], x[:, 1], c=y)
plt.show()

# Train an SVC classifier on the raw features
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
clf = SVC()
clf.fit(x_train, y_train)
print(clf.score(x_test, y_test))

# Standardize the features, then train again
x = preprocessing.scale(x)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
clf = SVC()
clf.fit(x_train, y_train)
print(clf.score(x_test, y_test))
from sklearn import datasets
from sklearn.model_selection import KFold
from sklearn.svm import SVC

iris = datasets.load_iris()
X = iris.data
y = iris.target

# Split the data into 5 folds and score an SVC on each fold
kf = KFold(n_splits=5)
for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    clf = SVC()
    clf.fit(X_train, y_train)
    print(clf.score(X_test, y_test))
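One detail worth knowing: load_iris returns the samples grouped by class, so an unshuffled KFold gives each test fold only one or two of the three species. Passing shuffle=True mixes all classes into every fold; the random_state value below is an arbitrary choice for reproducibility:

from sklearn import datasets
from sklearn.model_selection import KFold
from sklearn.svm import SVC

iris = datasets.load_iris()
X = iris.data
y = iris.target

# shuffle=True mixes the classes into every fold
kf = KFold(n_splits=5, shuffle=True, random_state=0)
for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    clf = SVC()
    clf.fit(X_train, y_train)
    print(clf.score(X_test, y_test))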
from sklearn.model_selection import cross_val_score
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
import numpy as np
import matplotlib.pyplot as plt

iris = datasets.load_iris()
X = iris.data
y = iris.target

# 5-fold cross-validation for a single K value
knn = KNeighborsClassifier(n_neighbors=10)
scores = cross_val_score(knn, X, y, cv=5, scoring='accuracy')
print(scores)
print(scores.mean())

# Try K from 1 to 30 and record the mean 10-fold accuracy for each
k_range = range(1, 31)
k_scores = []
for k_number in k_range:
    knn = KNeighborsClassifier(n_neighbors=k_number)
    scores = cross_val_score(knn, X, y, cv=10, scoring='accuracy')
    k_scores.append(scores.mean())

plt.plot(k_range, k_scores)
plt.xlabel('Value of K for KNN')
plt.ylabel('Cross-Validated Accuracy')
plt.show()
pip install joblib
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
import joblib

iris = datasets.load_iris()
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Train the model
clf = LogisticRegression()
clf.fit(X_train, y_train)

# Save the model
joblib.dump(clf, 'model.joblib')

# Load the model
loaded_model = joblib.load('model.joblib')
print(loaded_model.score(X_test, y_test))
import tensorflow as tf
from tensorflow import keras

# Build the model
model = keras.Sequential()
model.add(keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)))
model.add(keras.layers.MaxPooling2D((2, 2)))
model.add(keras.layers.Conv2D(64, (3, 3), activation='relu'))
model.add(keras.layers.MaxPooling2D((2, 2)))
model.add(keras.layers.Conv2D(64, (3, 3), activation='relu'))
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(64, activation='relu'))
model.add(keras.layers.Dense(10, activation='softmax'))

# Compile the model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Load the CIFAR-10 dataset and scale pixel values to [0, 1]
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()
x_train = x_train / 255.0
x_test = x_test / 255.0

# Train the model
model.fit(x_train, y_train, epochs=10, validation_split=0.1)

# Evaluate the model
test_loss, test_acc = model.evaluate(x_test, y_test, verbose=2)
print(test_acc)
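Continuing directly from the script above, the trained model can also classify a single image. This is only a small sketch; the class-name list below is the standard CIFAR-10 label order:

import numpy as np

# Standard CIFAR-10 class names, in label order
class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck']

# predict expects a batch, so take a one-image slice of the test set
probs = model.predict(x_test[:1])
predicted = int(np.argmax(probs[0]))
print(class_names[predicted], int(y_test[0][0]) == predicted)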