python 讀取 webcam 並置於最上層
請先安裝 pip install opencv-python
import cv2
# Open the webcam (index 0 is the default camera).
cap = cv2.VideoCapture(0)  # use this line on Raspberry Pi 5
# cap = cv2.VideoCapture(0, cv2.CAP_DSHOW)  # use this line on Windows (DirectShow backend)
if not cap.isOpened():
    # Fail with a message instead of exiting silently when no camera is available.
    cap.release()
    raise SystemExit("Cannot open webcam 0")

# Window title; also used as the handle for every window call below.
window_name = "Webcam Feed - press q to quit"

try:
    # Create a user-resizable window and give it a small initial size (160x120).
    cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
    cv2.resizeWindow(window_name, 160, 120)
    # Ask the window manager to keep this window above the others.
    cv2.setWindowProperty(window_name, cv2.WND_PROP_TOPMOST, 1)

    while True:
        # Grab the next frame; stop when the camera no longer delivers one.
        ret, frame = cap.read()
        if not ret:
            break
        # Show the frame.
        cv2.imshow(window_name, frame)
        # Leave the loop when 'q' is pressed.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always release the camera and close the window, even if the loop raised.
    cap.release()
    cv2.destroyAllWindows()
python 多執行緒
import threading
import time
# Counter shared by both worker threads; each thread stops once it exceeds 5.
i = 0
def print_every_three_seconds():
    """Report the shared counter ``i`` every 3 seconds until it exceeds 5.

    This function only reads ``i``; it never changes it.
    """
    global i
    while not i > 5:
        print(f"執行緒1: 每 3 秒執行一次=={i}")
        time.sleep(3)
    print("執行緒1 stopped")
def print_every_one_second():
    """Advance the shared counter ``i`` once per second until it exceeds 5.

    Each pass increments ``i``, prints it, and sleeps for one second. Once
    ``i`` is greater than 5 the function prints a stop message and returns.
    """
    global i
    while True:
        # The increment must happen on every pass. Doing it once before the
        # loop left ``i`` stuck at 1, so the ``i > 5`` exit was never reached.
        i = i + 1
        if i > 5:
            print("執行緒2 stopped")
            break
        print("執行緒2: 每 1 秒執行一次--" + str(i))
        time.sleep(1)
# Create the two worker threads.
t1 = threading.Thread(target=print_every_three_seconds)
t2 = threading.Thread(target=print_every_one_second)
# Start them in order.
for worker in (t1, t2):
    worker.start()
# Block the main thread until both workers have returned.
for worker in (t1, t2):
    worker.join()
print('兩個執行緒都停下來了')
運用 python 讓電腦即時監聽,錄音,辨識成文字。當聽到 再見 時,程式就會停下來。----語音助理前哨站
import threading
import time
import pyaudio
import numpy as np
import wave
import torch
import whisper # 這裡會用到 ffmpeg,需要先安裝。
from opencc import OpenCC
# State shared between the recorder thread and the recogniser thread.
# Recordings go into numbered files; the indices wrap around modulo iMax.
iIndexWrite = 0  # index of the next file the recorder writes
iIndexRead = 0  # index of the next file the recogniser reads
iCount = 0  # recordings saved but not yet transcribed
iMax = 10  # maximum number of pending recordings
booStop = False  # set to True to make both thread loops exit
def getFDecibel(oAudioData):
    """Return the level of a block of samples as 20*log10 of its RMS value.

    The samples are converted to float32 before squaring. When the RMS is
    not greater than zero the result is negative infinity.
    """
    samples = oAudioData.astype(np.float32)  # float maths avoids integer overflow when squaring
    mean_square = np.mean(np.square(samples))
    rms = np.sqrt(mean_square)
    if not rms > 0:
        return -np.inf
    # The tiny offset guards against log(0).
    return 20 * np.log10(rms + 1e-10)
def getStrAduioFilename(iIndex):
    """Return the WAV file name used for recording slot ``iIndex``."""
    return f"rec_{iIndex}.wav"
def threadA():
    """Listen to the microphone and save each detected utterance as a WAV file.

    Audio is read in CHUNK-sized blocks. A block louder than THRESHOLD_DB
    starts (or continues) a recording. A recording ends after
    SILENCE_DURATION seconds without a loud block, or as soon as it grows
    beyond CUT_DURATION seconds. Recordings shorter than DROP_DURATION
    seconds are discarded. Kept recordings are written to the file named by
    getStrAduioFilename(iIndexWrite) while fewer than iMax are pending, then
    iIndexWrite is advanced (modulo iMax) and iCount is incremented.

    The loop runs until the global booStop becomes true. The audio stream
    and the PyAudio instance are always released on exit.
    """
    global iIndexWrite, iIndexRead, iCount, iMax, booStop
    CHUNK = 1024  # samples read per call
    FORMAT = pyaudio.paInt16  # sample format (16-bit)
    CHANNELS = 1  # mono
    RATE = 16000  # sample rate (Hz)
    THRESHOLD_DB = 70  # level above which a block counts as sound
    SILENCE_DURATION = 1  # seconds of quiet that end a recording
    DROP_DURATION = 2  # recordings shorter than this many seconds are discarded
    CUT_DURATION = 30  # recordings longer than this many seconds are cut off

    oAudio = pyaudio.PyAudio()
    oStream = None
    try:
        oStream = oAudio.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK)
        booRecording = False
        oFrames = []
        fSilenceStart = None
        print("開始聆聽")
        while not booStop:
            # Read one block of samples.
            oAudioData = np.frombuffer(oStream.read(CHUNK, exception_on_overflow=False), dtype=np.int16)
            fDecibel = getFDecibel(oAudioData)
            # print(f"目前分貝數:{fDecibel:.2f} dB")
            if fDecibel > THRESHOLD_DB:
                fSilenceStart = None  # sound heard: reset the silence timer
                if not booRecording:
                    print("偵測到聲音,開始錄音...")
                    booRecording = True
                    oFrames = []
            elif booRecording and fSilenceStart is None:
                fSilenceStart = time.time()  # first quiet block: start the silence timer
            if not booRecording:
                continue

            oFrames.append(oAudioData.tobytes())
            fSeconds = len(oFrames) * CHUNK / RATE
            booSilent = fSilenceStart is not None and (time.time() - fSilenceStart) >= SILENCE_DURATION
            # The length limit is checked on every block, loud or quiet, so
            # continuous sound cannot make the recording grow without bound.
            booTooLong = fSeconds > CUT_DURATION
            if not (booSilent or booTooLong):
                continue

            if fSeconds < DROP_DURATION:
                print("不足" + str(DROP_DURATION) + "秒,不予儲存")
            else:
                if booTooLong:
                    print("超過" + str(CUT_DURATION) + "秒,截斷錄音。(" + str(fSeconds) + ")")
                else:
                    print("偵測到靜音,停止錄音。(" + str(fSeconds) + ")")
                # NOTE(review): iCount is updated here and in threadB without a
                # lock; consider a lock or queue.Queue if lost updates appear.
                if iCount < iMax:
                    strFilename = getStrAduioFilename(iIndexWrite)
                    print(f"正在儲存錄音檔案:{strFilename}")
                    # The context manager closes the file even if a write fails.
                    with wave.open(strFilename, 'wb') as oWF:
                        oWF.setnchannels(CHANNELS)
                        oWF.setsampwidth(oAudio.get_sample_size(FORMAT))
                        oWF.setframerate(RATE)
                        oWF.writeframes(b''.join(oFrames))
                    print("錄音儲存完成!")
                    iIndexWrite = (iIndexWrite + 1) % iMax
                    iCount = iCount + 1
                else:
                    print("空間不足,不予儲存")
            booRecording = False
            oFrames = []
            fSilenceStart = None
    except KeyboardInterrupt:
        # NOTE(review): KeyboardInterrupt is normally raised in the main thread,
        # so this handler probably only fires if threadA() is called there.
        booStop = True
        print("手動結束聆聽")
    finally:
        # Stop and close the audio stream, then release PyAudio.
        if oStream is not None:
            oStream.stop_stream()
            oStream.close()
        oAudio.terminate()
def threadB():
    """Transcribe pending recordings with Whisper until "再見" is recognised.

    Loads the "medium" Whisper model on CUDA when available, otherwise on the
    CPU. While booStop is false and iCount is positive, it transcribes the
    file for slot iIndexRead, converts the text with OpenCC 's2t', prints
    it, advances iIndexRead (modulo iMax) and decrements iCount. If the
    converted text contains "再見", booStop is set to True. The loop sleeps
    0.1 s between passes.
    """
    global iIndexWrite, iIndexRead, iCount, iMax, booStop
    use_cuda = torch.cuda.is_available()
    device_name = "cuda" if use_cuda else "cpu"
    model = whisper.load_model("medium").to(device_name)  # tiny, base, small, medium, large
    converter = OpenCC('s2t')  # 's2t' converts Simplified to Traditional Chinese
    print(f"使用裝置:{device_name}")
    while not booStop:
        if iCount > 0:
            wav_name = getStrAduioFilename(iIndexRead)
            # fp16 is enabled only when running on CUDA.
            result = model.transcribe(wav_name, language="zh", fp16=use_cuda)
            text = converter.convert(result["text"])
            print("------")
            print(f"辨識 {wav_name} 結果:{text}")
            print("------")
            iIndexRead = (iIndexRead + 1) % iMax
            iCount = iCount - 1
            if "再見" in text:
                print('Goodbye!')
                booStop = True
        time.sleep(0.1)
# Create the recorder thread and the recogniser thread.
oThreadA = threading.Thread(target=threadA)
oThreadB = threading.Thread(target=threadB)
# Start them in order.
for oWorker in (oThreadA, oThreadB):
    oWorker.start()
# Block the main thread until both workers have returned.
for oWorker in (oThreadA, oThreadB):
    oWorker.join()
print('兩個執行緒都停下來了')
運用 nadermx/backgroundremover 來批次去背
from backgroundremover.bg import remove
def remove_bg(src_img_path, out_img_path, model_name="u2net"):
    """Remove the background of one image file and write the result.

    Args:
        src_img_path: Path of the image to read (opened in binary mode).
        out_img_path: Path the processed image bytes are written to.
        model_name: Model passed to backgroundremover's ``remove``. The
            choices listed by the original snippet are "u2net" (default),
            "u2net_human_seg" and "u2netp".
    """
    # Context managers close both files even if reading, removal or writing fails.
    with open(src_img_path, "rb") as src_file:
        data = src_file.read()
    # Alpha matting is switched off; the alpha_matting_* values are passed through
    # unchanged (presumably unused while alpha_matting is False -- TODO confirm).
    img = remove(data, model_name=model_name,
                 alpha_matting=False,
                 alpha_matting_foreground_threshold=240,
                 alpha_matting_background_threshold=10,
                 alpha_matting_erode_structure_size=10,
                 alpha_matting_base_size=1000)
    with open(out_img_path, "wb") as out_file:
        out_file.write(img)
srcPath = "D:/temp/20250113/images001/"
dstPath = "D:/temp/20250113/images003/"
# Process 0001.png through 0524.png, writing each result under the same name.
for i in range(524):
    number = f"{i + 1:04d}"  # zero-padded to four digits
    srcFile = f"{srcPath}{number}.png"
    dstFile = f"{dstPath}{number}.png"
    remove_bg(srcFile, dstFile)
    print(dstFile)
pyenv
[安裝]
curl -fsSL https://pyenv.run | bash
geany ~/.profile (加入下面三行,存檔)
export PYENV_ROOT="$HOME/.pyenv"
[[ -d $PYENV_ROOT/bin ]] && export PATH="$PYENV_ROOT/bin:$PATH"
eval "$(pyenv init - bash)"
geany ~/.bashrc (加入上面三行,存檔)
重開 terminal 即可
pyenv -v (查看目前 pyenv 版本)
pyenv versions (查看目前系統中有哪些版本,以及正在使用哪一個版本)
pyenv install --list (查看有哪些版本可以安裝)
pyenv install 3.8.1 (示範安裝某個版本)
若這邊安裝失敗(缺少編譯 Python 所需的元件),請先用下面幾行補上,再重新執行 pyenv install
sudo apt install libbz2-dev
sudo apt install libncurses-dev
sudo apt install libffi-dev
sudo apt install libreadline-dev
sudo apt install libssl-dev
sudo apt install libsqlite3-dev
sudo apt install tk-dev
sudo apt install liblzma-dev
pyenv shell 3.12.7 (切換本次 terminal 使用哪個版本)
pyenv global 3.12.7 (變更作業系統預設使用版本)