備忘錄_20160105(定位)
修改
回首頁
程式 2022-05-09 16:55:26 1652086527 100
嘗試語音辨識 try speech recognition-step05
嘗試語音辨識 try speech recognition-step05
step04 是每次錄製 0.1 秒的聲音資料去分析,將有用的區塊連接起來。
然而用手機去處理時,會在 stop 到 start 之間聲音斷掉,這樣在辨識上的效果應該會很差。
所以新策略改為每次錄製 1 秒,以減少聲音的斷點,並自動切割出連續有聲音的片段;若前一秒結尾與後一秒開頭的聲音應該是連續的,則會把它們連成同一個區塊。
暫時先沒做 Fast Fourier Transform
偵測聲音,切割,繪製振幅圖,且可聽聲音。
產生聲音的參考文章如下
Generate Sounds Programmatically With Javascript
Advanced techniques: Creating and sequencing audio
●20220509.php
<!doctype html>
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8" />
<title></title>
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<style>
*
{
font-family: "WenQuanYi Zen Hei","文泉驛正黑","Heiti TC","黑體-繁","LiHei Pro","儷黑 Pro","PingFang TC","Droid Sans","Roboto","Microsoft JhengHei","微軟正黑體",sans-serif;
/* Font list groups, in order: [Linux fonts] [iOS fonts] [Android fonts] [Microsoft JhengHei] [sans-serif, i.e. Hei-style, fallback] */
}
html, body
{
margin: 0;
border: 0;
padding: 0;
width: 100%;
height: 100%;
}
a:link, a:visited
{
color: blue;
text-decoration: none;
}
#divWrapper
{
padding: 1em;
}
</style>
</head>
<body>
<div id="divWrapper">
<div></div>
<div>
<button type="button" onclick="startRecording();">開始錄音</button>
<span id="spanRecordingStatus"></span>
<button type="button" onclick="stopRecording();">停止錄音</button>
</div>
<div id="divOutput"></div>
</div>
<script>
// Global debug switch: when true, the processing steps print trace lines via outputLine().
var booDebug = false;

// Install ltrim / rtrim / trim on String.prototype, but only where the runtime
// does not already provide a method of that name.
[
  ["ltrim", /^\s+/],
  ["rtrim", /\s+$/],
  ["trim", /^\s+|\s+$/g]
].forEach(function (aEntry) {
  var strMethod = aEntry[0];
  var oPattern = aEntry[1];
  if (!String.prototype[strMethod]) {
    String.prototype[strMethod] = function () {
      return this.replace(oPattern, "");
    };
  }
});
/**
 * Shorthand for document.getElementById.
 *
 * @param {string} strId - id attribute of the wanted element.
 * @returns {HTMLElement|null} Whatever document.getElementById returns for that id.
 */
function gebi(strId) {
  var oElement = document.getElementById(strId);
  return oElement;
}
/**
 * Append an HTML fragment followed by a line break to the output area.
 *
 * @param {string} strHtml - HTML to show; "<br>" is appended before it is passed to output().
 */
function outputLine(strHtml) {
  var strWithBreak = strHtml + "<br>";
  output(strWithBreak);
}
/**
 * Wrap an HTML fragment in a new <div> and append it to #divOutput.
 *
 * @param {string} strHtml - HTML assigned to the new div's innerHTML as-is (no escaping).
 */
function output(strHtml) {
  const oWrapper = document.createElement("div");
  oWrapper.innerHTML = strHtml;
  const oTarget = gebi("divOutput");
  oTarget.appendChild(oWrapper);
}
/**
 * Mean of the absolute values of a sample array.
 *
 * @param {ArrayLike<number>} faTempData - samples to average.
 * @returns {number} Sum of |sample| divided by the sample count (NaN for an empty input, since 0/0).
 */
function getFAvg(faTempData) {
  let fSum = 0;
  let iPos = 0;
  while (iPos < faTempData.length) {
    fSum += Math.abs(faTempData[iPos]);
    iPos += 1;
  }
  return fSum / faTempData.length;
}
// Once the page has loaded: ask for the microphone, then start the two polling
// loops (decode/split of recorded chunks, and drawing of finished blocks).
window.addEventListener(
  "load",
  function () {
    initializeMediaRecorder();
    iTimeoutId4DecodeAndSplit = window.setTimeout(decodeAndSplit, 400);
    // Fixed: this line used `==`, which compared the values and threw the
    // timer id away instead of storing it.
    iTimeoutId4Drawing = window.setTimeout(checkForDrawing, 400);
  }
);
</script>
<script>
// for media recorder and chunks
// MediaRecorder instance; stays null until initializeMediaRecorder() obtains an audio stream.
var oMediaRecorder=null;
// Recording state; resetChunk() gives it the fields oaBuffer, iIndex and oaChunk.
var oChunk={};
// True between startRecording() and stopRecording().
var booRecording=false;
// Id returned by setInterval(refreshStatus, ...) in startRecording().
var iIntervalId4StatusRefreshing=null;
// Id returned by the latest setTimeout(requestBlob, ...).
var iTimeoutId4Cutting=null;
// Delay between two requestBlob() calls, i.e. the length of one recorded chunk.
var iMilliSeconds4Cutting=1000; // 1000 ms = 1 sec
/**
 * Request microphone access and, on success, create the global MediaRecorder
 * and attach its data/stop handlers.
 *
 * - ondataavailable: appends the delivered data to oChunk.oaBuffer.
 * - onstop: bumps oChunk.iIndex, moves the buffered data into oChunk.oaChunk
 *   as {iIndex, oaData}, and starts a fresh buffer.
 *
 * Problems (no getUserMedia support, rejected promise) are reported through outputLine().
 */
function initializeMediaRecorder() {
  var booSupported = navigator.mediaDevices && navigator.mediaDevices.getUserMedia;
  if (!booSupported) {
    outputLine("您的瀏覽器不支援 getUserMedia!");
    return;
  }

  // Collect every piece of recorded data for the chunk currently being recorded.
  function handleDataAvailable(oEvent) {
    if (booDebug == true) { outputLine("ondataavailable"); }
    oChunk.oaBuffer.push(oEvent.data);
  }

  // A recording segment ended: queue what was buffered as one numbered chunk.
  function handleStop(oEvent) {
    if (booDebug == true) { outputLine("onstop"); }
    oChunk.iIndex += 1;
    oChunk.oaChunk.push({
      iIndex: oChunk.iIndex,
      oaData: oChunk.oaBuffer
    });
    oChunk.oaBuffer = [];
  }

  navigator.mediaDevices.getUserMedia({audio: true})
    .then(function (oStream) {
      oMediaRecorder = new MediaRecorder(oStream);
      oMediaRecorder.ondataavailable = handleDataAvailable;
      oMediaRecorder.onstop = handleStop;
    })
    .catch(function (oErr) {
      outputLine("取得媒體資源時遇到意外!" + oErr);
    });
}
/**
 * Put the global oChunk back into its empty state.
 *
 * oaBuffer: data pieces of the chunk being recorded;
 * iIndex:   index of the last finished chunk (-1 = none yet);
 * oaChunk:  queue of finished chunks, each {iIndex, oaData}.
 */
function resetChunk() {
  Object.assign(oChunk, {
    oaBuffer: [],
    iIndex: -1,
    oaChunk: []
  });
}
// Give oChunk its initial empty fields when this script runs, before any recording starts.
resetChunk();
/**
 * Refresh the "recording" indicator: paint it in a random colour and show how
 * many data pieces are currently buffered for the chunk being recorded.
 */
function refreshStatus() {
  // Three independent random channel values (0-255), drawn in R, G, B order.
  const iaRgb = [];
  for (let iChannel = 0; iChannel < 3; iChannel += 1) {
    iaRgb.push(Math.floor(Math.random() * 256));
  }
  const oStatus = gebi("spanRecordingStatus");
  oStatus.style.color = "rgb(" + iaRgb.join(",") + ")";
  oStatus.innerHTML = "錄音中(" + oChunk.oaBuffer.length + ")";
}
/**
 * Start a recording session (handler of the "start" button).
 *
 * Clears the output area, resets chunk and block state, starts the
 * MediaRecorder, and schedules the status refresh plus the first chunk cut.
 * Does nothing when a recording is already running.
 *
 * If the recorder has not been created (microphone permission denied, still
 * pending, or unsupported browser) a message is shown instead of throwing a
 * TypeError on the null recorder.
 */
function startRecording() {
  if (booRecording == true) { return; }
  gebi("divOutput").innerHTML = "";
  gebi("spanRecordingStatus").innerHTML = "";
  if (!oMediaRecorder) {
    // oMediaRecorder is only assigned after getUserMedia succeeds.
    outputLine("錄音裝置尚未就緒,無法開始錄音!");
    return;
  }
  resetChunk();
  resetBlock();
  oMediaRecorder.start();
  booRecording = true;
  iIntervalId4StatusRefreshing = window.setInterval(refreshStatus, 400);
  iTimeoutId4Cutting = window.setTimeout(requestBlob, iMilliSeconds4Cutting);
}
/**
 * Stop the running recording session (handler of the "stop" button).
 *
 * Clears the status indicator, marks the session as stopped, stops the
 * MediaRecorder, and cancels the pending chunk-cut timeout and the status
 * refresh interval. Does nothing when no recording is running.
 */
function stopRecording() {
  const booIdle = (booRecording == false);
  if (booIdle) {
    return;
  }

  const oStatus = gebi("spanRecordingStatus");
  oStatus.innerHTML = "";

  booRecording = false;
  oMediaRecorder.stop();

  window.clearTimeout(iTimeoutId4Cutting);
  window.clearInterval(iIntervalId4StatusRefreshing);
}
/**
 * Cut the recording into the next chunk: stop the recorder (which triggers
 * its onstop handler), start it again straight away, and schedule the next
 * cut after iMilliSeconds4Cutting milliseconds.
 */
function requestBlob() {
  const oRecorder = oMediaRecorder;
  oRecorder.stop();
  oRecorder.start();

  const iNextCutId = window.setTimeout(requestBlob, iMilliSeconds4Cutting);
  iTimeoutId4Cutting = iNextCutId;
}
</script>
<script>
// Id of the pending setTimeout(decodeAndSplit, ...) poll.
var iTimeoutId4DecodeAndSplit=null;
// Sample rate requested for decoding and used for window sizing, playback and canvas width.
var iSampleRate=8000; // 8000 Hz
// Whole-chunk gate: dataComing() only scans a chunk when (max-min)/2 exceeds this.
var fThreshold1=0.1; // amplitude spans -1 ~ +1, so 0.1 is roughly 5% of that span
// Per-window gate: the window's (max-min) must exceed this.
var fThreshold2=0.05;
// Per-window gate: both |max| and |min| of the window must exceed this.
var fThreshold3=0.01;
// Block state; resetBlock() gives it the fields iIndex, oaBuffer and oaBlock.
var oBlock={};
var fSecondsOfCheck=0.05; // each inspection window is 0.05 seconds long
/**
 * Put the global oBlock back into its empty state.
 *
 * iIndex:   index of the last block handed to the drawing loop (-1 = none yet);
 * oaBuffer: holding area for a block that reached the end of its chunk;
 * oaBlock:  list of finished blocks.
 */
function resetBlock() {
  Object.assign(oBlock, {
    iIndex: -1,
    oaBuffer: [],
    oaBlock: []
  });
}
// Give oBlock its initial empty fields when this script runs.
resetBlock();
// decode audio data and split into blocks
/**
 * Polling step: take at most one finished chunk from oChunk.oaChunk, decode
 * it to PCM samples, and hand channel 0 to dataComing(). Always re-schedules
 * itself 400 ms later.
 *
 * Each chunk is decoded with its own AudioContext created at iSampleRate.
 * That context is now closed once decoding has finished or failed, because
 * an unclosed context keeps its audio resources allocated.
 */
function decodeAndSplit() {
  // One chunk per poll; the next poll picks up the following one.
  var oCurrentChunk = oChunk.oaChunk.length > 0 ? oChunk.oaChunk.shift() : null;

  if (oCurrentChunk !== null) {
    new Blob(oCurrentChunk.oaData).arrayBuffer()
      .then(function (oBuffer) {
        if (booDebug == true) { outputLine("chunks轉到緩衝區成功!oBuffer.byteLength=" + oBuffer.byteLength); }
        var oAC = new (window.AudioContext || window.webkitAudioContext)({sampleRate: iSampleRate});
        oAC.decodeAudioData(oBuffer)
          .then(function (oDecodedData) {
            // Only channel 0 is analysed.
            var fa32Data = new Float32Array(oDecodedData.getChannelData(0));
            var fMinVal = Math.min(...fa32Data);
            var fMaxVal = Math.max(...fa32Data);
            if (booDebug == true) { outputLine("緩衝區解析成功,陣列長度=" + fa32Data.length + ",min=" + fMinVal + ",max=" + fMaxVal); }
            dataComing(Array.from(fa32Data), fMinVal, fMaxVal, oCurrentChunk.iIndex);
          })
          .catch(function (oErr) {
            outputLine("解碼發生意外!" + oErr);
          })
          .then(function () {
            // Release the per-chunk context whether decoding succeeded or not.
            if (typeof oAC.close === "function") {
              return oAC.close();
            }
            return undefined;
          })
          .catch(function () {
            // Closing is best-effort clean-up; a failure here must not surface as an unhandled rejection.
          });
      })
      .catch(function (oErr) {
        outputLine("轉成陣列發生錯誤!" + oErr);
      });
  }

  iTimeoutId4DecodeAndSplit = window.setTimeout(decodeAndSplit, 400);
}
/**
 * Move the block waiting in oBlock.oaBuffer (if any) into the finished list
 * oBlock.oaBlock and empty the buffer. Only the first buffered entry is moved.
 */
function flushBlockBuffer() {
  const booHasPending = oBlock.oaBuffer.length > 0;
  if (!booHasPending) {
    return;
  }
  if (booDebug == true) { outputLine("block_flush"); }
  const oPending = oBlock.oaBuffer[0];
  oBlock.oaBlock.push(oPending);
  oBlock.oaBuffer = [];
}
/**
 * Build a block descriptor for the samples from window start iB up to the
 * end of the window that starts at iE.
 *
 * @param {number[]} faData - samples of the whole chunk.
 * @param {number} iB - start index of the first window of the block.
 * @param {number} iE - start index of the last window of the block.
 * @param {number} iSteps - window length in samples.
 * @param {number} iIndex - index of the chunk the samples came from.
 * @returns {{faData:number[], booBegin:boolean, booEnd:boolean, iIndex:number}}
 *   faData is the sliced copy; booBegin is true when the block starts at
 *   sample 0; booEnd is true when it reaches (or passes) the end of faData.
 */
function getOBlock(faData, iB, iE, iSteps, iIndex) {
  const iStop = iE + iSteps;
  return {
    faData: faData.slice(iB, iStop),
    booBegin: iB == 0,
    booEnd: iStop >= faData.length,
    iIndex: iIndex
  };
}
/**
 * Scan one decoded chunk for stretches of sound and forward each stretch to
 * blockComing().
 *
 * The chunk is skipped when its overall half peak-to-peak value does not
 * exceed fThreshold1. Otherwise it is walked in windows of
 * iSampleRate * fSecondsOfCheck samples; a window counts as "sound" when its
 * peak-to-peak exceeds fThreshold2 and both |max| and |min| exceed
 * fThreshold3. Consecutive sound windows form one block.
 *
 * Whenever the chunk produces no block at all, the pending buffered block
 * (the tail of an earlier chunk) is flushed. This now includes a chunk that
 * passed the overall gate but had no qualifying window; before, the stale
 * tail stayed buffered and could be glued onto a later, unrelated chunk.
 *
 * @param {number[]} faData - samples of the chunk.
 * @param {number} fMinVal - minimum sample of the chunk.
 * @param {number} fMaxVal - maximum sample of the chunk.
 * @param {number} iIndex - index of the chunk.
 */
function dataComing(faData, fMinVal, fMaxVal, iIndex) {
  var iBlocksEmitted = 0;

  if (((fMaxVal - fMinVal) / 2) > fThreshold1) {
    var iSteps = Math.floor(iSampleRate * fSecondsOfCheck);
    if (iSteps < 1) { iSteps = 1; } // avoid an endless loop
    var iB = -1; // start index of the first window of the open block, -1 = no open block
    var iE = -1; // start index of the last window of the open block

    for (var i = 0; i < faData.length; i += iSteps) {
      var faWindow = faData.slice(i, i + iSteps);
      var fWinMin = Math.min(...faWindow);
      var fWinMax = Math.max(...faWindow);
      var booSound = (fWinMax - fWinMin) > fThreshold2
        && Math.abs(fWinMax) > fThreshold3
        && Math.abs(fWinMin) > fThreshold3;

      if (booSound) {
        if (iB == -1) { iB = i; }
        iE = i;
      } else if (iB != -1) {
        // A quiet window closes the open block.
        blockComing(getOBlock(faData, iB, iE, iSteps, iIndex));
        iBlocksEmitted += 1;
        iB = -1;
        iE = -1;
      }
    }

    // The chunk ended while a block was still open.
    if (iB != -1) {
      blockComing(getOBlock(faData, iB, iE, iSteps, iIndex));
      iBlocksEmitted += 1;
    }
  }

  if (iBlocksEmitted === 0) {
    // Nothing in this chunk can continue the buffered block, so finalise it.
    flushBlockBuffer();
  }
}
/**
 * Route one detected block to the finished list or to the holding buffer.
 *
 * 1. If the block starts at sample 0 of its chunk and a block is waiting in
 *    the buffer, the waiting samples are prepended (the sound continues
 *    across the chunk boundary) and the buffer is emptied.
 * 2. If the (possibly joined) block reaches the end of its chunk, it is kept
 *    in the buffer so that the next chunk can continue it.
 * 3. Otherwise it is complete and goes to oBlock.oaBlock.
 *
 * Before this fix, a joined block was always finalised in step 1, even when
 * it also ran to the end of its chunk, so a sound spanning three or more
 * chunks was cut apart.
 *
 * @param {{faData:number[], booBegin:boolean, booEnd:boolean, iIndex:number}} oCurrentBlock
 *   Block descriptor as produced by getOBlock(); its faData is replaced when
 *   the block is joined with the buffered one.
 */
function blockComing(oCurrentBlock) {
  if (oCurrentBlock.booBegin == true && oBlock.oaBuffer.length > 0) {
    // Continue the sound that ended the previous chunk.
    if (booDebug == true) { outputLine("block進行串接"); }
    var oPrevBlock = oBlock.oaBuffer[0];
    oCurrentBlock.faData = oPrevBlock.faData.concat(oCurrentBlock.faData);
    oBlock.oaBuffer = [];
  }

  // Anything still buffered at this point cannot be continued by this block.
  // After a join the buffer is already empty, so this is a no-op in that case.
  flushBlockBuffer();

  if (oCurrentBlock.booEnd == true) {
    // Might continue in the next chunk: hold it back.
    if (booDebug == true) { outputLine("block放入緩衝區"); }
    oBlock.oaBuffer.push(oCurrentBlock);
  } else {
    if (booDebug == true) { outputLine("block放入正式資料"); }
    oBlock.oaBlock.push(oCurrentBlock);
  }
}
</script>
<script>
// draw each block
// Meant to hold the id of the pending setTimeout(checkForDrawing, ...) poll.
var iTimeoutId4Drawing=null;
/**
 * Polling step: if a finished block has not been shown yet, emit a "listen"
 * button for it and draw its waveform (one block per poll). Always
 * re-schedules itself 400 ms later.
 *
 * Fixes:
 * - the timer id is now stored (`=`); the original used `==` and discarded it;
 * - the next poll is scheduled in a `finally`, so an exception while drawing
 *   one block no longer stops all later blocks from being drawn.
 */
function checkForDrawing() {
  try {
    var iNextIndex = oBlock.iIndex + 1;
    if (iNextIndex < oBlock.oaBlock.length) {
      oBlock.iIndex = iNextIndex;
      var oCurrentBlock = oBlock.oaBlock[iNextIndex];
      var strHtml = "<br><br>"
        + "<button type='button' onclick='listenToThisBlock(" + iNextIndex + ");'>"
        + "聽聽看"
        + "</button><br>";
      output(strHtml);
      drawWave(oCurrentBlock.faData, "white", "black");
    }
  } finally {
    iTimeoutId4Drawing = window.setTimeout(checkForDrawing, 400);
  }
}
/**
 * Play back one finished block (handler of the generated "listen" buttons).
 *
 * Copies the block's samples into a one-channel AudioBuffer created at
 * iSampleRate and plays it through a buffer source.
 *
 * The AudioContext created for this playback is now closed when the source
 * fires `ended`; the original left one open context behind per click.
 *
 * @param {number} iIndex4Block - index into oBlock.oaBlock.
 */
function listenToThisBlock(iIndex4Block) {
  var faSamples = oBlock.oaBlock[iIndex4Block].faData;
  var oAudioCtx = new (window.AudioContext || window.webkitAudioContext)();
  var oBuffer = oAudioCtx.createBuffer(1, faSamples.length, iSampleRate);
  // Bulk copy of the samples into channel 0.
  oBuffer.getChannelData(0).set(faSamples);

  var oSound = oAudioCtx.createBufferSource();
  oSound.buffer = oBuffer;
  oSound.connect(oAudioCtx.destination);
  oSound.onended = function () {
    // Release the context once playback has finished.
    if (typeof oAudioCtx.close === "function") {
      var oClosing = oAudioCtx.close();
      if (oClosing && typeof oClosing.catch === "function") {
        // Best-effort clean-up: a failed close must not become an unhandled rejection.
        oClosing.catch(function () {});
      }
    }
  };
  oSound.start();
}
/**
 * Draw the waveform of a sample array on a new canvas and append a caption,
 * the canvas and a line break to #divOutput.
 *
 * The canvas is 200 px high and 600 px wide per second of audio (at
 * iSampleRate); the vertical axis covers amplitudes -1 (bottom) to +1 (top).
 *
 * Minimum and maximum are found in a single loop. The original spread the
 * whole array into Math.min/Math.max, which throws a RangeError once the
 * array is longer than the engine's argument limit, and computed both values
 * twice.
 *
 * @param {number[]} faData - samples to draw.
 * @param {string} strFGColor - stroke colour of the waveform.
 * @param {string} strBGColor - fill colour of the background.
 */
function drawWave(faData, strFGColor, strBGColor) {
  // Same results as Math.min(...faData) / Math.max(...faData), without the argument limit.
  var fMinVal = Infinity;
  var fMaxVal = -Infinity;
  for (var iIdxScan = 0; iIdxScan < faData.length; iIdxScan++) {
    fMinVal = Math.min(fMinVal, faData[iIdxScan]);
    fMaxVal = Math.max(fMaxVal, faData[iIdxScan]);
  }

  var oCanvas = document.createElement("canvas");
  oCanvas.setAttribute("width", 200 * (faData.length / iSampleRate) * 3);
  oCanvas.setAttribute("height", 200);
  var oCtx = oCanvas.getContext("2d");
  var iCanvasWidth = oCanvas.width;
  var iCanvasHeight = oCanvas.height;

  // Background.
  oCtx.beginPath();
  oCtx.rect(0, 0, iCanvasWidth, iCanvasHeight);
  oCtx.fillStyle = strBGColor;
  oCtx.fill();

  // Waveform: x is proportional to the sample index, y maps -1..+1 onto the canvas height (top = +1).
  var fYBottom = -1;
  var fYTop = 1;
  function toX(iIdx) {
    return iIdx / faData.length * iCanvasWidth;
  }
  function toY(iIdx) {
    return (1 - (faData[iIdx] - fYBottom) / (fYTop - fYBottom)) * iCanvasHeight;
  }
  oCtx.strokeStyle = strFGColor;
  oCtx.beginPath();
  oCtx.moveTo(toX(0), toY(0));
  for (var iIdxData = 1; iIdxData < faData.length; iIdxData++) {
    oCtx.lineTo(toX(iIdxData), toY(iIdxData));
  }
  oCtx.stroke();

  // Caption, then the canvas.
  var oDiv = document.createElement("div");
  oDiv.innerHTML
    = "y-amplitude(-1~1), x-time(seconds), avg=" + getFAvg(faData)
    + "<br>" + "min=" + fMinVal + ", max=" + fMaxVal;
  var oOutput = gebi("divOutput");
  oOutput.appendChild(oDiv);
  oOutput.appendChild(oCanvas);
  oOutput.appendChild(document.createElement("br"));
}
</script>
</body>
</html>