備忘錄_20160105(定位) 修改 回首頁

程式 2022-05-09 16:55:26 1652086527 100
嘗試語音辨識 try speech recognition-step05

嘗試語音辨識 try speech recognition-step05

step04 是每次錄製 0.1 秒的聲音資料去分析,將有用的區塊連接起來。
然而用手機去處理時,會在 stop 到 start 之間聲音斷掉,這樣在辨識上的效果應該會很差。
所以新策略是每次錄製 1 秒,減少聲音的斷點,並且自動切割連續聲音的部分。前後秒的聲音若是應該連續,則會連成一塊。
暫時先沒做 Fast Fourier Transform

偵測聲音,切割,繪製振幅圖,且可聽聲音。
產生聲音的參考文章如下
Generate Sounds Programmatically With Javascript
Advanced techniques: Creating and sequencing audio


●20220509.php

<!doctype html>
<html>
  <head>
    <meta http-equiv="content-type" content="text/html; charset=utf-8" />
    <title></title>
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <style>
      
      /* Use the same font stack (Chinese fonts first, generic sans-serif last) for every element. */
      *
      {
        font-family: "WenQuanYi Zen Hei","文泉驛正黑","Heiti TC","黑體-繁","LiHei Pro","儷黑 Pro","PingFang TC","Droid Sans","Roboto","Microsoft JhengHei","微軟正黑體",sans-serif;
        /* [Linux fonts] [iOS fonts] [Android fonts] [Microsoft JhengHei] [sans-serif = Heiti-style typeface] */
      }

      /* Remove the default spacing and let the document fill the whole viewport. */
      html, body 
      {
        margin: 0;
        border: 0;
        padding: 0;
        width: 100%;
        height: 100%;
      }

      /* Links look the same whether or not they have been visited. */
      a:link, a:visited
      {
        color: blue;
        text-decoration: none;
      }
      
      /* Inner padding for the container that holds the buttons and the output area. */
      #divWrapper
      {
        padding: 1em;
      }
      
    </style>
  </head>
  <body>
    
    <div id="divWrapper">
      <div></div>
      <div>
        <button type="button" onclick="startRecording();">開始錄音</button>
        <span id="spanRecordingStatus"></span>
        <button type="button" onclick="stopRecording();">停止錄音</button>
      </div>
      <div id="divOutput"></div>
    </div>
    
    <script>
      
      // Global switch: when true, the other functions print diagnostic lines through outputLine().
      var booDebug=false;
      
      // Install String.prototype.ltrim / rtrim / trim, but only where the engine does not already provide them.
      (function()
      {
        var oaTrimmers=
        [
          ["ltrim", /^\s+/],
          ["rtrim", /\s+$/],
          ["trim", /^\s+|\s+$/g]
        ];
        oaTrimmers.forEach(function(oaEntry)
        {
          var strName=oaEntry[0];
          var oPattern=oaEntry[1];
          if(String.prototype[strName]) { return; }
          String.prototype[strName]=function() { return this.replace(oPattern,''); };
        });
      })();
      
      // Shorthand for document.getElementById: returns the element with the given id.
      function gebi(strId)
      {
        var oElement=document.getElementById(strId);
        return oElement;
      }
      
      // Writes one piece of HTML to the output area, followed by a line break.
      function outputLine(strHtml)
      {
        var strWithBreak=strHtml+"<br>";
        output(strWithBreak);
      }
      
      // Wraps the given HTML in a new <div> and appends it to #divOutput.
      function output(strHtml)
      {
        var oWrapper=Object.assign(document.createElement("div"), {innerHTML: strHtml});
        var oTarget=gebi("divOutput");
        oTarget.appendChild(oWrapper);
      }
      
      // Returns the mean of the absolute values of the samples (NaN for an empty list).
      function getFAvg(faTempData)
      {
        var fTotal=0;
        for(var fSample of faTempData)
        {
          fTotal+=Math.abs(fSample);
        }
        return fTotal/faTempData.length;
      }
      
      // Once the page has loaded: request the microphone, then start the two
      // polling loops (chunk decoding and block drawing), each after 400 ms.
      window.addEventListener(
        "load",
        function() 
        {
          initializeMediaRecorder();
          iTimeoutId4DecodeAndSplit=window.setTimeout(decodeAndSplit, 400);
          // Must be an assignment (the original used ==, a comparison), otherwise
          // the timer id is thrown away and the drawing loop can never be cancelled.
          iTimeoutId4Drawing=window.setTimeout(checkForDrawing, 400);
        }
      );
      
    </script>
    
    <script>
      
      // State shared by the MediaRecorder helpers and the chunk queue.

      // The MediaRecorder instance; stays null until initializeMediaRecorder() obtains a microphone stream.
      var oMediaRecorder=null;
      // Chunk bookkeeping; its fields (oaBuffer, iIndex, oaChunk) are filled in by resetChunk().
      var oChunk={};
      // True between startRecording() and stopRecording().
      var booRecording=false;
      // Id returned by setInterval(refreshStatus, ...) while recording.
      var iIntervalId4StatusRefreshing=null;
      // Id returned by setTimeout(requestBlob, ...) for the next stop/start cut.
      var iTimeoutId4Cutting=null;
      // Length of one recorded chunk: the delay before requestBlob() cuts the recording.
      var iMilliSeconds4Cutting=1000; // 1000 ms = 1 sec
      
      // Asks for microphone access. On success, creates the shared MediaRecorder
      // (oMediaRecorder) and wires its handlers:
      //   - ondataavailable: collects each recorded blob in oChunk.oaBuffer
      //   - onstop: moves the collected blobs into oChunk.oaChunk as one indexed chunk
      // Failures and missing browser support are reported through outputLine().
      function initializeMediaRecorder()
      {
        var booSupported=navigator.mediaDevices && navigator.mediaDevices.getUserMedia;
        if(!booSupported)
        {
          outputLine("您的瀏覽器不支援 getUserMedia!");
          return;
        }
        
        // Every blob delivered by the recorder belongs to the chunk currently being built.
        var fnOnDataAvailable=function(oEvent)
        {
          if(booDebug==true) { outputLine("ondataavailable"); }
          oChunk.oaBuffer.push(oEvent.data);
        };
        
        // A stop closes the current chunk: queue it under the next index and start a fresh buffer.
        var fnOnStop=function(oEvent)
        {
          if(booDebug==true) { outputLine("onstop"); }
          
          oChunk.iIndex+=1;
          oChunk.oaChunk.push({iIndex: oChunk.iIndex, oaData: oChunk.oaBuffer});
          oChunk.oaBuffer=[];
        };
        
        var fnOnStream=function(oStream)
        {
          oMediaRecorder=new MediaRecorder(oStream);
          oMediaRecorder.ondataavailable=fnOnDataAvailable;
          oMediaRecorder.onstop=fnOnStop;
        };
        
        var fnOnFailure=function(oErr)
        {
          outputLine("取得媒體資源時遇到意外!"+oErr);
        };
        
        navigator.mediaDevices.getUserMedia({audio: true})
          .then(fnOnStream)
          .catch(fnOnFailure);
      }
      
      // Puts oChunk back into its empty state:
      //   oaBuffer - blobs of the chunk currently being recorded
      //   iIndex   - index of the last finished chunk (-1 = none yet)
      //   oaChunk  - queue of finished chunks, each {iIndex, oaData}
      function resetChunk()
      {
        Object.assign(oChunk, {oaBuffer: [], iIndex: -1, oaChunk: []});
      }
      // Run once while the script loads so that oChunk has its fields before any recording starts.
      resetChunk();
      
      // Redraws the "recording" indicator in a new random colour, showing how many
      // blobs the current chunk buffer holds.
      function refreshStatus()
      {
        // Three random channel values in the order red, green, blue (0-255 each).
        var iaRgb=[0, 1, 2].map(function()
        {
          return Math.floor(Math.random()*256);
        });
        var oStatus=gebi("spanRecordingStatus");
        oStatus.style.color="rgb("+iaRgb.join(",")+")";
        oStatus.innerHTML="錄音中("+oChunk.oaBuffer.length+")";
      }
      
      // Starts a new recording session (handler of the "start recording" button).
      // Does nothing when a session is already running. When the recorder is not
      // available yet (microphone permission pending/denied, or unsupported browser)
      // it reports that and leaves the current output untouched instead of throwing.
      function startRecording()
      {
        if(booRecording==true) { return; }
        
        // oMediaRecorder stays null until getUserMedia has resolved successfully.
        if(!oMediaRecorder)
        {
          outputLine("錄音裝置尚未就緒,無法開始錄音!");
          return;
        }
        
        // Clear the previous session's output and queued data before recording begins.
        gebi("divOutput").innerHTML="";
        gebi("spanRecordingStatus").innerHTML="";
        resetChunk();
        resetBlock();
        
        oMediaRecorder.start();
        booRecording=true;
        iIntervalId4StatusRefreshing=window.setInterval(refreshStatus, 400);
        // The first cut happens after one chunk length; requestBlob re-arms itself afterwards.
        iTimeoutId4Cutting=window.setTimeout(requestBlob, iMilliSeconds4Cutting);
      }
      
      // Ends the running recording session (handler of the "stop recording" button):
      // clears the indicator, stops the recorder and cancels both timers.
      // Does nothing when no session is running.
      function stopRecording()
      {
        if(booRecording!=false)
        {
          var oStatus=gebi("spanRecordingStatus");
          oStatus.innerHTML="";
          booRecording=false;
          
          // Stopping delivers the final chunk through the recorder's onstop handler.
          oMediaRecorder.stop();
          
          window.clearTimeout(iTimeoutId4Cutting);
          window.clearInterval(iIntervalId4StatusRefreshing);
        }
      }
      
      // Cuts the recording into chunks: stops the recorder (which finishes the
      // current chunk), restarts it at once, and schedules the next cut.
      function requestBlob()
      {
        var iDelay=iMilliSeconds4Cutting;
        
        oMediaRecorder.stop();
        oMediaRecorder.start();
        
        iTimeoutId4Cutting=window.setTimeout(requestBlob, iDelay);
      }
      
    </script>
    
    <script>
      
      // Id of the pending setTimeout that re-runs decodeAndSplit().
      var iTimeoutId4DecodeAndSplit=null;
      // Sample rate requested from the AudioContext used for decoding, and used for playback buffers.
      var iSampleRate=8000; // 8000 Hz
      // Whole-chunk gate in dataComing(): half the peak-to-peak range must exceed this.
      var fThreshold1=0.1; // amplitude ranges from -1 to +1; 0.1 is about 5%
      // Per-window gate in dataComing(): the window's peak-to-peak range must exceed this.
      var fThreshold2=0.05;
      // Per-window gate in dataComing(): both |max| and |min| of the window must exceed this.
      var fThreshold3=0.01;
      // Block bookkeeping; its fields (iIndex, oaBuffer, oaBlock) are filled in by resetBlock().
      var oBlock={};
      var fSecondsOfCheck=0.05; // one check window is 0.05 seconds long
      
      // Puts oBlock back into its empty state:
      //   iIndex   - index of the last block that was drawn (-1 = none yet)
      //   oaBuffer - at most one block that touched the end of its chunk and may continue
      //   oaBlock  - finished blocks, in the order they were detected
      function resetBlock()
      {
        Object.assign(oBlock, {iIndex: -1, oaBuffer: [], oaBlock: []});
      }
      // Run once while the script loads so that oBlock has its fields before the polling loops read it.
      resetBlock();
      
      // AudioContext used only for decoding recorded chunks. It is created lazily
      // and reused for every chunk: creating one context per chunk and never
      // closing it piles up live contexts, and browsers limit how many may exist.
      var oDecodeAudioContext=null;
      
      // Polling step: takes at most one finished chunk from oChunk.oaChunk, decodes
      // it to PCM samples and passes channel 0 to dataComing() together with the
      // chunk's min/max sample value and its index. Always re-arms itself to run
      // again after 400 ms. Failures are reported through outputLine().
      function decodeAndSplit()
      {
        if(oChunk.oaChunk.length>0)
        {
          const oCurrentChunk=oChunk.oaChunk.shift();
          
          (new Blob(oCurrentChunk.oaData)).arrayBuffer()
            .then
            (
              function(oBuffer)
              {
                if(booDebug==true) { outputLine("chunks轉到緩衝區成功!oBuffer.byteLength="+oBuffer.byteLength); }
                
                if(oDecodeAudioContext===null)
                {
                  oDecodeAudioContext=new (window.AudioContext || window.webkitAudioContext)({sampleRate: iSampleRate});
                }
                
                return oDecodeAudioContext.decodeAudioData(oBuffer)
                  .then
                  (
                    function(oDecodedData)
                    {
                      // Only the first channel is analysed.
                      var fa32Data=oDecodedData.getChannelData(0);
                      
                      // Plain loop instead of Math.min(...data): spreading a large
                      // array into call arguments can exceed the engine's limit.
                      var fMinVal=Infinity;
                      var fMaxVal=-Infinity;
                      for(var iIdxSample=0; iIdxSample<fa32Data.length; iIdxSample++)
                      {
                        fMinVal=Math.min(fMinVal, fa32Data[iIdxSample]);
                        fMaxVal=Math.max(fMaxVal, fa32Data[iIdxSample]);
                      }
                      if(booDebug==true) { outputLine("緩衝區解析成功,陣列長度="+fa32Data.length+",min="+fMinVal+",max="+fMaxVal); }
                      
                      // Array.from copies the samples, so later concat/slice work on a plain array.
                      dataComing(Array.from(fa32Data), fMinVal, fMaxVal, oCurrentChunk.iIndex);
                    }
                  )
                  .catch
                  (
                    function(oErr)
                    {
                      outputLine("解碼發生意外!"+oErr);
                    }
                  );
              }
            )
            .catch
            (
              function(oErr)
              {
                outputLine("轉成陣列發生錯誤!"+oErr);
              }
            );
        }
        
        iTimeoutId4DecodeAndSplit=window.setTimeout(decodeAndSplit, 400);
      }
      
      // Moves the buffered block (if any) into the list of finished blocks and
      // empties the buffer.
      function flushBlockBuffer()
      {
        if(oBlock.oaBuffer.length===0) { return; }
        
        if(booDebug==true) { outputLine("block_flush"); }
        var oPending=oBlock.oaBuffer[0];
        oBlock.oaBlock.push(oPending);
        oBlock.oaBuffer=[];
      }
      
      // Builds a block record for the windows iB..iE (start offsets of the first and
      // last window) of one chunk:
      //   faData   - samples from iB up to the end of the last window
      //   booBegin - the block starts at the very beginning of the chunk
      //   booEnd   - the block reaches the end of the chunk
      //   iIndex   - index of the chunk the block was cut from
      function getOBlock(faData,iB,iE,iSteps,iIndex)
      {
        var iSliceEnd=iE+iSteps;
        return {
          faData: faData.slice(iB,iSliceEnd),
          booBegin: (iB==0),
          booEnd: (iSliceEnd>=faData.length),
          iIndex: iIndex
        };
      }
      
      // Splits one decoded chunk into sound blocks.
      //   faData            - samples of the chunk
      //   fMinVal, fMaxVal  - smallest / largest sample of the whole chunk
      //   iIndex            - index of the chunk
      // A chunk whose half peak-to-peak range does not exceed fThreshold1 counts as
      // silent. Otherwise the chunk is scanned in windows of fSecondsOfCheck seconds;
      // consecutive windows that pass the fThreshold2 / fThreshold3 tests form one
      // block, which is handed to blockComing().
      // Whenever the chunk produces no block at all, the buffered tail of the
      // previous chunk (if any) is flushed: it cannot continue into this chunk, and
      // leaving it buffered would let it be glued to a later, non-adjacent chunk.
      function dataComing(faData, fMinVal, fMaxVal, iIndex)
      {
        if(!(((fMaxVal-fMinVal)/2)>fThreshold1))
        {
          // The whole chunk is silent.
          flushBlockBuffer();
          return;
        }
        
        var iSteps=Math.floor(iSampleRate*fSecondsOfCheck);
        if(iSteps<1) { iSteps=1; } // a step of 0 would never advance the loop
        
        var iB=-1, iE=-1; // start offsets of the first / last window of the open run
        var iBlocksFound=0;
        
        // Closes the currently open run of loud windows, if there is one.
        function emitOpenRun()
        {
          if(iB==-1) { return; }
          blockComing(getOBlock(faData,iB,iE,iSteps,iIndex));
          iBlocksFound++;
          iB=-1;
          iE=-1;
        }
        
        for(var i=0; i<faData.length; i+=iSteps)
        {
          var faWindow=faData.slice(i, i+iSteps);
          var fWindowMin=Math.min(...faWindow);
          var fWindowMax=Math.max(...faWindow);
          var booLoud=(fWindowMax-fWindowMin)>fThreshold2
            && Math.abs(fWindowMax)>fThreshold3
            && Math.abs(fWindowMin)>fThreshold3;
          
          if(booLoud)
          {
            if(iB==-1) { iB=i; }
            iE=i;
          }
          else
          {
            emitOpenRun();
          }
        }
        emitOpenRun();
        
        if(iBlocksFound===0)
        {
          // Loud overall, but no window qualified: treat it like a silent chunk.
          flushBlockBuffer();
        }
      }
      
      // Files one detected block, joining sound that continues across chunk borders.
      //   oCurrentBlock - {faData, booBegin, booEnd, iIndex} as built by getOBlock()
      // Rules:
      //   - A block that starts at the chunk beginning while a tail is buffered is the
      //     continuation of that tail: the samples are concatenated.
      //   - A block (merged or not) that reaches the chunk end may continue in the next
      //     chunk, so it waits in oBlock.oaBuffer. Without this, a sound spanning three
      //     or more chunks would be cut after the second chunk.
      //   - Anything else is final and goes to oBlock.oaBlock; a buffered tail that was
      //     not continued is flushed first so the order of blocks is preserved.
      function blockComing(oCurrentBlock)
      {
        var booContinuesTail=(oCurrentBlock.booBegin==true && oBlock.oaBuffer.length>0);
        
        if(booContinuesTail)
        {
          // Join with the buffered tail of the previous chunk.
          if(booDebug==true) { outputLine("block進行串接"); }
          var oTailBlock=oBlock.oaBuffer[0];
          oCurrentBlock.faData=oTailBlock.faData.concat(oCurrentBlock.faData);
          oBlock.oaBuffer=[];
        }
        else
        {
          flushBlockBuffer();
        }
        
        if(oCurrentBlock.booEnd==true)
        {
          if(booDebug==true) { outputLine("block放入緩衝區"); }
          oBlock.oaBuffer.push(oCurrentBlock);
        }
        else
        {
          if(booDebug==true) { outputLine("block放入正式資料"); }
          oBlock.oaBlock.push(oCurrentBlock);
        }
      }
      
    </script>
    
    <script>
      
      // Drawing loop: shows every finished block once.
      // Id of the pending setTimeout that re-runs checkForDrawing().
      var iTimeoutId4Drawing=null;
      
      // Polling step: when a finished block has not been shown yet, outputs a
      // "listen" button for it and draws its waveform (one block per call).
      // oBlock.iIndex is the index of the last block that was drawn.
      // Always re-arms itself to run again after 400 ms.
      function checkForDrawing()
      {
        var booHasUndrawnBlock=(oBlock.iIndex<(oBlock.oaBlock.length-1));
        
        if(booHasUndrawnBlock)
        {
          oBlock.iIndex++;
          
          var oCurrentBlock=oBlock.oaBlock[oBlock.iIndex];
          
          var straHtml=[];
          straHtml.push("<br><br>");
          straHtml.push("<button type='button' onclick='listenToThisBlock("+oBlock.iIndex+");'>");
          straHtml.push("聽聽看");
          straHtml.push("</button><br>");
          output(straHtml.join(""));
          
          drawWave(oCurrentBlock.faData, "white", "black");
        }
        
        // Must be an assignment (the original used ==, a comparison), otherwise
        // the timer id is never stored.
        iTimeoutId4Drawing=window.setTimeout(checkForDrawing, 400);
      }
      
      // Plays the samples of one finished block through the speakers.
      //   iIndex4Block - index into oBlock.oaBlock
      // Does nothing when the block does not exist or has no samples (an
      // AudioBuffer cannot be created with length 0). The AudioContext created for
      // the playback is closed when the sound has ended, so repeated clicks do not
      // pile up open contexts.
      function listenToThisBlock(iIndex4Block)
      {
        var oTargetBlock=oBlock.oaBlock[iIndex4Block];
        if(!oTargetBlock || oTargetBlock.faData.length<1) { return; }
        
        var oAudioCtx=new (window.AudioContext || window.webkitAudioContext)();
        
        // One mono buffer at the analysis sample rate, filled with the block's samples.
        var oBuffer=oAudioCtx.createBuffer(1, oTargetBlock.faData.length, iSampleRate);
        oBuffer.getChannelData(0).set(oTargetBlock.faData);
        
        var oSound=oAudioCtx.createBufferSource();
        oSound.buffer=oBuffer;
        oSound.connect(oAudioCtx.destination);
        oSound.onended=function()
        {
          // Release the audio resources held by this one-shot context.
          oAudioCtx.close();
        };
        oSound.start();
      }
      
      // Appends a waveform picture of the samples to #divOutput: a caption line
      // (average absolute amplitude, min, max), a canvas, and a line break.
      //   faData     - samples to draw
      //   strFGColor - stroke colour of the waveform
      //   strBGColor - fill colour of the canvas background
      // The canvas is 200 px high; its width is derived from the duration
      // (faData.length/iSampleRate seconds, 600 px per second). The vertical axis
      // spans the amplitudes -1 (bottom) to +1 (top).
      function drawWave(faData, strFGColor, strBGColor)
      {
        // Single pass for min/max. A plain loop is used instead of
        // Math.min(...faData): long blocks hold tens of thousands of samples and
        // spreading them into call arguments can exceed the engine's limit.
        var fMinVal=Infinity;
        var fMaxVal=-Infinity;
        for(var iIdxScan=0; iIdxScan<faData.length; iIdxScan++)
        {
          fMinVal=Math.min(fMinVal, faData[iIdxScan]);
          fMaxVal=Math.max(fMaxVal, faData[iIdxScan]);
        }
        
        var oCanvas=document.createElement("canvas");
        oCanvas.setAttribute("width", 200*(faData.length/iSampleRate)*3);
        oCanvas.setAttribute("height", 200);
        
        var oCtx=oCanvas.getContext("2d");
        var iCanvasWidth=oCanvas.width;
        var iCanvasHeight=oCanvas.height;
        
        // Background.
        oCtx.beginPath();
        oCtx.rect(0,0,iCanvasWidth,iCanvasHeight);
        oCtx.fillStyle=strBGColor;
        oCtx.fill();
        
        var fYBottom=-1;
        var fYTop=1;
        
        // Sample index -> horizontal pixel position.
        function toX(iIdxData)
        {
          return iIdxData/faData.length*iCanvasWidth;
        }
        
        // Amplitude -> vertical pixel position (canvas y grows downwards).
        function toY(fSample)
        {
          return (1-(fSample-fYBottom)/(fYTop-fYBottom))*iCanvasHeight;
        }
        
        // Waveform: one polyline through all samples.
        oCtx.strokeStyle=strFGColor;
        oCtx.beginPath();
        oCtx.moveTo(toX(0), toY(faData[0]));
        for(var iIdxData=1; iIdxData<faData.length; iIdxData++)
        {
          oCtx.lineTo(toX(iIdxData), toY(faData[iIdxData]));
        }
        oCtx.stroke();
        
        var oDiv=document.createElement("div");
        oDiv.innerHTML
          ="y-amplitude(-1~1), x-time(seconds), avg="+getFAvg(faData)
          +"<br>"+"min="+fMinVal+", max="+fMaxVal;
        gebi("divOutput").appendChild(oDiv);
        gebi("divOutput").appendChild(oCanvas);
        gebi("divOutput").appendChild(document.createElement("br"));
      }
      
    </script>
    
  </body>
</html>