<em>Mac</em>Book项目 2009年学校开始实施<em>Mac</em>Book项目,所有师生配备一本<em>Mac</em>Book,并同步更新了校园无线网络。学校每周进行电脑技术更新,每月发送技术支持资料,极大改变了教学及学习方式。因此2011
2021-06-01 09:32:01
專案需要,要實現類似小愛同學的語音控制功能,並且要離線,不能花公司一分錢。第一步就是需要把音訊文字化。經過各種資料蒐集後,選擇了vosk。這是vosk的官方介紹:
Vosk is a speech recognition toolkit. The best things in Vosk are:
選擇它的理由:開源、可離線、可使用第三方的訓練模型。本次使用的是官方提供的中文訓練模型,如果有需要也可以自行訓練,不過成本太大。具體見官網:https://alphacephei.com/vosk/,官方demo:https://github.com/alphacep/vosk-api。
本次使用 Spring Boot + Maven 實現,官方demo為 Spring Boot + Gradle。
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build for the offline speech-to-text demo (Spring Boot web app + Vosk). -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>2.5.4</version>
        <relativePath/> <!-- lookup parent from repository -->
    </parent>

    <groupId>com.example</groupId>
    <artifactId>voice</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <name>voice-ai</name>
    <description>Demo project for Spring Boot</description>

    <properties>
        <java.version>1.8</java.version>
    </properties>

    <!-- The Vosk artifact is resolved from the vendor's own repository declared here. -->
    <repositories>
        <repository>
            <id>com.alphacephei</id>
            <name>vosk</name>
            <url>https://alphacephei.com/maven/</url>
        </repository>
    </repositories>

    <dependencies>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
        </dependency>

        <!-- JNA: native-library bridge declared alongside the Vosk Java binding. -->
        <dependency>
            <groupId>net.java.dev.jna</groupId>
            <artifactId>jna</artifactId>
            <version>5.7.0</version>
        </dependency>
        <dependency>
            <groupId>com.alphacephei</groupId>
            <artifactId>vosk</artifactId>
            <version>0.3.30</version>
        </dependency>

        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <optional>true</optional>
        </dependency>

        <!--
            Raised from 1.2.8: early 1.2.x releases are affected by the publicly
            documented autoType deserialization vulnerabilities. 1.2.83 is the
            patched 1.x release and keeps the JSON.parseObject / getString API.
        -->
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.83</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
            </plugin>
        </plugins>
    </build>
</project>
特別說明一下,vosk的包在常見的maven倉庫裡面是沒有的,所以需要指定下載地址。
public class VoiceUtil { @Value("${leenleda.vosk.model}") private String VOSKMODELPATH; public String getWord(String filePath) throws IOException, UnsupportedAudioFileException { Assert.isTrue(StringUtils.hasLength(VOSKMODELPATH), "無效的VOS模組!"); byte[] bytes = Files.readAllBytes(Paths.get(filePath)); // 轉換為16KHZ reSamplingAndSave(bytes, filePath); File f = new File(filePath); RandomAccessFile rdf = null; rdf = new RandomAccessFile(f, "r"); log.info("聲音尺寸:{}", toInt(read(rdf, 4, 4))); log.info("音訊格式:{}", toShort(read(rdf, 20, 2))); short track=toShort(read(rdf, 22, 2)); log.info("1 單聲道 2 雙聲道: {}", track); log.info("取樣率、音訊取樣級別 16000 = 16KHz: {}", toInt(read(rdf, 24, 4))); log.info("每秒波形的資料量:{}", toShort(read(rdf, 22, 2))); log.info("取樣幀的大小:{}", toShort(read(rdf, 32, 2))); log.info("取樣位數:{}", toShort(read(rdf, 34, 2))); rdf.close(); LibVosk.setLogLevel(LogLevel.WARNINGS); try (Model model = new Model(VOSKMODELPATH); InputStream ais = AudioSystem.getAudioInputStream(new BufferedInputStream(new FileInputStream(filePath))); // 取樣率為音訊取樣率的聲道倍數 Recognizer recognizer = new Recognizer(model, 16000*track)) { int nbytes; byte[] b = new byte[4096]; int i = 0; while ((nbytes = ais.read(b)) >= 0) { i += 1; if (recognizer.acceptWaveForm(b, nbytes)) { // System.out.println(recognizer.getResult()); } else { // System.out.println(recognizer.getPartialResult()); } } String result = recognizer.getFinalResult(); log.info("識別結果:{}", result); if (StringUtils.hasLength(result)) { JSONObject jsonObject = JSON.parseObject(result); return jsonObject.getString("text").replace(" ", ""); } return ""; } } public static int toInt(byte[] b) { return (((b[3] & 0xff) << 24) + ((b[2] & 0xff) << 16) + ((b[1] & 0xff) << 8) + ((b[0] & 0xff) << 0)); } public static short toShort(byte[] b) { return (short) ((b[1] << 8) + (b[0] << 0)); } public static byte[] read(RandomAccessFile rdf, int pos, int length) throws IOException { rdf.seek(pos); byte result[] = new byte[length]; for (int i = 0; i < length; i++) { 
result[i] = rdf.readByte(); } return result; } public static void reSamplingAndSave(byte[] data, String path) throws IOException, UnsupportedAudioFileException { WaveFileReader reader = new WaveFileReader(); AudioInputStream audioIn = reader.getAudioInputStream(new ByteArrayInputStream(data)); AudioFormat srcFormat = audioIn.getFormat(); int targetSampleRate = 16000; AudioFormat dstFormat = new AudioFormat(srcFormat.getEncoding(), targetSampleRate, srcFormat.getSampleSizeInBits(), srcFormat.getChannels(), srcFormat.getFrameSize(), srcFormat.getFrameRate(), srcFormat.isBigEndian()); AudioInputStream convertedIn = AudioSystem.getAudioInputStream(dstFormat, audioIn); File file = new File(path); WaveFileWriter writer = new WaveFileWriter(); writer.write(convertedIn, AudioFileFormat.Type.WAVE, file); } }
有幾點需要說明一下:官方demo裡面把採集率(取樣率)寫死為16000,也就是以16KHz來算的,所以我把所有拿到的音訊都先轉成了16KHz。另外,傳給 Recognizer 的採集率需要設定為 16000 乘以聲道數(例如單聲道為 16000,雙聲道為 32000)。
@RestController public class VoiceAiController { @Autowired VoiceUtil voiceUtil; @PostMapping("/getWord") public String getWord(MultipartFile file) { String path = "G:\leenleda\application\voice-ai\" + new Date().getTime() + ".wav"; File localFile = new File(path); try { file.transferTo(localFile); //把上傳的檔案儲存至本地 System.out.println(file.getOriginalFilename() + " 上傳成功"); // 上傳成功,開始解析 String text = voiceUtil.getWord(path); localFile.delete(); return text; } catch (IOException | UnsupportedAudioFileException e) { e.printStackTrace(); localFile.delete(); return "上傳失敗"; } } }
<!DOCTYPE html>
<!-- Minimal test page: record from the microphone, play back, and upload for recognition. -->
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
    <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
    <title>聲音轉換</title>
</head>
<body>
    <div>
        <audio controls autoplay></audio>
        <input id="start" type="button" value="錄音" />
        <input id="stop" type="button" value="停止" />
        <input id="play" type="button" value="播放" />
        <input id="upload" type="button" value="提交" />
        <!-- Recognition results are appended here by HZRecorder.upload. -->
        <div id="text">
        </div>
    </div>
    <script src="http://libs.baidu.com/jquery/2.1.4/jquery.min.js"></script>
    <script type="text/javascript" src="HZRecorder.js"></script>
    <script>
        // Set once the user has granted microphone access via the record button.
        var recorder;
        var audio = document.querySelector('audio');

        $("#start").click(function () {
            HZRecorder.get(function (rec) {
                recorder = rec;
                recorder.start();
            });
        });

        // The remaining buttons do nothing until a recorder exists; without this
        // guard a click before recording throws a TypeError on undefined.
        $("#stop").click(function () {
            if (!recorder) {
                return;
            }
            recorder.stop();
        });

        $("#play").click(function () {
            if (!recorder) {
                return;
            }
            recorder.play(audio);
        });

        $("#upload").click(function () {
            if (!recorder) {
                return;
            }
            // NOTE(review): the URL carries an "/admin" prefix - presumably the
            // application's context path; verify against the server configuration.
            recorder.upload("/admin/getWord", function (state, e) {
                switch (state) {
                    case 'uploading':
                    case 'ok':
                        // Nothing to do here.
                        break;
                    case 'error':
                        alert("上傳失敗");
                        break;
                    case 'cancel':
                        alert("上傳被取消");
                        break;
                }
            });
        });
    </script>
</body>
</html>
/**
 * HZRecorder: records microphone audio in the browser, encodes it as a
 * mono 16-bit PCM WAV file, and can play it back or upload it.
 *
 * Depends on a global jQuery `$` and an element with id "text" for showing
 * the upload response.
 */
(function (window) {
    // Compatibility shims for vendor-prefixed APIs.
    window.URL = window.URL || window.webkitURL;
    navigator.getUserMedia = navigator.getUserMedia || navigator.webkitGetUserMedia || navigator.mozGetUserMedia || navigator.msGetUserMedia;
    var AudioContextCtor = window.AudioContext || window.webkitAudioContext;

    /**
     * @param {MediaStream} stream microphone stream to record from
     * @param {Object} [config] currently overridden: output is always 16-bit / 16000 Hz
     */
    var HZRecorder = function (stream, config) {
        config = config || {};
        config.sampleBits = 16;     // sample size in bits: 8 or 16
        config.sampleRate = 16000;  // output sample rate in Hz

        var context = new AudioContextCtor();
        var audioInput = context.createMediaStreamSource(stream);
        var recorder = context.createScriptProcessor(4096, 1, 1);

        var audioData = {
            size: 0,                                // total number of recorded samples
            buffer: [],                             // recorded chunks (Float32Array)
            inputSampleRate: context.sampleRate,    // input sample rate
            inputSampleBits: 16,                    // input sample size: 8 or 16
            outputSampleRate: config.sampleRate,    // output sample rate
            outputSampleBits: config.sampleBits,    // output sample size: 8 or 16

            /** Stores a copy of one chunk of samples. */
            input: function (data) {
                this.buffer.push(new Float32Array(data));
                this.size += data.length;
            },

            /**
             * Merges all chunks and down-samples them to the output rate.
             * @returns {Float32Array} samples at min(input rate, output rate)
             */
            compress: function () {
                // Merge.
                var merged = new Float32Array(this.size);
                var offset = 0;
                for (var i = 0; i < this.buffer.length; i++) {
                    merged.set(this.buffer[i], offset);
                    offset += this.buffer[i].length;
                }

                // Down-sample using the exact (possibly fractional) rate ratio so the
                // result really has the rate written to the WAV header; e.g.
                // 44100 -> 16000 needs a step of 2.75, not 2. A ratio below 1 is
                // clamped because encodeWAV then labels the data with the input rate.
                // Nearest-sample picking, no low-pass filter.
                var ratio = Math.max(1, this.inputSampleRate / this.outputSampleRate);
                var length = Math.floor(merged.length / ratio);
                var result = new Float32Array(length);
                for (var index = 0; index < length; index++) {
                    result[index] = merged[Math.floor(index * ratio)];
                }
                return result;
            },

            /**
             * Encodes the recording as a WAV file.
             * @returns {Blob} blob of type audio/wav
             */
            encodeWAV: function () {
                var sampleRate = Math.min(this.inputSampleRate, this.outputSampleRate);
                var sampleBits = Math.min(this.inputSampleBits, this.outputSampleBits);
                var samples = this.compress();
                var dataLength = samples.length * (sampleBits / 8);
                var buffer = new ArrayBuffer(44 + dataLength);
                var data = new DataView(buffer);

                var channelCount = 1; // mono
                var offset = 0;

                var writeString = function (str) {
                    for (var i = 0; i < str.length; i++) {
                        data.setUint8(offset + i, str.charCodeAt(i));
                    }
                };

                // RIFF chunk identifier.
                writeString('RIFF'); offset += 4;
                // Bytes from the next field to the end of the file (file size - 8).
                data.setUint32(offset, 36 + dataLength, true); offset += 4;
                // WAVE format tag.
                writeString('WAVE'); offset += 4;
                // Format chunk identifier.
                writeString('fmt '); offset += 4;
                // Format chunk length, 0x10 = 16.
                data.setUint32(offset, 16, true); offset += 4;
                // Audio format: 1 = PCM.
                data.setUint16(offset, 1, true); offset += 2;
                // Channel count.
                data.setUint16(offset, channelCount, true); offset += 2;
                // Sample rate (samples per second per channel).
                data.setUint32(offset, sampleRate, true); offset += 4;
                // Byte rate: channels * sample rate * bits per sample / 8.
                data.setUint32(offset, channelCount * sampleRate * (sampleBits / 8), true); offset += 4;
                // Block align: channels * bits per sample / 8.
                data.setUint16(offset, channelCount * (sampleBits / 8), true); offset += 2;
                // Bits per sample.
                data.setUint16(offset, sampleBits, true); offset += 2;
                // Data chunk identifier.
                writeString('data'); offset += 4;
                // Size of the sample data (file size - 44).
                data.setUint32(offset, dataLength, true); offset += 4;

                // Sample data.
                var i, s, val;
                if (sampleBits === 8) {
                    for (i = 0; i < samples.length; i++, offset++) {
                        s = Math.max(-1, Math.min(1, samples[i]));
                        val = s < 0 ? s * 0x8000 : s * 0x7FFF;
                        val = parseInt(255 / (65535 / (val + 32768)));
                        data.setUint8(offset, val);
                    }
                } else {
                    for (i = 0; i < samples.length; i++, offset += 2) {
                        s = Math.max(-1, Math.min(1, samples[i]));
                        data.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
                    }
                }

                return new Blob([data], { type: 'audio/wav' });
            }
        };

        // Start recording.
        this.start = function () {
            audioInput.connect(recorder);
            recorder.connect(context.destination);
        };

        // Stop recording.
        this.stop = function () {
            recorder.disconnect();
        };

        // Stop recording and return the audio as a WAV blob.
        this.getBlob = function () {
            this.stop();
            return audioData.encodeWAV();
        };

        // Play the recording in the given <audio> element.
        this.play = function (audio) {
            audio.src = window.URL.createObjectURL(this.getBlob());
        };

        /**
         * Uploads the recording as multipart field "file".
         * @param {string} url target URL
         * @param {function(string, Event)} [callback] receives
         *        'uploading' | 'ok' | 'error' | 'cancel' and the event
         */
        this.upload = function (url, callback) {
            var fd = new FormData();
            fd.append("file", this.getBlob());
            var xhr = new XMLHttpRequest();
            if (callback) {
                xhr.upload.addEventListener("progress", function (e) {
                    callback('uploading', e);
                }, false);
                xhr.addEventListener("load", function (e) {
                    callback('ok', e);
                }, false);
                xhr.addEventListener("error", function (e) {
                    callback('error', e);
                }, false);
                xhr.addEventListener("abort", function (e) {
                    callback('cancel', e);
                }, false);
            }
            // Registered before send, and acting only once the request is complete
            // (readyState 4); otherwise the result is appended once per state change.
            xhr.onreadystatechange = function () {
                if (xhr.readyState !== 4) {
                    return;
                }
                console.log("語音識別結果:" + xhr.responseText);
                // Inserted as text so the response cannot inject markup.
                $("#text").append($('<h2>').text(xhr.responseText));
            };
            xhr.open("POST", url);
            xhr.send(fd);
        };

        // Audio capture.
        recorder.onaudioprocess = function (e) {
            audioData.input(e.inputBuffer.getChannelData(0));
        };
    };

    // Show the message to the user and throw.
    HZRecorder.throwError = function (message) {
        alert(message);
        throw new Error(message);
    };

    // Whether this browser exposes a microphone capture API.
    HZRecorder.canRecording = !!((navigator.mediaDevices && navigator.mediaDevices.getUserMedia) || navigator.getUserMedia);

    /**
     * Requests microphone access and passes a new recorder to `callback`.
     * @param {function(HZRecorder)} callback invoked once access is granted
     * @param {Object} [config] forwarded to the HZRecorder constructor
     */
    HZRecorder.get = function (callback, config) {
        if (!callback) {
            return;
        }

        var onStream = function (stream) {
            callback(new HZRecorder(stream, config));
        };

        var onError = function (error) {
            switch (error.code || error.name) {
                case 'PERMISSION_DENIED':
                case 'PermissionDeniedError':
                case 'NotAllowedError':
                    HZRecorder.throwError('使用者拒絕提供資訊。');
                    break;
                case 'NOT_SUPPORTED_ERROR':
                case 'NotSupportedError':
                    HZRecorder.throwError('瀏覽器不支援硬體裝置。');
                    break;
                case 'MANDATORY_UNSATISFIED_ERROR':
                case 'MandatoryUnsatisfiedError':
                case 'NotFoundError':
                    HZRecorder.throwError('無法發現指定的硬體裝置。');
                    break;
                default:
                    HZRecorder.throwError('無法開啟麥克風。異常資訊:' + (error.code || error.name));
                    break;
            }
        };

        var constraints = { audio: true }; // audio only

        if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
            // Promise-based API, preferred where available.
            navigator.mediaDevices.getUserMedia(constraints).then(onStream, onError);
        } else if (navigator.getUserMedia) {
            // Legacy callback-based API.
            navigator.getUserMedia(constraints, onStream, onError);
        } else {
            HZRecorder.throwError('當前瀏覽器不支援錄音功能。');
        }
    };

    window.HZRecorder = HZRecorder;
})(window);
到此這篇關於Java 離線中文語音文字識別 的文章就介紹到這了,更多相關java 離線語音文字識別 內容請搜尋it145.com以前的文章或繼續瀏覽下面的相關文章希望大家以後多多支援it145.com!
相關文章
<em>Mac</em>Book项目 2009年学校开始实施<em>Mac</em>Book项目,所有师生配备一本<em>Mac</em>Book,并同步更新了校园无线网络。学校每周进行电脑技术更新,每月发送技术支持资料,极大改变了教学及学习方式。因此2011
2021-06-01 09:32:01
综合看Anker超能充系列的性价比很高,并且与不仅和iPhone12/苹果<em>Mac</em>Book很配,而且适合多设备充电需求的日常使用或差旅场景,不管是安卓还是Switch同样也能用得上它,希望这次分享能给准备购入充电器的小伙伴们有所
2021-06-01 09:31:42
除了L4WUDU与吴亦凡已经多次共事,成为了明面上的厂牌成员,吴亦凡还曾带领20XXCLUB全队参加2020年的一场音乐节,这也是20XXCLUB首次全员合照,王嗣尧Turbo、陈彦希Regi、<em>Mac</em> Ova Seas、林渝植等人全部出场。然而让
2021-06-01 09:31:34
目前应用IPFS的机构:1 谷歌<em>浏览器</em>支持IPFS分布式协议 2 万维网 (历史档案博物馆)数据库 3 火狐<em>浏览器</em>支持 IPFS分布式协议 4 EOS 等数字货币数据存储 5 美国国会图书馆,历史资料永久保存在 IPFS 6 加
2021-06-01 09:31:24
开拓者的车机是兼容苹果和<em>安卓</em>,虽然我不怎么用,但确实兼顾了我家人的很多需求:副驾的门板还配有解锁开关,有的时候老婆开车,下车的时候偶尔会忘记解锁,我在副驾驶可以自己开门:第二排设计很好,不仅配置了一个很大的
2021-06-01 09:30:48
不仅是<em>安卓</em>手机,苹果手机的降价力度也是前所未有了,iPhone12也“跳水价”了,发布价是6799元,如今已经跌至5308元,降价幅度超过1400元,最新定价确认了。iPhone12是苹果首款5G手机,同时也是全球首款5nm芯片的智能机,它
2021-06-01 09:30:45