Commit d7cad875 authored by Sugon_ldc's avatar Sugon_ldc
Browse files

add new files

parents
Pipeline #1560 failed with stages
in 0 seconds
*.iml
.gradle
/local.properties
/.idea/caches
/.idea/libraries
/.idea/modules.xml
/.idea/workspace.xml
/.idea/navEditor.xml
/.idea/assetWizardSettings.xml
.DS_Store
/build
/captures
.externalNativeBuild
.cxx
local.properties
# Android部署项目
简体中文 | [English](./README_en.md)
## 转换模型
1. 开始转换模型,请在`Whisper-Finetune`项目根目录下执行`convert-ggml.py`程序,把模型转换为Android项目所需的ggml格式的模型,需要转换的模型可以是原始的Transformers模型,也可以是微调的模型。
```shell
python convert-ggml.py --model_dir=models/whisper-tiny-finetune/ --output_path=models/ggml-model.bin
```
2. 把模型放在Android项目的`app/src/main/assets/models`目录下,然后就可以使用Android Studio打开项目了。
## 编译说明
1. 默认使用的NDK版本是`25.2.9519653`,如果下面修改其他版本,要修改`app/build.gradle`里面的配置。
2. **注意,在真正使用时,一定要发布`release`的APK包,这样推理速度才快。**
3. 本项目已经发布了`release`的APK包,请在`Whisper-Finetune`项目主页的最后扫码下载。
## 效果图
效果图如下,这里使用的模型是量化为半精度tiny模型,准确率不高。
<br/>
<div align="center">
<img src="../docs/images/android2.jpg" alt="Android效果图" width="200">
<img src="../docs/images/android1.jpg" alt="Android效果图" width="200">
<img src="../docs/images/android3.jpg" alt="Android效果图" width="200">
<img src="../docs/images/android4.jpg" alt="Android效果图" width="200">
</div>
## 下载安装包
可以点击这里下载[Android安装包](https://yeyupiaoling.cn/whisper.apk),注意,为了安装包小,这里使用的模型是量化为半精度tiny模型,准确率不高,如果想更换模型的,请执行编译项目。
<br/>
<div align="center">
<img src="../docs/images/android.jpg" alt="Android安装包" width="200">
</div>
# Android
[简体中文](./README.md) | English
**Disclaimer, this document was obtained through machine translation, please check the original document [here](./README.md).**
## Convert model
1. To convert a model, run `convert-ggml.py` from the root of the `Whisper-Finetune` project to convert it to the ggml format required by the Android project. The model to convert can be an original Transformers model or a fine-tuned one.
```shell
python convert-ggml.py --model_dir=models/whisper-tiny-finetune/ --output_path=models/ggml-model.bin
```
2. Put the model in the `app/src/main/assets/models` directory of the Android project, and then you can open the project with Android Studio.
## Build notes
1. The default NDK version used is `25.2.9519653`, if you change the other version below, you will need to change the configuration in `app/build.gradle`.
2. **Note that in real use, be sure to release the `release` APK package so that inference is fast.**
3. This project has released a `release` APK package; please scan the QR code at the end of the `Whisper-Finetune` project homepage to download it.
## Effect picture
The effect picture is as follows. The model used here is quantized as a half-precision tiny model, which has a low accuracy.
<br/>
<div align="center">
<img src="../docs/images/android2.jpg" alt="Android效果图" width="200">
<img src="../docs/images/android1.jpg" alt="Android效果图" width="200">
<img src="../docs/images/android3.jpg" alt="Android效果图" width="200">
<img src="../docs/images/android4.jpg" alt="Android效果图" width="200">
</div>
## Download APK
You can click here to download the [Android APK](https://yeyupiaoling.cn/whisper.apk). Note that, to keep the installer small, the model used here is a tiny model quantized to half precision, so its accuracy is not high. If you want to use a different model, please build the project yourself.
<br/>
<div align="center">
<img src="../docs/images/android.jpg" alt="Android安装包" width="200">
</div>
/build/
/release/
/main/assets/
\ No newline at end of file
// Module-level Gradle build script for the Whisper Android demo app.
plugins {
    id 'com.android.application'
    id 'org.jetbrains.kotlin.android'
}

android {
    namespace 'com.yeyupiaoling.whisper'
    compileSdk 33

    defaultConfig {
        applicationId "com.yeyupiaoling.whisper"
        minSdk 24
        targetSdk 33
        versionCode 1
        versionName "1.0"
        testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner"
        vectorDrawables {
            useSupportLibrary true
        }
    }

    buildTypes {
        release {
            // Shrinking is off; the proguard files are listed for when it is enabled.
            minifyEnabled false
            proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro'
        }
    }

    compileOptions {
        sourceCompatibility JavaVersion.VERSION_1_8
        targetCompatibility JavaVersion.VERSION_1_8
    }
    kotlinOptions {
        jvmTarget = '1.8'
    }
    buildFeatures {
        compose true
    }
    composeOptions {
        // NOTE(review): Compose compiler 1.3.2 is documented for Kotlin 1.7.20,
        // while the dependencies below pin kotlin-bom 1.8.0 — confirm these
        // versions are actually compatible.
        kotlinCompilerExtensionVersion '1.3.2'
    }

    // NDK version used to build the whisper.cpp JNI code referenced below
    // (the README tells users to keep this in sync if they change NDK versions).
    ndkVersion "25.2.9519653"
    externalNativeBuild {
        ndkBuild {
            path 'src/main/jni/whisper/Android.mk'
        }
    }
    packagingOptions {
        resources {
            excludes += '/META-INF/{AL2.0,LGPL2.1}'
        }
    }
}

dependencies {
    implementation 'androidx.core:core-ktx:1.8.0'
    implementation platform('org.jetbrains.kotlin:kotlin-bom:1.8.0')
    implementation 'androidx.lifecycle:lifecycle-runtime-ktx:2.3.1'
    implementation 'androidx.activity:activity-compose:1.5.1'
    implementation platform('androidx.compose:compose-bom:2022.10.00')
    implementation 'androidx.compose.ui:ui'
    implementation 'androidx.compose.ui:ui-graphics'
    implementation 'androidx.compose.ui:ui-tooling-preview'
    implementation 'androidx.compose.material3:material3'
    implementation 'androidx.appcompat:appcompat:1.4.1'
    implementation 'com.google.android.material:material:1.5.0'
    implementation 'androidx.constraintlayout:constraintlayout:2.1.3'
    testImplementation 'junit:junit:4.13.2'
    androidTestImplementation 'androidx.test.ext:junit:1.1.3'
    androidTestImplementation 'androidx.test.espresso:espresso-core:3.4.0'
    androidTestImplementation platform('androidx.compose:compose-bom:2022.10.00')
    androidTestImplementation 'androidx.compose.ui:ui-test-junit4'
    debugImplementation 'androidx.compose.ui:ui-tooling'
    debugImplementation 'androidx.compose.ui:ui-test-manifest'
}
\ No newline at end of file
# Add project specific ProGuard rules here.
# You can control the set of applied configuration files using the
# proguardFiles setting in build.gradle.
#
# For more details, see
# http://developer.android.com/guide/developing/tools/proguard.html
# If your project uses WebView with JS, uncomment the following
# and specify the fully qualified class name to the JavaScript interface
# class:
#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
# public *;
#}
# Uncomment this to preserve the line number information for
# debugging stack traces.
#-keepattributes SourceFile,LineNumberTable
# If you keep the line number information, uncomment this to
# hide the original source file name.
#-renamesourcefileattribute SourceFile
\ No newline at end of file
package com.yeyupiaoling.whisper
import androidx.test.platform.app.InstrumentationRegistry
import androidx.test.ext.junit.runners.AndroidJUnit4
import org.junit.Test
import org.junit.runner.RunWith
import org.junit.Assert.*
/**
 * Instrumented test, which will execute on an Android device.
 *
 * See [testing documentation](http://d.android.com/tools/testing).
 */
@RunWith(AndroidJUnit4::class)
class ExampleInstrumentedTest {
    @Test
    fun useAppContext() {
        // The instrumentation target must resolve to this app's package name.
        val targetContext = InstrumentationRegistry.getInstrumentation().targetContext
        assertEquals("com.yeyupiaoling.whisper", targetContext.packageName)
    }
}
\ No newline at end of file
<?xml version="1.0" encoding="utf-8"?>
<!--
    App manifest for the Whisper demo: declares the audio-related permissions
    and the four demo activities; MainActivity is the launcher entry point.
-->
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:tools="http://schemas.android.com/tools">

    <!-- Microphone capture, requested by RecordActivity. -->
    <uses-permission android:name="android.permission.RECORD_AUDIO" />
    <!-- Reading audio files on Android 13+ (API 33), see hasPermission() in the activities. -->
    <uses-permission android:name="android.permission.READ_MEDIA_AUDIO" />
    <!-- Reading audio files on devices below Android 13. -->
    <uses-permission android:name="android.permission.READ_EXTERNAL_STORAGE" />

    <application
        android:allowBackup="true"
        android:dataExtractionRules="@xml/data_extraction_rules"
        android:fullBackupContent="@xml/backup_rules"
        android:icon="@mipmap/ic_launcher"
        android:label="@string/app_name"
        android:roundIcon="@mipmap/ic_launcher_round"
        android:supportsRtl="true"
        android:theme="@style/Theme.AndroidDemo"
        tools:targetApi="31">

        <!-- Model benchmark screen. -->
        <activity
            android:name=".TestActivity"
            android:exported="false" />
        <!-- Press-to-record transcription screen. -->
        <activity
            android:name=".RecordActivity"
            android:exported="false" />
        <!-- Pick-an-audio-file transcription screen. -->
        <activity
            android:name=".AudioFileActivity"
            android:exported="false" />
        <!-- Launcher activity. -->
        <activity
            android:name=".MainActivity"
            android:exported="true">
            <intent-filter>
                <action android:name="android.intent.action.MAIN" />
                <category android:name="android.intent.category.LAUNCHER" />
            </intent-filter>
        </activity>
    </application>
</manifest>
\ No newline at end of file
package com.yeyupiaoling.whisper
import android.Manifest
import android.annotation.SuppressLint
import android.content.Intent
import android.content.pm.PackageManager
import android.os.Build
import android.os.Bundle
import android.provider.MediaStore
import android.util.Log
import android.view.View
import android.widget.Button
import android.widget.TextView
import android.widget.Toast
import androidx.appcompat.app.AppCompatActivity
import androidx.lifecycle.lifecycleScope
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.cancel
import kotlinx.coroutines.isActive
import kotlinx.coroutines.launch
import kotlinx.coroutines.withContext
import java.io.File
/**
 * Demo activity: pick an audio file from storage and transcribe it with Whisper.
 */
class AudioFileActivity : AppCompatActivity() {
    private var whisperContext: WhisperContext? = null
    private var resultTextView: TextView? = null
    private var selectAudioBtn: Button? = null

    companion object {
        private val TAG = AudioFileActivity::class.java.name

        // Path of the ggml model inside the APK assets.
        private const val modelPath = "models/ggml-model.bin"
    }

    override fun onCreate(savedInstanceState: Bundle?) {
        super.onCreate(savedInstanceState)
        setContentView(R.layout.activity_audio_file)
        // Ask for the audio read permission up front.
        if (!hasPermission()) {
            requestPermission()
        }
        resultTextView = findViewById(R.id.result_text)
        selectAudioBtn = findViewById(R.id.select_audio_btn)
        // Open the system file picker restricted to audio files.
        selectAudioBtn!!.setOnClickListener { v: View? ->
            val intent =
                Intent(Intent.ACTION_GET_CONTENT, MediaStore.Audio.Media.EXTERNAL_CONTENT_URI)
            intent.addFlags(Intent.FLAG_ACTIVITY_CLEAR_TOP)
            intent.setDataAndType(MediaStore.Audio.Media.EXTERNAL_CONTENT_URI, "audio/*")
            startActivityForResult(intent, 1)
        }
        // Disabled until the model has finished loading.
        selectAudioBtn!!.isEnabled = false
        // Load the model off the main thread.
        lifecycleScope.launch {
            loadModel()
        }
    }

    /** Load the ggml model from assets on the IO dispatcher, updating the UI around it. */
    @SuppressLint("SetTextI18n")
    private suspend fun loadModel() = withContext(Dispatchers.IO) {
        val showText = "正在加载模型:$modelPath ...\n"
        // Show progress on the UI thread.
        withContext(Dispatchers.Main) {
            resultTextView!!.text = showText
        }
        whisperContext =
            WhisperContext.createContextFromAsset(application.assets, modelPath)
        // Report success on the UI thread.
        withContext(Dispatchers.Main) {
            selectAudioBtn!!.isEnabled = true
            resultTextView!!.text = showText + "模型加载成功"
            Toast.makeText(this@AudioFileActivity, "模型加载成功", Toast.LENGTH_SHORT).show()
        }
    }

    @Deprecated("Deprecated in Java")
    override fun onActivityResult(requestCode: Int, resultCode: Int, data: Intent?) {
        super.onActivityResult(requestCode, resultCode, data)
        if (resultCode != RESULT_OK || data == null) return
        val uri = data.data ?: return
        // FIX: the original dereferenced a possibly-null path with !! and crashed
        // when the picked URI could not be resolved to a file path.
        val audioFilePath = getPathFromURI(this, uri)
        if (audioFilePath == null) {
            Toast.makeText(this, "无法读取所选音频文件", Toast.LENGTH_SHORT).show()
            return
        }
        val file = File(audioFilePath)
        selectAudioBtn!!.isEnabled = false
        val startTime = System.currentTimeMillis()
        resultTextView!!.text = "正在识别中..."
        lifecycleScope.launch {
            try {
                // FIX: decode the WAV off the main thread (the original decoded
                // on the UI thread, which can block for large files).
                val audioData = withContext(Dispatchers.IO) { decodeWaveFile(file) }
                val text = whisperContext?.transcribeData(audioData)
                val endTime = System.currentTimeMillis()
                withContext(Dispatchers.Main) {
                    // Show the transcription plus timing statistics.
                    val showText = "识别结果:${text.toString()}\n" +
                            "音频时间:${audioData.size / (16000 / 1000)} ms\n" +
                            "识别时间:${endTime - startTime} ms\n"
                    resultTextView!!.text = showText
                    Log.d(TAG, showText)
                    selectAudioBtn!!.isEnabled = true
                }
            } catch (e: Exception) {
                e.printStackTrace()
                selectAudioBtn!!.isEnabled = true
            }
        }
    }

    override fun onDestroy() {
        super.onDestroy()
        // FIX: the original cancelled lifecycleScope and then launched the release
        // job on that same (already cancelled) scope, so the native context was
        // never freed here. Release synchronously instead; freeing is fast.
        kotlinx.coroutines.runBlocking {
            whisperContext?.release()
        }
        whisperContext = null
    }

    // True when the app can read audio files on this API level.
    private fun hasPermission(): Boolean {
        return if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.TIRAMISU) {
            checkSelfPermission(Manifest.permission.READ_MEDIA_AUDIO) == PackageManager.PERMISSION_GRANTED
        } else {
            checkSelfPermission(Manifest.permission.READ_EXTERNAL_STORAGE) == PackageManager.PERMISSION_GRANTED
        }
    }

    // Request the API-level-appropriate audio read permission.
    private fun requestPermission() {
        if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.TIRAMISU) {
            requestPermissions(arrayOf(Manifest.permission.READ_MEDIA_AUDIO), 1)
        } else {
            requestPermissions(arrayOf(Manifest.permission.READ_EXTERNAL_STORAGE), 1)
        }
    }
}
\ No newline at end of file
package com.yeyupiaoling.whisper
import android.content.Context
import android.graphics.Canvas
import android.graphics.Color
import android.graphics.Paint
import android.graphics.Path
import android.graphics.Point
import android.util.AttributeSet
import android.view.View
/**
 * Custom View that visualizes audio amplitude data, either as hollow bars or
 * as a smoothed waveform curve — one style above the baseline, one mirrored
 * below it.
 */
class AudioView : View {
    // Style drawn above the baseline.
    private var upShowStyle = ShowStyle.STYLE_HOLLOW_LUMP
    // Style drawn (mirrored) below the baseline.
    private var downShowStyle = ShowStyle.STYLE_WAVE
    // Preprocessed (absolute-value) amplitude data; null until setWaveData() runs.
    private var waveData: ByteArray? = null
    // Sampled points feeding the waveform curve.
    var pointList: MutableList<Point>? = null
    // Shared stroke paint for bars and curve.
    private var lumpPaint: Paint? = null
    var wavePath = Path()

    constructor(context: Context?) : super(context) {
        init()
    }

    constructor(context: Context?, attrs: AttributeSet?) : super(context, attrs) {
        init()
    }

    constructor(context: Context?, attrs: AttributeSet?, defStyleAttr: Int) : super(
        context,
        attrs,
        defStyleAttr
    ) {
        init()
    }

    // Configure the shared anti-aliased stroke paint.
    private fun init() {
        lumpPaint = Paint()
        lumpPaint!!.isAntiAlias = true
        lumpPaint!!.color = LUMP_COLOR
        lumpPaint!!.strokeWidth = 2f
        lumpPaint!!.style = Paint.Style.STROKE
    }

    // Feed new raw amplitude data and trigger a redraw.
    fun setWaveData(data: ByteArray) {
        waveData = readyData(data)
        genSamplingPoint(data)
        invalidate()
    }

    // Choose the styles for the upper and lower halves of the display.
    fun setStyle(upShowStyle: ShowStyle, downShowStyle: ShowStyle) {
        this.upShowStyle = upShowStyle
        this.downShowStyle = downShowStyle
    }

    override fun onDraw(canvas: Canvas) {
        super.onDraw(canvas)
        wavePath.reset()
        for (i in 0 until LUMP_COUNT) {
            // No data yet: draw a flat row of minimum-height rectangles.
            if (waveData == null) {
                canvas.drawRect(
                    ((LUMP_WIDTH + LUMP_SPACE) * i).toFloat(),
                    (
                            LUMP_MAX_HEIGHT - LUMP_MIN_HEIGHT).toFloat(),
                    (
                            (LUMP_WIDTH + LUMP_SPACE) * i + LUMP_WIDTH).toFloat(),
                    LUMP_MAX_HEIGHT.toFloat(),
                    lumpPaint!!
                )
                continue
            }
            // Upper half.
            when (upShowStyle) {
                ShowStyle.STYLE_HOLLOW_LUMP -> drawLump(canvas, i, false)
                ShowStyle.STYLE_WAVE -> drawWave(canvas, i, false)
                else -> {}
            }
            // Lower (mirrored) half.
            when (downShowStyle) {
                ShowStyle.STYLE_HOLLOW_LUMP -> drawLump(canvas, i, true)
                ShowStyle.STYLE_WAVE -> drawWave(canvas, i, true)
                else -> {}
            }
        }
    }

    /**
     * Draw one cubic segment of the smoothed waveform curve.
     *
     * @param canvas   target canvas
     * @param i        index into [pointList]
     * @param reversal true to mirror the curve below the baseline
     */
    private fun drawWave(canvas: Canvas, i: Int, reversal: Boolean) {
        if (pointList == null || pointList!!.size < 2) {
            return
        }
        val ratio = SCALE * if (reversal) -1 else 1
        if (i < pointList!!.size - 2) {
            val point = pointList!![i]
            val nextPoint = pointList!![i + 1]
            // Midpoint x between the two sample points (infix shr binds looser
            // than +, so this is (point.x + nextPoint.x) / 2).
            val midX = point.x + nextPoint.x shr 1
            if (i == 0) {
                wavePath.moveTo(point.x.toFloat(), LUMP_MAX_HEIGHT - point.y * ratio)
            }
            wavePath.cubicTo(
                midX.toFloat(), LUMP_MAX_HEIGHT - point.y * ratio,
                midX.toFloat(), LUMP_MAX_HEIGHT - nextPoint.y * ratio,
                nextPoint.x.toFloat(), LUMP_MAX_HEIGHT - nextPoint.y * ratio
            )
            canvas.drawPath(wavePath, lumpPaint!!)
        }
    }

    /**
     * Draw one hollow rectangular bar for sample [i].
     */
    private fun drawLump(canvas: Canvas, i: Int, reversal: Boolean) {
        val minus = if (reversal) -1 else 1
        val top = LUMP_MAX_HEIGHT - (LUMP_MIN_HEIGHT + waveData!![i] * SCALE) * minus
        canvas.drawRect(
            (LUMP_SIZE * i).toFloat(),
            top,
            (
                    LUMP_SIZE * i + LUMP_WIDTH).toFloat(),
            LUMP_MAX_HEIGHT.toFloat(),
            lumpPaint!!
        )
    }

    /**
     * Generate sampled points for the waveform curve (subsampling reduces the
     * amount of drawing work).
     *
     * NOTE(review): the `data` parameter is unused — sampling reads the
     * preprocessed [waveData] field instead. Confirm this is intentional.
     */
    private fun genSamplingPoint(data: ByteArray) {
        if (upShowStyle != ShowStyle.STYLE_WAVE && downShowStyle != ShowStyle.STYLE_WAVE) {
            return
        }
        if (pointList == null) {
            pointList = ArrayList()
        } else {
            pointList!!.clear()
        }
        pointList!!.add(Point(0, 0))
        var i = WAVE_SAMPLING_INTERVAL
        while (i < LUMP_COUNT) {
            pointList!!.add(Point(LUMP_SIZE * i, waveData!![i].toInt()))
            i += WAVE_SAMPLING_INTERVAL
        }
        pointList!!.add(Point(LUMP_SIZE * LUMP_COUNT, 0))
    }

    /**
     * Visualization styles.
     */
    enum class ShowStyle {
        /**
         * Hollow rectangular bars.
         */
        STYLE_HOLLOW_LUMP,

        /**
         * Smoothed curve.
         */
        STYLE_WAVE,

        /**
         * Draw nothing.
         */
        STYLE_NOTHING
    }

    companion object {
        // Number of bars / samples shown.
        private const val LUMP_COUNT = 128
        private const val LUMP_WIDTH = 6
        private const val LUMP_SPACE = 2
        private const val LUMP_MIN_HEIGHT = LUMP_WIDTH
        private const val LUMP_MAX_HEIGHT = 200 //TODO: HEIGHT
        private const val LUMP_SIZE = LUMP_WIDTH + LUMP_SPACE
        private val LUMP_COLOR = Color.parseColor("#6de8fd")
        private const val WAVE_SAMPLING_INTERVAL = 3
        // NOTE(review): integer division — 200 / 128 == 1, so SCALE is 1.0f
        // rather than 1.5625f. Confirm whether fractional scaling was intended.
        private const val SCALE = (LUMP_MAX_HEIGHT / LUMP_COUNT).toFloat()

        /**
         * Preprocess raw FFT/amplitude bytes into non-negative magnitudes.
         *
         * @return a LUMP_COUNT-sized array of absolute values
         */
        private fun readyData(fft: ByteArray): ByteArray {
            val newData = ByteArray(LUMP_COUNT)
            var abs: Byte
            for (i in 0 until LUMP_COUNT) {
                abs = Math.abs(fft[i].toInt()).toByte()
                // Math.abs(-128) overflows back to -128, so clamp negatives to 127.
                newData[i] = if (abs < 0) 127 else abs
            }
            return newData
        }
    }
}
\ No newline at end of file
package com.yeyupiaoling.whisper
import android.content.res.AssetManager
import android.os.Build
import android.util.Log
import kotlinx.coroutines.*
import java.io.File
import java.io.InputStream
import java.util.concurrent.Executors
// Log tag shared by the whisper JNI wrapper declarations in this file.
private const val LOG_TAG = "LibWhisper"

/**
 * Kotlin wrapper around a native whisper.cpp context.
 *
 * [ptr] holds the native context handle returned by the JNI init functions;
 * 0 means the context has been released. Every native call is funneled through
 * a single-threaded dispatcher because the C++ side must not be accessed from
 * more than one thread at a time.
 */
class WhisperContext private constructor(private var ptr: Long) {
    // Meet Whisper C++ constraint: Don't access from more than one thread at a time.
    private val scope: CoroutineScope = CoroutineScope(
        Executors.newSingleThreadExecutor().asCoroutineDispatcher()
    )

    /**
     * Run a full transcription over [data] (float PCM samples) and return the
     * concatenated text of all decoded segments.
     */
    suspend fun transcribeData(data: FloatArray): String = withContext(scope.coroutineContext) {
        require(ptr != 0L)
        val numThreads = WhisperCpuConfig.preferredThreadCount
        Log.d(LOG_TAG, "Selecting $numThreads threads")
        WhisperLib.fullTranscribe(ptr, numThreads, data)
        val textCount = WhisperLib.getTextSegmentCount(ptr)
        return@withContext buildString {
            for (i in 0 until textCount) {
                append(WhisperLib.getTextSegment(ptr, i))
            }
        }
    }

    // Run the native memcpy benchmark with [nthreads] threads.
    suspend fun benchMemory(nthreads: Int): String = withContext(scope.coroutineContext) {
        return@withContext WhisperLib.benchMemcpy(nthreads)
    }

    // Run the native ggml matrix-multiplication benchmark with [nthreads] threads.
    suspend fun benchGgmlMulMat(nthreads: Int): String = withContext(scope.coroutineContext) {
        return@withContext WhisperLib.benchGgmlMulMat(nthreads)
    }

    // Free the native context. Safe to call more than once: ptr guards re-free.
    suspend fun release() = withContext(scope.coroutineContext) {
        if (ptr != 0L) {
            WhisperLib.freeContext(ptr)
            ptr = 0
        }
    }

    // Last-resort cleanup if a caller forgets to call release().
    protected fun finalize() {
        runBlocking {
            release()
        }
    }

    companion object {
        // Create a context from a model file on disk; throws if native init fails.
        fun createContextFromFile(filePath: String): WhisperContext {
            val ptr = WhisperLib.initContext(filePath)
            if (ptr == 0L) {
                throw java.lang.RuntimeException("Couldn't create context with path $filePath")
            }
            return WhisperContext(ptr)
        }

        // Create a context by streaming the model from an InputStream.
        fun createContextFromInputStream(stream: InputStream): WhisperContext {
            val ptr = WhisperLib.initContextFromInputStream(stream)
            if (ptr == 0L) {
                throw java.lang.RuntimeException("Couldn't create context from input stream")
            }
            return WhisperContext(ptr)
        }

        // Create a context from a model bundled in the APK assets.
        fun createContextFromAsset(assetManager: AssetManager, assetPath: String): WhisperContext {
            val ptr = WhisperLib.initContextFromAsset(assetManager, assetPath)
            if (ptr == 0L) {
                throw java.lang.RuntimeException("Couldn't create context from asset $assetPath")
            }
            return WhisperContext(ptr)
        }

        // System/build info string reported by the native library.
        fun getSystemInfo(): String {
            return WhisperLib.getSystemInfo()
        }
    }
}
/**
 * JNI bridge to the whisper.cpp native library.
 *
 * The init block selects which native build to load at runtime: a vfpv4 build
 * on armeabi-v7a CPUs that advertise "vfpv4" in /proc/cpuinfo, an fp16 build on
 * arm64-v8a CPUs that advertise "fphp", and the generic build otherwise.
 */
private class WhisperLib {
    companion object {
        init {
            Log.d(LOG_TAG, "Primary ABI: ${Build.SUPPORTED_ABIS[0]}")
            var loadVfpv4 = false
            var loadV8fp16 = false
            if (isArmEabiV7a()) {
                // armeabi-v7a needs runtime detection support
                val cpuInfo = cpuInfo()
                cpuInfo?.let {
                    Log.d(LOG_TAG, "CPU info: $cpuInfo")
                    if (cpuInfo.contains("vfpv4")) {
                        Log.d(LOG_TAG, "CPU supports vfpv4")
                        loadVfpv4 = true
                    }
                }
            } else if (isArmEabiV8a()) {
                // ARMv8.2a needs runtime detection support
                val cpuInfo = cpuInfo()
                cpuInfo?.let {
                    Log.d(LOG_TAG, "CPU info: $cpuInfo")
                    if (cpuInfo.contains("fphp")) {
                        Log.d(LOG_TAG, "CPU supports fp16 arithmetic")
                        loadV8fp16 = true
                    }
                }
            }
            // Load exactly one of the three native library variants.
            if (loadVfpv4) {
                Log.d(LOG_TAG, "Loading libwhisper_vfpv4.so")
                System.loadLibrary("whisper_vfpv4")
            } else if (loadV8fp16) {
                Log.d(LOG_TAG, "Loading libwhisper_v8fp16_va.so")
                System.loadLibrary("whisper_v8fp16_va")
            } else {
                Log.d(LOG_TAG, "Loading libwhisper.so")
                System.loadLibrary("whisper")
            }
        }

        // JNI methods
        external fun initContextFromInputStream(inputStream: InputStream): Long
        external fun initContextFromAsset(assetManager: AssetManager, assetPath: String): Long
        external fun initContext(modelPath: String): Long
        external fun freeContext(contextPtr: Long)
        external fun fullTranscribe(contextPtr: Long, numThreads: Int, audioData: FloatArray)
        external fun getTextSegmentCount(contextPtr: Long): Int
        external fun getTextSegment(contextPtr: Long, index: Int): String
        external fun getSystemInfo(): String
        external fun benchMemcpy(nthread: Int): String
        external fun benchGgmlMulMat(nthread: Int): String
    }
}
// True when the device's primary ABI is 32-bit ARM (armeabi-v7a).
private fun isArmEabiV7a(): Boolean = Build.SUPPORTED_ABIS[0] == "armeabi-v7a"
// True when the device's primary ABI is 64-bit ARM (arm64-v8a).
private fun isArmEabiV8a(): Boolean = Build.SUPPORTED_ABIS[0] == "arm64-v8a"
// Read the full contents of /proc/cpuinfo, or null if it cannot be read.
private fun cpuInfo(): String? =
    try {
        File("/proc/cpuinfo").readText()
    } catch (e: Exception) {
        Log.w(LOG_TAG, "Couldn't read /proc/cpuinfo", e)
        null
    }
\ No newline at end of file
package com.yeyupiaoling.whisper
import android.content.Intent
import android.os.Bundle
import android.view.View
import android.widget.Button
import androidx.appcompat.app.AppCompatActivity
/**
 * Launcher screen: three buttons, each opening one of the demo activities.
 */
class MainActivity : AppCompatActivity() {
    override fun onCreate(savedInstanceState: Bundle?) {
        super.onCreate(savedInstanceState)
        setContentView(R.layout.activity_main)
        // Button id -> destination activity (insertion order preserved by mapOf).
        val destinations = mapOf(
            R.id.start_record_activity_btn to RecordActivity::class.java,
            R.id.start_file_activity_btn to AudioFileActivity::class.java,
            R.id.start_test_activity_btn to TestActivity::class.java
        )
        for ((buttonId, target) in destinations) {
            findViewById<Button>(buttonId).setOnClickListener {
                startActivity(Intent(this, target))
            }
        }
    }
}
\ No newline at end of file
package com.yeyupiaoling.whisper
import android.Manifest
import android.annotation.SuppressLint
import android.content.pm.PackageManager
import android.media.AudioFormat
import android.media.AudioRecord
import android.media.MediaRecorder
import android.os.Bundle
import android.util.Log
import android.view.MotionEvent
import android.view.View
import android.widget.Button
import android.widget.TextView
import android.widget.Toast
import androidx.appcompat.app.AppCompatActivity
import androidx.core.app.ActivityCompat
import androidx.lifecycle.lifecycleScope
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.cancel
import kotlinx.coroutines.isActive
import kotlinx.coroutines.launch
import kotlinx.coroutines.withContext
import java.io.ByteArrayOutputStream
import java.io.File
import java.io.FileOutputStream
import java.io.IOException
/**
 * Demo activity: press-and-hold to record from the microphone, then transcribe
 * the recording with Whisper when the button is released.
 */
class RecordActivity : AppCompatActivity() {
    private var audioRecord: AudioRecord? = null
    // Polled by the writer thread; set from the UI thread on touch events.
    private var mIsRecording = false
    private var minBufferSize = 0
    private var tempFile: File? = null
    private var resultTextView: TextView? = null
    private var mRecordButton: Button? = null
    private var audioView: AudioView? = null
    private var whisperContext: WhisperContext? = null

    companion object {
        private val TAG = AudioFileActivity::class.java.name

        // Recording sample rate in Hz (the decode/display code assumes 16 kHz).
        const val SAMPLE_RATE = 16000

        // Mono input.
        const val CHANNEL = AudioFormat.CHANNEL_IN_MONO

        // 16-bit PCM samples.
        const val AUDIO_FORMAT = AudioFormat.ENCODING_PCM_16BIT

        // Path of the ggml model inside the APK assets.
        private const val modelPath = "models/ggml-model.bin"
    }

    @SuppressLint("ClickableViewAccessibility")
    override fun onCreate(savedInstanceState: Bundle?) {
        super.onCreate(savedInstanceState)
        setContentView(R.layout.activity_record)
        // Ask for the microphone/storage permissions up front.
        if (!hasPermission()) {
            requestPermission()
        }
        minBufferSize = AudioRecord.getMinBufferSize(SAMPLE_RATE, CHANNEL, AUDIO_FORMAT)
        resultTextView = findViewById(R.id.result_text)
        // Live waveform display while recording.
        audioView = findViewById(R.id.audioView)
        audioView?.setStyle(
            AudioView.ShowStyle.STYLE_HOLLOW_LUMP, AudioView.ShowStyle.STYLE_NOTHING
        )
        mRecordButton = findViewById(R.id.record_button)
        // Press to start recording, release to stop and transcribe.
        mRecordButton!!.setOnTouchListener { v: View?, event: MotionEvent ->
            if (event.action == MotionEvent.ACTION_UP) {
                mIsRecording = false
                stopRecording()
                mRecordButton!!.text = "按下录音"
            } else if (event.action == MotionEvent.ACTION_DOWN) {
                mIsRecording = true
                startRecording()
                mRecordButton!!.text = "录音中..."
            }
            true
        }
        // Disabled until the model has finished loading.
        mRecordButton!!.isEnabled = false
        lifecycleScope.launch {
            loadModel()
        }
    }

    /** Load the ggml model from assets on the IO dispatcher, updating the UI around it. */
    @SuppressLint("SetTextI18n")
    private suspend fun loadModel() = withContext(Dispatchers.IO) {
        val showText = "正在加载模型:$modelPath ...\n"
        // Show progress on the UI thread.
        withContext(Dispatchers.Main) {
            resultTextView!!.text = showText
        }
        whisperContext =
            WhisperContext.createContextFromAsset(application.assets, modelPath)
        // Report success on the UI thread.
        withContext(Dispatchers.Main) {
            mRecordButton!!.isEnabled = true
            resultTextView!!.text = showText + "模型加载成功"
            Toast.makeText(this@RecordActivity, "模型加载成功", Toast.LENGTH_SHORT).show()
        }
    }

    // Begin capturing microphone audio into a temporary WAV file.
    private fun startRecording() {
        try {
            if (ActivityCompat.checkSelfPermission(
                    this, Manifest.permission.RECORD_AUDIO
                ) != PackageManager.PERMISSION_GRANTED
            ) {
                requestPermission()
                return
            }
            // FIX: use a real ".wav" suffix — createTempFile appends it verbatim,
            // so the original produced files named like "recording123wav".
            tempFile = File.createTempFile("recording", ".wav")
            audioRecord = AudioRecord(
                MediaRecorder.AudioSource.MIC, SAMPLE_RATE, CHANNEL, AUDIO_FORMAT, minBufferSize
            )
        } catch (e: IOException) {
            // FIX: File.createTempFile throws IOException; the original only
            // caught IllegalStateException and would have crashed here.
            e.printStackTrace()
            return
        } catch (e: IllegalStateException) {
            e.printStackTrace()
            return
        }
        // FIX: start the recorder exactly once, before the writer thread runs
        // (the original called startRecording() both here and in the writer).
        audioRecord!!.startRecording()
        // Drain PCM data to the WAV file on a background thread.
        Thread {
            try {
                writeAudioDataToWavFile(tempFile!!)
            } catch (e: IOException) {
                e.printStackTrace()
            }
        }.start()
        audioView!!.visibility = View.VISIBLE
    }

    // Stop capturing and kick off transcription of the recorded file.
    private fun stopRecording() {
        // FIX: guard against stop-before-start (e.g. permission was denied and
        // audioRecord was never created); the original used !! and could crash.
        audioRecord?.let {
            it.stop()
            it.release()
        }
        audioRecord = null
        audioView!!.visibility = View.GONE
        val file = tempFile ?: return
        try {
            val startTime = System.currentTimeMillis()
            resultTextView!!.text = "正在识别中..."
            mRecordButton!!.isEnabled = false
            lifecycleScope.launch {
                // FIX: decode the WAV off the main thread (the original decoded
                // on the UI thread).
                val audioData = withContext(Dispatchers.IO) { decodeWaveFile(file) }
                val text = whisperContext?.transcribeData(audioData)
                val endTime = System.currentTimeMillis()
                withContext(Dispatchers.Main) {
                    // Show the transcription plus timing statistics.
                    val showText = "识别结果:${text.toString()}\n" +
                            "音频时间:${audioData.size / (16000 / 1000)} ms\n" +
                            "识别时间:${endTime - startTime} ms\n"
                    resultTextView!!.text = showText
                    Log.d(TAG, showText)
                    mRecordButton!!.isEnabled = true
                }
            }
        } catch (e: Exception) {
            e.printStackTrace()
            mRecordButton!!.isEnabled = true
        }
    }

    /**
     * Drain PCM data from the recorder while [mIsRecording] is set, then write
     * a complete WAV file (44-byte header + data) to [file].
     */
    @Throws(IOException::class)
    private fun writeAudioDataToWavFile(file: File) {
        val bos = ByteArrayOutputStream()
        val buffer = ByteArray(minBufferSize)
        while (mIsRecording) {
            // The recorder may be torn down concurrently by stopRecording().
            val readSize = audioRecord?.read(buffer, 0, minBufferSize) ?: break
            if (readSize > 0) {
                bos.write(buffer, 0, readSize)
                audioView!!.post { audioView!!.setWaveData(buffer) }
            }
        }
        val audioData = bos.toByteArray()
        val totalAudioLen = audioData.size.toLong()
        val totalDataLen = totalAudioLen + 36
        // FIX: close the stream when done (the original leaked the descriptor).
        FileOutputStream(file).use { fos ->
            writeWAVHeader(fos, totalAudioLen, totalDataLen, SAMPLE_RATE, 1, 16)
            fos.write(audioData)
        }
    }

    override fun onDestroy() {
        super.onDestroy()
        mIsRecording = false
        audioRecord?.release()
        audioRecord = null
        // FIX: the original cancelled lifecycleScope and then launched the release
        // job on that same (already cancelled) scope, so the native context was
        // never freed here. Release synchronously instead; freeing is fast.
        kotlinx.coroutines.runBlocking {
            whisperContext?.release()
        }
        whisperContext = null
    }

    // check had permission
    private fun hasPermission(): Boolean {
        return checkSelfPermission(Manifest.permission.RECORD_AUDIO) == PackageManager.PERMISSION_GRANTED && checkSelfPermission(
            Manifest.permission.WRITE_EXTERNAL_STORAGE
        ) == PackageManager.PERMISSION_GRANTED
    }

    // request permission
    private fun requestPermission() {
        requestPermissions(
            arrayOf(
                Manifest.permission.RECORD_AUDIO, Manifest.permission.WRITE_EXTERNAL_STORAGE
            ), 1
        )
    }
}
\ No newline at end of file
package com.yeyupiaoling.whisper
import android.annotation.SuppressLint
import android.os.Bundle
import android.text.method.ScrollingMovementMethod
import android.util.Log
import android.view.View
import android.widget.Button
import android.widget.EditText
import android.widget.TextView
import android.widget.Toast
import androidx.appcompat.app.AppCompatActivity
import androidx.lifecycle.lifecycleScope
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.launch
import kotlinx.coroutines.withContext
import java.io.File
/**
 * Demo activity: repeatedly transcribes a bundled sample WAV to benchmark the
 * model (average recognition time and real-time factor).
 */
class TestActivity : AppCompatActivity() {
    private var whisperContext: WhisperContext? = null
    private var resultTextView: TextView? = null
    private var numEdit: EditText? = null
    private var startBtn: Button? = null
    private var samplePath: File? = null

    companion object {
        private val TAG = AudioFileActivity::class.java.name

        // Path of the ggml model inside the APK assets.
        private const val modelPath = "models/ggml-model.bin"

        // Bundled sample audio used for benchmarking.
        private const val wavPath = "samples/test.wav"
    }

    override fun onCreate(savedInstanceState: Bundle?) {
        super.onCreate(savedInstanceState)
        setContentView(R.layout.activity_test)
        // Copy the bundled sample out of assets so decodeWaveFile can read it.
        samplePath = File(application.filesDir, "samples")
        assets.open(wavPath).use { input ->
            samplePath!!.outputStream().use { output ->
                input.copyTo(output)
            }
        }
        numEdit = findViewById(R.id.num_edit)
        resultTextView = findViewById(R.id.result_text)
        resultTextView!!.movementMethod = ScrollingMovementMethod.getInstance()
        startBtn = findViewById(R.id.start_button)
        startBtn!!.setOnClickListener { v: View? ->
            start()
        }
        // Disabled until the model has finished loading.
        startBtn!!.isEnabled = false
        lifecycleScope.launch {
            loadModel()
        }
    }

    /** Load the ggml model from assets on the IO dispatcher, updating the UI around it. */
    @SuppressLint("SetTextI18n")
    private suspend fun loadModel() = withContext(Dispatchers.IO) {
        val showText = "正在加载模型:${modelPath} ...\n"
        // Show progress on the UI thread.
        withContext(Dispatchers.Main) {
            resultTextView!!.text = showText
        }
        whisperContext =
            WhisperContext.createContextFromAsset(application.assets, modelPath)
        // Report success on the UI thread.
        withContext(Dispatchers.Main) {
            startBtn!!.isEnabled = true
            resultTextView!!.text = showText + "模型加载成功"
            Toast.makeText(this@TestActivity, "模型加载成功", Toast.LENGTH_SHORT).show()
        }
    }

    // Run the benchmark loop the requested number of times.
    private fun start() {
        // FIX: the original used toInt(), which crashed on empty or non-numeric
        // input; validate first and bail out with a message instead.
        val num = numEdit!!.text.toString().toIntOrNull()
        if (num == null || num <= 0) {
            Toast.makeText(this, "请输入有效的测试次数", Toast.LENGTH_SHORT).show()
            return
        }
        resultTextView!!.text = ""
        startBtn!!.isEnabled = false
        val startTime2 = System.currentTimeMillis()
        val audioData = decodeWaveFile(samplePath!!)
        val endTime1 = System.currentTimeMillis()
        // Audio duration in ms, assuming 16 kHz mono samples.
        val dataLen = audioData.size / (16000 / 1000)
        var showText = "读取音频时间:${endTime1 - startTime2} ms\n音频时间:${dataLen} ms\n"
        Log.d(TAG, showText)
        var runNum = 0f
        lifecycleScope.launch {
            val startTime = System.currentTimeMillis()
            for (i in 0 until num) {
                val startTime1 = System.currentTimeMillis()
                val text = whisperContext?.transcribeData(audioData)
                withContext(Dispatchers.Main) {
                    // Append this run's result and timing to the display.
                    showText = "${showText}\n识别结果:${text.toString()}\n" +
                            "识别时间:${System.currentTimeMillis() - startTime1} ms\n"
                    resultTextView!!.text = showText
                    Log.d(TAG, showText)
                    runNum++
                }
            }
            val endTime = System.currentTimeMillis()
            showText = "${showText}\n==================================\n" +
                    "测试次数:${runNum}\n" +
                    "平均识别时间:${(endTime - startTime) / runNum} ms\n" +
                    "实时率(RTF)为:${(endTime - startTime) / runNum / dataLen}"
            resultTextView!!.text = showText
            Log.d(TAG, showText)
            startBtn!!.isEnabled = true
        }
    }

    override fun onDestroy() {
        super.onDestroy()
        // FIX: a job launched on lifecycleScope in onDestroy may never run
        // because the scope is already shutting down; release synchronously.
        kotlinx.coroutines.runBlocking {
            whisperContext?.release()
        }
        whisperContext = null
    }
}
\ No newline at end of file
package com.yeyupiaoling.whisper
import android.content.Context
import android.database.Cursor
import android.net.Uri
import android.provider.MediaStore
import android.provider.OpenableColumns
import java.io.ByteArrayOutputStream
import java.io.File
import java.io.FileInputStream
import java.io.FileOutputStream
import java.io.IOException
import java.io.InputStream
import java.nio.ByteBuffer
import java.nio.ByteOrder
/**
 * Decode a 16-bit PCM WAV file into normalized mono float samples in [-1, 1]
 * for model input.
 *
 * Multi-channel audio is downmixed by averaging the channels. The original
 * only handled 1 or 2 channels; for those counts the result is unchanged.
 *
 * NOTE(review): assumes the canonical 44-byte WAV header (channel count at
 * byte 22, data starting at byte 44); files with extra chunks (e.g. LIST)
 * would need real chunk parsing.
 */
fun decodeWaveFile(file: File): FloatArray {
    val baos = ByteArrayOutputStream()
    file.inputStream().use { it.copyTo(baos) }
    val buffer = ByteBuffer.wrap(baos.toByteArray())
    buffer.order(ByteOrder.LITTLE_ENDIAN)
    // FIX: guard against a corrupt header reporting zero channels, which would
    // divide by zero below.
    val channel = buffer.getShort(22).toInt().coerceAtLeast(1)
    buffer.position(44)
    val shortBuffer = buffer.asShortBuffer()
    val shortArray = ShortArray(shortBuffer.limit())
    shortBuffer.get(shortArray)
    return FloatArray(shortArray.size / channel) { index ->
        // Average the interleaved channels of this frame, then normalize.
        var sum = 0f
        for (c in 0 until channel) {
            sum += shortArray[channel * index + c]
        }
        (sum / 32767.0f / channel).coerceIn(-1f, 1f)
    }
}
// Prepend the canonical 44-byte RIFF/WAVE header for 16-bit PCM audio.
// `bitRate` is bits per sample (16 for PCM16); `totalAudioLen` is the size of
// the raw sample data, `totalDataLen` the size of everything after "RIFF".
@Throws(IOException::class)
fun writeWAVHeader(
    fos: FileOutputStream, totalAudioLen: Long, totalDataLen: Long,
    sampleRate: Int, channels: Int, bitRate: Int
) {
    val bytesPerSecond = bitRate.toLong() * channels * sampleRate / 8
    val header = ByteBuffer.allocate(44).order(ByteOrder.LITTLE_ENDIAN)
    header.put('R'.code.toByte()).put('I'.code.toByte())
        .put('F'.code.toByte()).put('F'.code.toByte())
    header.putInt(totalDataLen.toInt())                       // overall RIFF chunk size
    header.put('W'.code.toByte()).put('A'.code.toByte())
        .put('V'.code.toByte()).put('E'.code.toByte())
    header.put('f'.code.toByte()).put('m'.code.toByte())
        .put('t'.code.toByte()).put(' '.code.toByte())
    header.putInt(16)                                         // 'fmt ' sub-chunk size
    header.putShort(1)                                        // audio format 1 = PCM
    // Low byte only, matching the original header layout exactly.
    header.putShort((channels and 0xff).toShort())
    header.putInt(sampleRate)
    header.putInt(bytesPerSecond.toInt())                     // average bytes/second
    header.putShort(((channels * bitRate / 8) and 0xff).toShort()) // block align
    header.putShort((bitRate and 0xff).toShort())             // bits per sample
    header.put('d'.code.toByte()).put('a'.code.toByte())
        .put('t'.code.toByte()).put('a'.code.toByte())
    header.putInt(totalAudioLen.toInt())                      // raw data size
    fos.write(header.array(), 0, 44)
}
// Copy the content behind a content:// URI into the app's files directory and
// return the resulting file path, or null if the copy fails.
fun getPathFromURI(context: Context, uri: Uri): String? {
    try {
        val resolver = context.contentResolver
        // Query the display name so the copied file keeps its original name.
        // `use {}` closes the cursor even when an exception is thrown (the
        // original only closed it on the success path).
        val name = resolver.query(uri, null, null, null, null)?.use { cursor ->
            val nameIndex = cursor.getColumnIndex(OpenableColumns.DISPLAY_NAME)
            if (cursor.moveToFirst() && nameIndex >= 0) cursor.getString(nameIndex) else null
        } ?: return null
        val file = File(context.filesDir, name)
        resolver.openInputStream(uri)?.use { input ->
            FileOutputStream(file).use { output ->
                // Fixed 1 MiB buffer. The original sized the buffer from
                // available(), which may legally return 0 — a zero-length
                // read() then returns 0 (never -1) and the loop spun forever.
                val buffer = ByteArray(1 * 1024 * 1024)
                var read: Int
                while (input.read(buffer).also { read = it } != -1) {
                    output.write(buffer, 0, read)
                }
            }
        } ?: return null
        return file.path
    } catch (e: Exception) {
        e.printStackTrace()
    }
    return null
}
package com.yeyupiaoling.whisper
import android.util.Log
import java.io.BufferedReader
import java.io.FileReader
object WhisperCpuConfig {
    // Thread count to hand to whisper.cpp: the detected number of
    // high-performance cores, with a floor of 2 so low-end devices
    // still get some parallelism.
    val preferredThreadCount: Int
        get() = maxOf(2, CpuInfo.getHighPerfCpuCount())
}
// Estimates the number of "big" (high-performance) CPU cores from the parsed
// lines of /proc/cpuinfo, with two detection strategies and a final fallback
// in the companion object.
private class CpuInfo(private val lines: List<String>) {
    // Prefer frequency-based detection; if the per-core max-frequency sysfs
    // files can't be read, fall back to parsing the "CPU variant" field.
    private fun getHighPerfCpuCount(): Int = try {
        getHighPerfCpuCountByFrequencies()
    } catch (e: Exception) {
        Log.d(LOG_TAG, "Couldn't read CPU frequencies", e)
        getHighPerfCpuCountByVariant()
    }

    // Cores whose max frequency is strictly above the minimum observed
    // frequency are counted as high-performance.
    private fun getHighPerfCpuCountByFrequencies(): Int =
        getCpuValues(property = "processor") { getMaxCpuFrequency(it.toInt()) }
            .also { Log.d(LOG_TAG, "Binned cpu frequencies (frequency, count): ${it.binnedValues()}") }
            .countDroppingMin()

    // "CPU variant" is a hex field in /proc/cpuinfo. This counts the cores
    // whose variant EQUALS the minimum value.
    // NOTE(review): presumably on these big.LITTLE parts the big cores carry
    // the lowest variant — confirm against target devices.
    private fun getHighPerfCpuCountByVariant(): Int =
        getCpuValues(property = "CPU variant") { it.substringAfter("0x").toInt(radix = 16) }
            .also { Log.d(LOG_TAG, "Binned cpu variants (variant, count): ${it.binnedValues()}") }
            .countKeepingMin()

    // Histogram (value -> occurrence count), used only for debug logging.
    private fun List<Int>.binnedValues() = groupingBy { it }.eachCount()

    // For every cpuinfo line starting with `property`, map the text after the
    // ':' through `mapper` and return the sorted results.
    private fun getCpuValues(property: String, mapper: (String) -> Int) = lines
        .asSequence()
        .filter { it.startsWith(property) }
        .map { mapper(it.substringAfter(':').trim()) }
        .sorted()
        .toList()

    // Count entries strictly greater than the minimum. min() throws on an
    // empty list; callers rely on that propagating into their catch blocks.
    private fun List<Int>.countDroppingMin(): Int {
        val min = min()
        return count { it > min }
    }

    // Count entries equal to the minimum (same empty-list behavior as above).
    private fun List<Int>.countKeepingMin(): Int {
        val min = min()
        return count { it == min }
    }

    companion object {
        private const val LOG_TAG = "WhisperCpuConfig"

        // Public entry point: parse /proc/cpuinfo, or on any failure fall back
        // to a heuristic based on the total processor count.
        fun getHighPerfCpuCount(): Int = try {
            readCpuInfo().getHighPerfCpuCount()
        } catch (e: Exception) {
            Log.d(LOG_TAG, "Couldn't read CPU info", e)
            // Our best guess -- just return the # of CPUs minus 4.
            (Runtime.getRuntime().availableProcessors() - 4).coerceAtLeast(0)
        }

        private fun readCpuInfo() = CpuInfo(
            BufferedReader(FileReader("/proc/cpuinfo"))
                .useLines { it.toList() }
        )

        // Per-core maximum frequency exposed by the kernel cpufreq driver.
        private fun getMaxCpuFrequency(cpuIndex: Int): Int {
            val path = "/sys/devices/system/cpu/cpu${cpuIndex}/cpufreq/cpuinfo_max_freq"
            val maxFreq = BufferedReader(FileReader(path)).use { it.readLine() }
            return maxFreq.toInt()
        }
    }
}
\ No newline at end of file
# Android.mk: builds the base libwhisper for every ABI plus ISA-specific
# variants; whisper.cpp's JNI loader picks the best one at runtime based on
# detected CPU features.
LOCAL_PATH := $(call my-dir)

# Baseline library, built for all ABIs with the shared settings in Whisper.mk.
include $(CLEAR_VARS)
LOCAL_MODULE := libwhisper
include $(LOCAL_PATH)/Whisper.mk
include $(BUILD_SHARED_LIBRARY)

# 32-bit ARM variant with NEON VFPv4 enabled (allows FMA code paths).
ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
include $(CLEAR_VARS)
LOCAL_MODULE := libwhisper_vfpv4
include $(LOCAL_PATH)/Whisper.mk
# Allow building NEON FMA code.
# https://android.googlesource.com/platform/ndk/+/master/sources/android/cpufeatures/cpu-features.h
LOCAL_CFLAGS += -mfpu=neon-vfpv4
include $(BUILD_SHARED_LIBRARY)
endif

# 64-bit ARM variant targeting ARMv8.2-A with half-precision (fp16) support.
ifeq ($(TARGET_ARCH_ABI),arm64-v8a)
include $(CLEAR_VARS)
LOCAL_MODULE := libwhisper_v8fp16_va
include $(LOCAL_PATH)/Whisper.mk
# Allow building NEON FMA code.
# https://android.googlesource.com/platform/ndk/+/master/sources/android/cpufeatures/cpu-features.h
LOCAL_CFLAGS += -march=armv8.2-a+fp16
include $(BUILD_SHARED_LIBRARY)
endif
APP_STL := c++_static
\ No newline at end of file
# Whisper.mk: shared source list and compile/link settings for every
# libwhisper variant; included from Android.mk after CLEAR_VARS.
WHISPER_LIB_DIR := libwhisper
LOCAL_LDLIBS := -landroid -llog

# Make the final output library smaller by only keeping the symbols referenced from the app.
ifneq ($(APP_OPTIM),debug)
LOCAL_CFLAGS += -O3
LOCAL_CFLAGS += -fvisibility=hidden -fvisibility-inlines-hidden
LOCAL_CFLAGS += -ffunction-sections -fdata-sections
LOCAL_LDFLAGS += -Wl,--gc-sections
LOCAL_LDFLAGS += -Wl,--exclude-libs,ALL
# NOTE(review): -flto is only passed at link time; without -flto in
# LOCAL_CFLAGS the objects carry no LTO IR, so this flag likely has no
# effect — confirm before relying on LTO size wins.
LOCAL_LDFLAGS += -flto
endif

LOCAL_CFLAGS += -DSTDC_HEADERS -std=c11 -I $(WHISPER_LIB_DIR)
LOCAL_CPPFLAGS += -std=c++11

# ggml/whisper C and C++ sources plus the JNI bridge.
LOCAL_SRC_FILES := $(WHISPER_LIB_DIR)/ggml.c \
                   $(WHISPER_LIB_DIR)/ggml-alloc.c \
                   $(WHISPER_LIB_DIR)/ggml-backend.c \
                   $(WHISPER_LIB_DIR)/ggml-quants.c \
                   $(WHISPER_LIB_DIR)/whisper.cpp \
                   $(LOCAL_PATH)/jni.c
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment