diff --git a/.github/workflows/android.yml b/.github/workflows/android.yml
new file mode 100644
index 0000000..2d4a60d
--- /dev/null
+++ b/.github/workflows/android.yml
@@ -0,0 +1,40 @@
+name: Android CI
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+
+    # The Gradle project lives in android/, not in the repository root.
+    defaults:
+      run:
+        working-directory: android
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up JDK 11
+        uses: actions/setup-java@v4
+        with:
+          # `distribution` is a required input of setup-java since v2.
+          distribution: temurin
+          java-version: '11'
+
+      - name: Build with Gradle
+        run: ./gradlew build
+
+      - name: Upload APK
+        uses: actions/upload-artifact@v4
+        with:
+          name: app-release.apk
+          # Without a signing config the release APK is named app-release-unsigned.apk, so match by glob.
+          path: android/app/build/outputs/apk/release/*.apk
+          if-no-files-found: error
diff --git a/README.md b/README.md
index 3629c0c..a87fed0 100644
--- a/README.md
+++ b/README.md
@@ -101,6 +101,23 @@ python
 
 The first argument is a path to an audio file and the second is the name of a Moonshine model. `moonshine/tiny` and `moonshine/base` are the currently available models.
 
+## Building and Running the Android App
+
+### Prerequisites
+
+- Android Studio installed on your machine.
+- Android device or emulator for testing.
+
+### Steps
+
+1. Open Android Studio and select "Open an existing Android Studio project".
+2. Navigate to the `android` directory in this repository and open it.
+3. Let Android Studio download any necessary dependencies.
+4. Connect your Android device or start an emulator.
+5. Click on the "Run" button in Android Studio to build and run the app on your device/emulator.
+
+The app allows you to pick audio or video files from your device and transcribe them to text. The transcriptions can be saved in md/txt/json formats.
+
 ## TODO
 
 * [ ] Live transcription demo
diff --git a/android/app/build.gradle b/android/app/build.gradle
new file mode 100644
index 0000000..2304ccd
--- /dev/null
+++ b/android/app/build.gradle
@@ -0,0 +1,34 @@
+apply plugin: 'com.android.application'
+
+android {
+    compileSdkVersion 30
+    defaultConfig {
+        applicationId "com.usefulsensors.moonshine"
+        minSdkVersion 21
+        targetSdkVersion 30
+        versionCode 1
+        versionName "1.0"
+    }
+    buildTypes {
+        release {
+            minifyEnabled false
+            proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro'
+        }
+    }
+    // The Java sources use lambdas and try-with-resources, which need Java 8 language support.
+    compileOptions {
+        sourceCompatibility JavaVersion.VERSION_1_8
+        targetCompatibility JavaVersion.VERSION_1_8
+    }
+}
+
+dependencies {
+    implementation fileTree(dir: 'libs', include: ['*.jar'])
+    implementation 'androidx.appcompat:appcompat:1.2.0'
+    implementation 'androidx.core:core-ktx:1.3.2'
+    implementation 'com.google.android.material:material:1.3.0'
+    implementation 'androidx.constraintlayout:constraintlayout:2.0.4'
+    testImplementation 'junit:junit:4.13.1'
+    androidTestImplementation 'androidx.test.ext:junit:1.1.2'
+    androidTestImplementation 'androidx.test.espresso:espresso-core:3.3.0'
+}
diff --git a/android/app/src/main/AndroidManifest.xml b/android/app/src/main/AndroidManifest.xml
new file mode 100644
index 0000000..5a49cb4
--- /dev/null
+++ b/android/app/src/main/AndroidManifest.xml
@@ -0,0 +1,21 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/android/app/src/main/java/com/usefulsensors/moonshine/AudioUtils.java b/android/app/src/main/java/com/usefulsensors/moonshine/AudioUtils.java
new file mode 100644
index 0000000..a523534
--- /dev/null
+++ b/android/app/src/main/java/com/usefulsensors/moonshine/AudioUtils.java
@@ -0,0 +1,62 @@
+package com.usefulsensors.moonshine;
+
+import android.content.Context;
+import android.net.Uri;
+import android.util.Log;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+
+/** Helpers for staging picked audio content in a local file and transcribing it. */
+public class AudioUtils {
+
+    private static final String TAG = "AudioUtils";
+    private static final int COPY_BUFFER_SIZE = 8192;
+
+    /**
+     * Copies the content behind {@code uri} into a temporary file in the app cache directory.
+     *
+     * @param context context used to resolve the URI and to locate the cache directory
+     * @param uri content URI of the picked audio
+     * @return the temporary file holding a copy of the content; the caller should delete it when done
+     * @throws IOException if the content cannot be opened or the copy fails
+     */
+    public static File getFileFromUri(Context context, Uri uri) throws IOException {
+        File tempFile = File.createTempFile("audio", null, context.getCacheDir());
+        // try-with-resources closes both streams even when the copy fails part-way.
+        try (InputStream inputStream = context.getContentResolver().openInputStream(uri);
+             OutputStream outputStream = new FileOutputStream(tempFile)) {
+            if (inputStream == null) {
+                // openInputStream is documented to possibly return null.
+                throw new IOException("Unable to open " + uri);
+            }
+            byte[] buffer = new byte[COPY_BUFFER_SIZE];
+            int length;
+            while ((length = inputStream.read(buffer)) != -1) {
+                outputStream.write(buffer, 0, length);
+            }
+        } catch (IOException | RuntimeException e) {
+            // Do not leave a partial copy behind in the cache directory.
+            if (!tempFile.delete()) {
+                Log.w(TAG, "Could not delete partial file " + tempFile);
+            }
+            throw e;
+        }
+        return tempFile;
+    }
+
+    /**
+     * Transcribes the given audio file.
+     *
+     * @param audioFile local audio file to transcribe
+     * @return the transcription text (currently a fixed placeholder)
+     */
+    public static String transcribeAudio(File audioFile) {
+        // Placeholder for actual transcription logic
+        Log.i(TAG, "Transcribing audio file: " + audioFile.getAbsolutePath());
+        return "Transcription result";
+    }
+}
diff --git a/android/app/src/main/java/com/usefulsensors/moonshine/FilePickerFragment.java b/android/app/src/main/java/com/usefulsensors/moonshine/FilePickerFragment.java
new file mode 100644
index 0000000..854b51e
--- /dev/null
+++ b/android/app/src/main/java/com/usefulsensors/moonshine/FilePickerFragment.java
@@ -0,0 +1,89 @@
+package com.usefulsensors.moonshine;
+
+import android.app.Activity;
+import android.content.Intent;
+import android.net.Uri;
+import android.os.Bundle;
+import android.provider.MediaStore;
+import android.view.LayoutInflater;
+import android.view.View;
+import android.view.ViewGroup;
+import android.widget.Button;
+import android.widget.TextView;
+import androidx.annotation.NonNull;
+import androidx.annotation.Nullable;
+import androidx.fragment.app.Fragment;
+
+/** Lets the user pick an audio or video file and hands it to {@link TranscriptionService}. */
+public class FilePickerFragment extends Fragment {
+
+    private static final int REQUEST_CODE_PICK_AUDIO = 1;
+    private static final int REQUEST_CODE_PICK_VIDEO = 2;
+
+    private TextView statusTextView;
+
+    @Nullable
+    @Override
+    public View onCreateView(@NonNull LayoutInflater inflater, @Nullable ViewGroup container, @Nullable Bundle savedInstanceState) {
+        View view = inflater.inflate(R.layout.fragment_file_picker, container, false);
+
+        statusTextView = view.findViewById(R.id.statusTextView);
+
+        Button pickAudioButton = view.findViewById(R.id.pickAudioButton);
+        pickAudioButton.setOnClickListener(v -> pickAudioFile());
+
+        Button pickVideoButton = view.findViewById(R.id.pickVideoButton);
+        pickVideoButton.setOnClickListener(v -> pickVideoFile());
+
+        return view;
+    }
+
+    /** Opens the system picker for audio stored on the device. */
+    private void pickAudioFile() {
+        Intent intent = new Intent(Intent.ACTION_PICK, MediaStore.Audio.Media.EXTERNAL_CONTENT_URI);
+        startActivityForResult(intent, REQUEST_CODE_PICK_AUDIO);
+    }
+
+    /** Opens the system picker for video stored on the device. */
+    private void pickVideoFile() {
+        Intent intent = new Intent(Intent.ACTION_PICK, MediaStore.Video.Media.EXTERNAL_CONTENT_URI);
+        startActivityForResult(intent, REQUEST_CODE_PICK_VIDEO);
+    }
+
+    @Override
+    public void onActivityResult(int requestCode, int resultCode, @Nullable Intent data) {
+        super.onActivityResult(requestCode, resultCode, data);
+        // RESULT_OK is a static constant; reading it through getActivity() could NPE when detached.
+        if (resultCode != Activity.RESULT_OK || data == null) {
+            return;
+        }
+        Uri uri = data.getData();
+        if (uri == null) {
+            // The picker returned no selection; there is nothing to transcribe.
+            return;
+        }
+        if (requestCode == REQUEST_CODE_PICK_AUDIO) {
+            transcribeAudio(uri);
+        } else if (requestCode == REQUEST_CODE_PICK_VIDEO) {
+            transcribeVideo(uri);
+        }
+    }
+
+    private void transcribeAudio(Uri uri) {
+        startTranscription(uri, "audio");
+        statusTextView.setText("Transcribing audio...");
+    }
+
+    private void transcribeVideo(Uri uri) {
+        startTranscription(uri, "video");
+        statusTextView.setText("Transcribing video...");
+    }
+
+    /** Starts {@link TranscriptionService} for {@code uri}, tagging the request with the media type. */
+    private void startTranscription(Uri uri, String type) {
+        Intent intent = new Intent(requireContext(), TranscriptionService.class);
+        intent.setData(uri);
+        intent.putExtra("type", type);
+        requireContext().startService(intent);
+    }
+}
diff --git a/android/app/src/main/java/com/usefulsensors/moonshine/MainActivity.java b/android/app/src/main/java/com/usefulsensors/moonshine/MainActivity.java
new file mode 100644
index 0000000..9aca452
--- /dev/null
+++
b/android/app/src/main/java/com/usefulsensors/moonshine/MainActivity.java
@@ -0,0 +1,109 @@
+package com.usefulsensors.moonshine;
+
+import android.Manifest;
+import android.content.Intent;
+import android.content.pm.PackageManager;
+import android.net.Uri;
+import android.os.Bundle;
+import android.provider.MediaStore;
+import android.view.View;
+import android.widget.Button;
+import android.widget.TextView;
+import androidx.annotation.NonNull;
+import androidx.appcompat.app.AppCompatActivity;
+import androidx.core.app.ActivityCompat;
+import androidx.core.content.ContextCompat;
+
+/** Entry screen: requests storage permission and lets the user pick media to transcribe. */
+public class MainActivity extends AppCompatActivity {
+
+    private static final int REQUEST_CODE_PICK_AUDIO = 1;
+    private static final int REQUEST_CODE_PICK_VIDEO = 2;
+    private static final int REQUEST_CODE_PERMISSIONS = 3;
+
+    private TextView statusTextView;
+
+    @Override
+    protected void onCreate(Bundle savedInstanceState) {
+        super.onCreate(savedInstanceState);
+        setContentView(R.layout.activity_main);
+
+        statusTextView = findViewById(R.id.statusTextView);
+
+        Button pickAudioButton = findViewById(R.id.pickAudioButton);
+        pickAudioButton.setOnClickListener(v -> pickAudioFile());
+
+        Button pickVideoButton = findViewById(R.id.pickVideoButton);
+        pickVideoButton.setOnClickListener(v -> pickVideoFile());
+
+        if (!hasPermissions()) {
+            requestPermissions();
+        }
+    }
+
+    private boolean hasPermissions() {
+        return ContextCompat.checkSelfPermission(this, Manifest.permission.READ_EXTERNAL_STORAGE) == PackageManager.PERMISSION_GRANTED;
+    }
+
+    private void requestPermissions() {
+        ActivityCompat.requestPermissions(this, new String[]{Manifest.permission.READ_EXTERNAL_STORAGE}, REQUEST_CODE_PERMISSIONS);
+    }
+
+    private void pickAudioFile() {
+        Intent intent = new Intent(Intent.ACTION_PICK, MediaStore.Audio.Media.EXTERNAL_CONTENT_URI);
+        startActivityForResult(intent, REQUEST_CODE_PICK_AUDIO);
+    }
+
+    private void pickVideoFile() {
+        Intent intent = new Intent(Intent.ACTION_PICK, MediaStore.Video.Media.EXTERNAL_CONTENT_URI);
+        startActivityForResult(intent, REQUEST_CODE_PICK_VIDEO);
+    }
+
+    @Override
+    protected void onActivityResult(int requestCode, int resultCode, Intent data) {
+        super.onActivityResult(requestCode, resultCode, data);
+        if (resultCode != RESULT_OK || data == null) {
+            return;
+        }
+        Uri uri = data.getData();
+        if (uri == null) {
+            // The picker returned no selection; there is nothing to transcribe.
+            return;
+        }
+        if (requestCode == REQUEST_CODE_PICK_AUDIO) {
+            transcribeAudio(uri);
+        } else if (requestCode == REQUEST_CODE_PICK_VIDEO) {
+            transcribeVideo(uri);
+        }
+    }
+
+    private void transcribeAudio(Uri uri) {
+        startTranscription(uri, "audio");
+        statusTextView.setText("Transcribing audio...");
+    }
+
+    private void transcribeVideo(Uri uri) {
+        startTranscription(uri, "video");
+        statusTextView.setText("Transcribing video...");
+    }
+
+    /** Starts {@link TranscriptionService} for {@code uri}, tagging the request with the media type. */
+    private void startTranscription(Uri uri, String type) {
+        Intent intent = new Intent(this, TranscriptionService.class);
+        intent.setData(uri);
+        intent.putExtra("type", type);
+        startService(intent);
+    }
+
+    @Override
+    public void onRequestPermissionsResult(int requestCode, @NonNull String[] permissions, @NonNull int[] grantResults) {
+        super.onRequestPermissionsResult(requestCode, permissions, grantResults);
+        if (requestCode != REQUEST_CODE_PERMISSIONS) {
+            return;
+        }
+        boolean granted = grantResults.length > 0 && grantResults[0] == PackageManager.PERMISSION_GRANTED;
+        if (!granted) {
+            statusTextView.setText("Permissions denied. Cannot access files.");
+        }
+    }
+}
diff --git a/android/app/src/main/java/com/usefulsensors/moonshine/RealTimeASRFragment.java b/android/app/src/main/java/com/usefulsensors/moonshine/RealTimeASRFragment.java
new file mode 100644
index 0000000..c062de9
--- /dev/null
+++ b/android/app/src/main/java/com/usefulsensors/moonshine/RealTimeASRFragment.java
@@ -0,0 +1,127 @@
+package com.usefulsensors.moonshine;
+
+import android.Manifest;
+import android.content.pm.PackageManager;
+import android.os.Bundle;
+import android.os.Handler;
+import android.os.Looper;
+import android.view.LayoutInflater;
+import android.view.View;
+import android.view.ViewGroup;
+import android.widget.Button;
+import android.widget.Toast;
+import androidx.annotation.NonNull;
+import androidx.annotation.Nullable;
+import androidx.core.app.ActivityCompat;
+import androidx.core.content.ContextCompat;
+import androidx.fragment.app.Fragment;
+import androidx.recyclerview.widget.LinearLayoutManager;
+import androidx.recyclerview.widget.RecyclerView;
+import java.util.ArrayList;
+import java.util.List;
+
+/** Shows a running list of timestamped transcription results; the ASR itself is still a placeholder. */
+public class RealTimeASRFragment extends Fragment {
+
+    private static final int REQUEST_CODE_PERMISSIONS = 1;
+    private static final long ASR_INTERVAL_MS = 1000;
+    private Button startASRButton;
+    private Button stopASRButton;
+    private RecyclerView transcriptionRecyclerView;
+    private TranscriptionAdapter transcriptionAdapter;
+    private List<TranscriptionItem> transcriptionItems;
+    private Handler handler;
+    private Runnable asrRunnable;
+
+    @Nullable
+    @Override
+    public View onCreateView(@NonNull LayoutInflater inflater, @Nullable ViewGroup container, @Nullable Bundle savedInstanceState) {
+        View view = inflater.inflate(R.layout.fragment_real_time_asr, container, false);
+
+        startASRButton = view.findViewById(R.id.startASRButton);
+        stopASRButton = view.findViewById(R.id.stopASRButton);
+        transcriptionRecyclerView = view.findViewById(R.id.transcriptionRecyclerView);
+
+        transcriptionItems = new ArrayList<>();
+        transcriptionAdapter = new TranscriptionAdapter(transcriptionItems);
+        transcriptionRecyclerView.setLayoutManager(new LinearLayoutManager(getContext()));
+        transcriptionRecyclerView.setAdapter(transcriptionAdapter);
+
+        startASRButton.setOnClickListener(v -> startRealTimeASR());
+        stopASRButton.setOnClickListener(v -> stopRealTimeASR());
+
+        handler = new Handler(Looper.getMainLooper());
+
+        if (!hasPermissions()) {
+            requestPermissions();
+        }
+
+        return view;
+    }
+
+    @Override
+    public void onDestroyView() {
+        super.onDestroyView();
+        // Stop the periodic task so it does not outlive the view it updates.
+        cancelAsrLoop();
+    }
+
+    private boolean hasPermissions() {
+        return ContextCompat.checkSelfPermission(requireContext(), Manifest.permission.RECORD_AUDIO) == PackageManager.PERMISSION_GRANTED;
+    }
+
+    private void requestPermissions() {
+        // Use the Fragment API so the result is delivered to this fragment's onRequestPermissionsResult.
+        requestPermissions(new String[]{Manifest.permission.RECORD_AUDIO}, REQUEST_CODE_PERMISSIONS);
+    }
+
+    private void startRealTimeASR() {
+        if (!hasPermissions()) {
+            requestPermissions();
+            return;
+        }
+        if (asrRunnable != null) {
+            // Already running; a second loop would overwrite the only reference used to cancel the first.
+            return;
+        }
+        Toast.makeText(getContext(), "Starting Real-Time ASR...", Toast.LENGTH_SHORT).show();
+        asrRunnable = new Runnable() {
+            @Override
+            public void run() {
+                // Placeholder for actual ASR logic
+                String transcription = "Real-time transcription result with timestamp";
+                long timestamp = System.currentTimeMillis();
+                int insertedAt = transcriptionItems.size();
+                transcriptionItems.add(new TranscriptionItem(transcription, timestamp));
+                transcriptionAdapter.notifyItemInserted(insertedAt);
+                handler.postDelayed(this, ASR_INTERVAL_MS); // Simulate real-time ASR every second
+            }
+        };
+        handler.post(asrRunnable);
+    }
+
+    private void stopRealTimeASR() {
+        Toast.makeText(getContext(), "Stopping Real-Time ASR...", Toast.LENGTH_SHORT).show();
+        cancelAsrLoop();
+    }
+
+    /** Cancels the pending ASR tick, if any, and marks the loop as stopped. */
+    private void cancelAsrLoop() {
+        if (asrRunnable != null) {
+            handler.removeCallbacks(asrRunnable);
+            asrRunnable = null;
+        }
+    }
+
+    @Override
+    public void onRequestPermissionsResult(int requestCode, @NonNull String[] permissions, @NonNull int[] grantResults) {
+        super.onRequestPermissionsResult(requestCode, permissions, grantResults);
+        if (requestCode != REQUEST_CODE_PERMISSIONS) {
+            return;
+        }
+        boolean granted = grantResults.length > 0 && grantResults[0] == PackageManager.PERMISSION_GRANTED;
+        if (!granted) {
+            Toast.makeText(getContext(), "Permissions denied. Cannot start real-time ASR.", Toast.LENGTH_SHORT).show();
+        }
+    }
+}
diff --git a/android/app/src/main/java/com/usefulsensors/moonshine/TranscriptionAdapter.java b/android/app/src/main/java/com/usefulsensors/moonshine/TranscriptionAdapter.java
new file mode 100644
index 0000000..7b1958f
--- /dev/null
+++ b/android/app/src/main/java/com/usefulsensors/moonshine/TranscriptionAdapter.java
@@ -0,0 +1,64 @@
+package com.usefulsensors.moonshine;
+
+import android.view.LayoutInflater;
+import android.view.View;
+import android.view.ViewGroup;
+import android.widget.TextView;
+import androidx.annotation.NonNull;
+import androidx.recyclerview.widget.RecyclerView;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.List;
+import java.util.Locale;
+
+/** RecyclerView adapter that renders each {@link TranscriptionItem} as text plus a formatted timestamp. */
+public class TranscriptionAdapter extends RecyclerView.Adapter<TranscriptionAdapter.TranscriptionViewHolder> {
+
+    private final List<TranscriptionItem> transcriptionItems;
+
+    /**
+     * @param transcriptionItems backing list; it is not copied, so the owner must notify the adapter of changes
+     */
+    public TranscriptionAdapter(List<TranscriptionItem> transcriptionItems) {
+        this.transcriptionItems = transcriptionItems;
+    }
+
+    @NonNull
+    @Override
+    public TranscriptionViewHolder onCreateViewHolder(@NonNull ViewGroup parent, int viewType) {
+        View view = LayoutInflater.from(parent.getContext()).inflate(R.layout.item_transcription, parent, false);
+        return new TranscriptionViewHolder(view);
+    }
+
+    @Override
+    public void onBindViewHolder(@NonNull TranscriptionViewHolder holder, int position) {
+        TranscriptionItem item = transcriptionItems.get(position);
+        holder.transcriptionTextView.setText(item.getTranscription());
+        holder.timestampTextView.setText(formatTimestamp(item.getTimestamp()));
+    }
+
+    @Override
+    public int getItemCount() {
+        return transcriptionItems.size();
+    }
+
+    /** Formats an epoch-millisecond timestamp as HH:mm:ss in the default locale. */
+    private String formatTimestamp(long timestamp) {
+        // SimpleDateFormat is not thread-safe, so a fresh instance is created instead of caching one.
+        SimpleDateFormat sdf = new SimpleDateFormat("HH:mm:ss", Locale.getDefault());
+        return sdf.format(new Date(timestamp));
+    }
+
+    /** Holds the two text views of a single transcription row. */
+    static class TranscriptionViewHolder extends RecyclerView.ViewHolder {
+
+        final TextView transcriptionTextView;
+        final TextView timestampTextView;
+
+        public TranscriptionViewHolder(@NonNull View itemView) {
+            super(itemView);
+            transcriptionTextView = itemView.findViewById(R.id.transcriptionTextView);
+            timestampTextView = itemView.findViewById(R.id.timestampTextView);
+        }
+    }
+}
diff --git a/android/app/src/main/java/com/usefulsensors/moonshine/TranscriptionService.java b/android/app/src/main/java/com/usefulsensors/moonshine/TranscriptionService.java
new file mode 100644
index 0000000..8a3848a
--- /dev/null
+++ b/android/app/src/main/java/com/usefulsensors/moonshine/TranscriptionService.java
@@ -0,0 +1,109 @@
+package com.usefulsensors.moonshine;
+
+import android.app.Service;
+import android.content.Intent;
+import android.net.Uri;
+import android.os.IBinder;
+import android.util.Log;
+import androidx.annotation.Nullable;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+
+/** Started service that copies the picked media to a temp file, transcribes it and saves the text. */
+public class TranscriptionService extends Service {
+
+    private static final String TAG = "TranscriptionService";
+
+    // A single worker finishes requests in start order, which stopSelf(startId) relies on.
+    private final ExecutorService executor = Executors.newSingleThreadExecutor();
+
+    @Nullable
+    @Override
+    public IBinder onBind(Intent intent) {
+        return null;
+    }
+
+    @Override
+    public int onStartCommand(Intent intent, int flags, int startId) {
+        // The system may pass a null intent when it recreates the service, so never dereference it blindly.
+        Uri uri = intent != null ? intent.getData() : null;
+        String type = intent != null ? intent.getStringExtra("type") : null;
+
+        if (uri == null || type == null) {
+            // Queue the stop behind any running work so an invalid request cannot stop the service early.
+            executor.execute(() -> stopSelf(startId));
+            return START_NOT_STICKY;
+        }
+
+        executor.execute(() -> {
+            try {
+                if (type.equals("audio")) {
+                    handleAudioTranscription(uri);
+                } else if (type.equals("video")) {
+                    handleVideoTranscription(uri);
+                } else {
+                    Log.w(TAG, "Unknown transcription type: " + type);
+                }
+            } catch (IOException | RuntimeException e) {
+                Log.e(TAG, "Transcription failed", e);
+            } finally {
+                // Stops the service only if no newer start request has arrived in the meantime.
+                stopSelf(startId);
+            }
+        });
+
+        // Ask the system to redeliver this request if the process dies before stopSelf(startId) runs.
+        return START_REDELIVER_INTENT;
+    }
+
+    @Override
+    public void onDestroy() {
+        executor.shutdown();
+        super.onDestroy();
+    }
+
+    private void handleAudioTranscription(Uri uri) throws IOException {
+        File audioFile = AudioUtils.getFileFromUri(this, uri);
+        try {
+            String transcription = AudioUtils.transcribeAudio(audioFile);
+            saveTranscription(transcription, "audio_transcription");
+        } finally {
+            deleteTempFile(audioFile);
+        }
+    }
+
+    private void handleVideoTranscription(Uri uri) throws IOException {
+        File videoFile = VideoUtils.getFileFromUri(this, uri);
+        try {
+            String transcription = VideoUtils.transcribeVideo(videoFile);
+            saveTranscription(transcription, "video_transcription");
+        } finally {
+            deleteTempFile(videoFile);
+        }
+    }
+
+    /** Removes the cached copy of the media so the cache directory does not grow with every request. */
+    private void deleteTempFile(File file) {
+        if (!file.delete()) {
+            Log.w(TAG, "Could not delete temporary file: " + file.getAbsolutePath());
+        }
+    }
+
+    private void saveTranscription(String transcription, String fileName) {
+        // getExternalFilesDir returns null when shared storage is unavailable; fall back to internal storage.
+        File directory = getExternalFilesDir(null);
+        if (directory == null) {
+            directory = getFilesDir();
+        }
+        File file = new File(directory, fileName + ".txt");
+        try {
+            TranscriptionUtils.saveToFile(transcription, file);
+            Log.i(TAG, "Transcription saved: " + file.getAbsolutePath());
+        } catch (IOException e) {
+            Log.e(TAG, "Failed to save transcription", e);
+        }
+    }
+}
diff --git a/android/app/src/main/java/com/usefulsensors/moonshine/TranscriptionUtils.java b/android/app/src/main/java/com/usefulsensors/moonshine/TranscriptionUtils.java
new file mode 100644
index 0000000..3aa90c2
--- /dev/null
+++ b/android/app/src/main/java/com/usefulsensors/moonshine/TranscriptionUtils.java
@@ -0,0 +1,73 @@
+package com.usefulsensors.moonshine;
+
+import android.util.Log;
+
+import org.json.JSONException;
+import org.json.JSONObject;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+import java.nio.charset.StandardCharsets;
+
+/** Converts a transcription to Markdown, plain text or JSON and writes it to disk. */
+public class TranscriptionUtils {
+
+    private static final String TAG = "TranscriptionUtils";
+
+    /**
+     * Writes {@code transcription} to {@code file} as UTF-8, replacing any existing content.
+     *
+     * @param transcription text to write
+     * @param file destination file
+     * @throws IOException if the file cannot be opened or written
+     */
+    public static void saveToFile(String transcription, File file) throws IOException {
+        // try-with-resources closes the writer even when write() fails.
+        try (Writer writer = new OutputStreamWriter(new FileOutputStream(file), StandardCharsets.UTF_8)) {
+            writer.write(transcription);
+        }
+    }
+
+    /** Saves the transcription under a Markdown heading; see {@link #convertToMd(String)}. */
+    public static void saveToMd(String transcription, File file) throws IOException {
+        saveToFile(convertToMd(transcription), file);
+    }
+
+    /** Saves the transcription as plain text. */
+    public static void saveToTxt(String transcription, File file) throws IOException {
+        saveToFile(convertToTxt(transcription), file);
+    }
+
+    /** Saves the transcription as a JSON object; see {@link #convertToJson(String)}. */
+    public static void saveToJson(String transcription, File file) throws IOException {
+        saveToFile(convertToJson(transcription), file);
+    }
+
+    /** Returns the transcription prefixed with a {@code # Transcription} Markdown heading. */
+    public static String convertToMd(String transcription) {
+        return "# Transcription\n\n" + transcription;
+    }
+
+    /** Returns the transcription unchanged. */
+    public static String convertToTxt(String transcription) {
+        return transcription;
+    }
+
+    /**
+     * Returns a JSON object string with the text stored under the {@code transcription} key.
+     *
+     * @throws IllegalStateException if the JSON library rejects the value
+     */
+    public static String convertToJson(String transcription) {
+        try {
+            return new JSONObject().put("transcription", transcription).toString();
+        } catch (JSONException e) {
+            // JSONException is checked on Android; it is not expected for a fixed key and a String value.
+            throw new IllegalStateException("Failed to encode transcription as JSON", e);
+        }
+    }
+}
diff --git a/android/app/src/main/java/com/usefulsensors/moonshine/VideoUtils.java b/android/app/src/main/java/com/usefulsensors/moonshine/VideoUtils.java
new file mode 100644
index 0000000..5c8a0a1
--- /dev/null
+++ b/android/app/src/main/java/com/usefulsensors/moonshine/VideoUtils.java
@@ -0,0 +1,62 @@
+package com.usefulsensors.moonshine;
+
+import android.content.Context;
+import android.net.Uri;
+import android.util.Log;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+
+/** Helpers for staging picked video content in a local file and transcribing it. */
+public class VideoUtils {
+
+    private static final String TAG = "VideoUtils";
+    private static final int COPY_BUFFER_SIZE = 8192;
+
+    /**
+     * Copies the content behind {@code uri} into a temporary file in the app cache directory.
+     *
+     * @param context context used to resolve the URI and to locate the cache directory
+     * @param uri content URI of the picked video
+     * @return the temporary file holding a copy of the content; the caller should delete it when done
+     * @throws IOException if the content cannot be opened or the copy fails
+     */
+    public static File getFileFromUri(Context context, Uri uri) throws IOException {
+        File tempFile = File.createTempFile("video", null, context.getCacheDir());
+        // try-with-resources closes both streams even when the copy fails part-way.
+        try (InputStream inputStream = context.getContentResolver().openInputStream(uri);
+             OutputStream outputStream = new FileOutputStream(tempFile)) {
+            if (inputStream == null) {
+                // openInputStream is documented to possibly return null.
+                throw new IOException("Unable to open " + uri);
+            }
+            byte[] buffer = new byte[COPY_BUFFER_SIZE];
+            int length;
+            while ((length = inputStream.read(buffer)) != -1) {
+                outputStream.write(buffer, 0, length);
+            }
+        } catch (IOException | RuntimeException e) {
+            // Do not leave a partial copy behind in the cache directory.
+            if (!tempFile.delete()) {
+                Log.w(TAG, "Could not delete partial file " + tempFile);
+            }
+            throw e;
+        }
+        return tempFile;
+    }
+
+    /**
+     * Transcribes the given video file.
+     *
+     * @param videoFile local video file to transcribe
+     * @return the transcription text (currently a fixed placeholder)
+     */
+    public static String transcribeVideo(File videoFile) {
+        // Placeholder for actual transcription logic
+        Log.i(TAG, "Transcribing video file: " + videoFile.getAbsolutePath());
+        return "Transcription result";
+    }
+}
diff --git a/android/app/src/main/res/layout/activity_main.xml b/android/app/src/main/res/layout/activity_main.xml
new file mode 100644
index 0000000..3a34585
--- /dev/null
+++ b/android/app/src/main/res/layout/activity_main.xml
@@ -0,0 +1,29 @@
+
+
+
+
+
+