This commit is contained in:
Kar
2025-06-17 15:53:01 +05:30
commit 4d20931ecc
411 changed files with 180695 additions and 0 deletions

View File

@@ -0,0 +1,49 @@
# whisper.objc
Minimal Obj-C application for automatic offline speech recognition.
The inference runs locally, on-device.
https://user-images.githubusercontent.com/1991296/197385372-962a6dea-bca1-4d50-bf96-1d8c27b98c81.mp4
Real-time transcription demo:
https://user-images.githubusercontent.com/1991296/204126266-ce4177c6-6eca-4bd9-bca8-0e46d9da2364.mp4
## Usage
```java
git clone https://github.com/ggerganov/whisper.cpp
open whisper.cpp/examples/whisper.objc/whisper.objc.xcodeproj/
// If you don't want to convert a Core ML model, you can skip this step by create dummy model
mkdir models/ggml-base.en-encoder.mlmodelc
```
Make sure to build the project in `Release`:
<img width="947" alt="image" src="https://user-images.githubusercontent.com/1991296/197382607-9e1e6d1b-79fa-496f-9d16-b71dc1535701.png">
Also, don't forget to add the `-DGGML_USE_ACCELERATE` compiler flag for `ggml.c` in Build Phases.
This can significantly improve the performance of the transcription:
<img width="1072" alt="image" src="https://user-images.githubusercontent.com/1991296/208511239-8d7cdbd1-aa48-41b5-becd-ca288d53cc07.png">
## Core ML
If you want to enable Core ML support, you can add the `-DWHISPER_USE_COREML -DWHISPER_COREML_ALLOW_FALLBACK` compiler flag for `whisper.cpp` in Build Phases:
<img width="1072" alt="image" src="https://github.com/ggerganov/whisper.cpp/assets/3001525/103e8f57-6eb6-490d-a60c-f6cf6c319324">
Then follow the [`Core ML support` section of readme](../../README.md#core-ml-support) for convert the model.
In this project, it also added `-O3 -DNDEBUG` to `Other C Flags`, but adding flags to app proj is not ideal in real world (applies to all C/C++ files), consider splitting xcodeproj in workspace in your own project.
## Metal
You can also enable Metal to make the inference run on the GPU of your device. This might or might not be more efficient
compared to Core ML depending on the model and device that you use.
To enable Metal, just add `-DGGML_USE_METAL` instead off the `-DWHISPER_USE_COREML` flag and you are ready.
This will make both the Encoder and the Decoder run on the GPU.
If you want to run the Encoder with Core ML and the Decoder with Metal then simply add both `-DWHISPER_USE_COREML -DGGML_USE_METAL` flags. That's all!

View File

@@ -0,0 +1,462 @@
// !$*UTF8*$!
{
archiveVersion = 1;
classes = {
};
objectVersion = 56;
objects = {
/* Begin PBXBuildFile section */
1844471A2AB211A2007D6BFE /* ggml-alloc.c in Sources */ = {isa = PBXBuildFile; fileRef = 184447182AB211A2007D6BFE /* ggml-alloc.c */; };
1844471C2AB21655007D6BFE /* ggml-metal.m in Sources */ = {isa = PBXBuildFile; fileRef = 1844471B2AB21655007D6BFE /* ggml-metal.m */; settings = {COMPILER_FLAGS = "-framework Foundation -framework Metal -framework MetalKit -fno-objc-arc"; }; };
184447212AB21B43007D6BFE /* ggml-metal.metal in CopyFiles */ = {isa = PBXBuildFile; fileRef = 1844471D2AB2195F007D6BFE /* ggml-metal.metal */; };
18627C7B29052BDF00BD2A04 /* AppDelegate.m in Sources */ = {isa = PBXBuildFile; fileRef = 18627C7A29052BDF00BD2A04 /* AppDelegate.m */; };
18627C7E29052BDF00BD2A04 /* SceneDelegate.m in Sources */ = {isa = PBXBuildFile; fileRef = 18627C7D29052BDF00BD2A04 /* SceneDelegate.m */; };
18627C8129052BDF00BD2A04 /* ViewController.m in Sources */ = {isa = PBXBuildFile; fileRef = 18627C8029052BDF00BD2A04 /* ViewController.m */; };
18627C8429052BDF00BD2A04 /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 18627C8229052BDF00BD2A04 /* Main.storyboard */; };
18627C8629052BE000BD2A04 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 18627C8529052BE000BD2A04 /* Assets.xcassets */; };
18627C8929052BE000BD2A04 /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 18627C8729052BE000BD2A04 /* LaunchScreen.storyboard */; };
18627C8C29052BE000BD2A04 /* main.m in Sources */ = {isa = PBXBuildFile; fileRef = 18627C8B29052BE000BD2A04 /* main.m */; };
18627C9429052C4900BD2A04 /* whisper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 18627C9329052C4900BD2A04 /* whisper.cpp */; settings = {COMPILER_FLAGS = "-DWHISPER_USE_COREML -DWHISPER_COREML_ALLOW_FALLBACK -DGGML_USE_METAL"; }; };
18627C9629052C5800BD2A04 /* ggml.c in Sources */ = {isa = PBXBuildFile; fileRef = 18627C9529052C5800BD2A04 /* ggml.c */; settings = {COMPILER_FLAGS = "-DGGML_USE_ACCELERATE -DGGML_USE_METAL"; }; };
18627C9B29052CFF00BD2A04 /* ggml-base.en.bin in Resources */ = {isa = PBXBuildFile; fileRef = 18627C9A29052CFF00BD2A04 /* ggml-base.en.bin */; };
18ABE15A2AF556340044A204 /* ggml-backend.c in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE1572AF556340044A204 /* ggml-backend.c */; };
18ABE15B2AF556340044A204 /* ggml-quants.c in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE1592AF556340044A204 /* ggml-quants.c */; };
7FE3424B2A0C3FA20015A058 /* whisper-encoder-impl.m in Sources */ = {isa = PBXBuildFile; fileRef = 7FE342452A0C3FA20015A058 /* whisper-encoder-impl.m */; };
7FE3424C2A0C3FA20015A058 /* whisper-encoder.mm in Sources */ = {isa = PBXBuildFile; fileRef = 7FE342472A0C3FA20015A058 /* whisper-encoder.mm */; };
7FE3424D2A0C3FA20015A058 /* whisper-decoder-impl.m in Sources */ = {isa = PBXBuildFile; fileRef = 7FE3424A2A0C3FA20015A058 /* whisper-decoder-impl.m */; };
7FE3424F2A0C418A0015A058 /* ggml-base.en-encoder.mlmodelc in Resources */ = {isa = PBXBuildFile; fileRef = 7FE3424E2A0C418A0015A058 /* ggml-base.en-encoder.mlmodelc */; };
/* End PBXBuildFile section */
/* Begin PBXCopyFilesBuildPhase section */
184447202AB21B25007D6BFE /* CopyFiles */ = {
isa = PBXCopyFilesBuildPhase;
buildActionMask = 2147483647;
dstPath = "";
dstSubfolderSpec = 7;
files = (
184447212AB21B43007D6BFE /* ggml-metal.metal in CopyFiles */,
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXCopyFilesBuildPhase section */
/* Begin PBXFileReference section */
184447182AB211A2007D6BFE /* ggml-alloc.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-alloc.c"; path = "../../../ggml-alloc.c"; sourceTree = "<group>"; };
184447192AB211A2007D6BFE /* ggml-alloc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-alloc.h"; path = "../../../ggml-alloc.h"; sourceTree = "<group>"; };
1844471B2AB21655007D6BFE /* ggml-metal.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = "ggml-metal.m"; path = "../../../ggml-metal.m"; sourceTree = "<group>"; };
1844471D2AB2195F007D6BFE /* ggml-metal.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; name = "ggml-metal.metal"; path = "../../../ggml-metal.metal"; sourceTree = "<group>"; };
18627C7629052BDF00BD2A04 /* whisper.objc.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = whisper.objc.app; sourceTree = BUILT_PRODUCTS_DIR; };
18627C7929052BDF00BD2A04 /* AppDelegate.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = AppDelegate.h; sourceTree = "<group>"; };
18627C7A29052BDF00BD2A04 /* AppDelegate.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = AppDelegate.m; sourceTree = "<group>"; };
18627C7C29052BDF00BD2A04 /* SceneDelegate.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = SceneDelegate.h; sourceTree = "<group>"; };
18627C7D29052BDF00BD2A04 /* SceneDelegate.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = SceneDelegate.m; sourceTree = "<group>"; };
18627C7F29052BDF00BD2A04 /* ViewController.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = ViewController.h; sourceTree = "<group>"; };
18627C8029052BDF00BD2A04 /* ViewController.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = ViewController.m; sourceTree = "<group>"; };
18627C8329052BDF00BD2A04 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/Main.storyboard; sourceTree = "<group>"; };
18627C8529052BE000BD2A04 /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = "<group>"; };
18627C8829052BE000BD2A04 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/LaunchScreen.storyboard; sourceTree = "<group>"; };
18627C8A29052BE000BD2A04 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
18627C8B29052BE000BD2A04 /* main.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = main.m; sourceTree = "<group>"; };
18627C9229052C2B00BD2A04 /* whisper.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = whisper.h; path = ../../../whisper.h; sourceTree = "<group>"; };
18627C9329052C4900BD2A04 /* whisper.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = whisper.cpp; path = ../../../whisper.cpp; sourceTree = "<group>"; };
18627C9529052C5800BD2A04 /* ggml.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = ggml.c; path = ../../../ggml.c; sourceTree = "<group>"; };
18627C9729052C6600BD2A04 /* ggml.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ggml.h; path = ../../../ggml.h; sourceTree = "<group>"; };
18627C9A29052CFF00BD2A04 /* ggml-base.en.bin */ = {isa = PBXFileReference; lastKnownFileType = archive.macbinary; name = "ggml-base.en.bin"; path = "../../../models/ggml-base.en.bin"; sourceTree = "<group>"; };
18ABE1542AF556340044A204 /* ggml-quants.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-quants.h"; path = "../../../ggml-quants.h"; sourceTree = "<group>"; };
18ABE1552AF556340044A204 /* ggml-backend.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-backend.h"; path = "../../../ggml-backend.h"; sourceTree = "<group>"; };
18ABE1562AF556340044A204 /* ggml-backend-impl.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-backend-impl.h"; path = "../../../ggml-backend-impl.h"; sourceTree = "<group>"; };
18ABE1572AF556340044A204 /* ggml-backend.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-backend.c"; path = "../../../ggml-backend.c"; sourceTree = "<group>"; };
18ABE1582AF556340044A204 /* ggml-impl.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-impl.h"; path = "../../../ggml-impl.h"; sourceTree = "<group>"; };
18ABE1592AF556340044A204 /* ggml-quants.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-quants.c"; path = "../../../ggml-quants.c"; sourceTree = "<group>"; };
7FE342452A0C3FA20015A058 /* whisper-encoder-impl.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = "whisper-encoder-impl.m"; sourceTree = "<group>"; };
7FE342462A0C3FA20015A058 /* whisper-encoder.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "whisper-encoder.h"; sourceTree = "<group>"; };
7FE342472A0C3FA20015A058 /* whisper-encoder.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = "whisper-encoder.mm"; sourceTree = "<group>"; };
7FE342482A0C3FA20015A058 /* whisper-decoder-impl.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "whisper-decoder-impl.h"; sourceTree = "<group>"; };
7FE342492A0C3FA20015A058 /* whisper-encoder-impl.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "whisper-encoder-impl.h"; sourceTree = "<group>"; };
7FE3424A2A0C3FA20015A058 /* whisper-decoder-impl.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = "whisper-decoder-impl.m"; sourceTree = "<group>"; };
7FE3424E2A0C418A0015A058 /* ggml-base.en-encoder.mlmodelc */ = {isa = PBXFileReference; lastKnownFileType = wrapper; name = "ggml-base.en-encoder.mlmodelc"; path = "../../../models/ggml-base.en-encoder.mlmodelc"; sourceTree = "<group>"; };
/* End PBXFileReference section */
/* Begin PBXFrameworksBuildPhase section */
18627C7329052BDF00BD2A04 /* Frameworks */ = {
isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647;
files = (
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXFrameworksBuildPhase section */
/* Begin PBXGroup section */
18627C6D29052BDF00BD2A04 = {
isa = PBXGroup;
children = (
18627C7829052BDF00BD2A04 /* whisper.objc */,
18627C7729052BDF00BD2A04 /* Products */,
);
sourceTree = "<group>";
};
18627C7729052BDF00BD2A04 /* Products */ = {
isa = PBXGroup;
children = (
18627C7629052BDF00BD2A04 /* whisper.objc.app */,
);
name = Products;
sourceTree = "<group>";
};
18627C7829052BDF00BD2A04 /* whisper.objc */ = {
isa = PBXGroup;
children = (
18ABE1562AF556340044A204 /* ggml-backend-impl.h */,
18ABE1572AF556340044A204 /* ggml-backend.c */,
18ABE1552AF556340044A204 /* ggml-backend.h */,
18ABE1582AF556340044A204 /* ggml-impl.h */,
18ABE1592AF556340044A204 /* ggml-quants.c */,
18ABE1542AF556340044A204 /* ggml-quants.h */,
1844471D2AB2195F007D6BFE /* ggml-metal.metal */,
1844471B2AB21655007D6BFE /* ggml-metal.m */,
184447182AB211A2007D6BFE /* ggml-alloc.c */,
184447192AB211A2007D6BFE /* ggml-alloc.h */,
7FE3424E2A0C418A0015A058 /* ggml-base.en-encoder.mlmodelc */,
7FE342442A0C3FA20015A058 /* coreml */,
18627C9A29052CFF00BD2A04 /* ggml-base.en.bin */,
18627C9729052C6600BD2A04 /* ggml.h */,
18627C9529052C5800BD2A04 /* ggml.c */,
18627C9329052C4900BD2A04 /* whisper.cpp */,
18627C9229052C2B00BD2A04 /* whisper.h */,
18627C7929052BDF00BD2A04 /* AppDelegate.h */,
18627C7A29052BDF00BD2A04 /* AppDelegate.m */,
18627C7C29052BDF00BD2A04 /* SceneDelegate.h */,
18627C7D29052BDF00BD2A04 /* SceneDelegate.m */,
18627C7F29052BDF00BD2A04 /* ViewController.h */,
18627C8029052BDF00BD2A04 /* ViewController.m */,
18627C8229052BDF00BD2A04 /* Main.storyboard */,
18627C8529052BE000BD2A04 /* Assets.xcassets */,
18627C8729052BE000BD2A04 /* LaunchScreen.storyboard */,
18627C8A29052BE000BD2A04 /* Info.plist */,
18627C8B29052BE000BD2A04 /* main.m */,
);
path = whisper.objc;
sourceTree = "<group>";
};
7FE342442A0C3FA20015A058 /* coreml */ = {
isa = PBXGroup;
children = (
7FE342452A0C3FA20015A058 /* whisper-encoder-impl.m */,
7FE342462A0C3FA20015A058 /* whisper-encoder.h */,
7FE342472A0C3FA20015A058 /* whisper-encoder.mm */,
7FE342482A0C3FA20015A058 /* whisper-decoder-impl.h */,
7FE342492A0C3FA20015A058 /* whisper-encoder-impl.h */,
7FE3424A2A0C3FA20015A058 /* whisper-decoder-impl.m */,
);
name = coreml;
path = ../../../coreml;
sourceTree = "<group>";
};
/* End PBXGroup section */
/* Begin PBXNativeTarget section */
18627C7529052BDF00BD2A04 /* whisper.objc */ = {
isa = PBXNativeTarget;
buildConfigurationList = 18627C8F29052BE000BD2A04 /* Build configuration list for PBXNativeTarget "whisper.objc" */;
buildPhases = (
18627C7229052BDF00BD2A04 /* Sources */,
18627C7329052BDF00BD2A04 /* Frameworks */,
18627C7429052BDF00BD2A04 /* Resources */,
184447202AB21B25007D6BFE /* CopyFiles */,
);
buildRules = (
);
dependencies = (
);
name = whisper.objc;
productName = whisper.objc;
productReference = 18627C7629052BDF00BD2A04 /* whisper.objc.app */;
productType = "com.apple.product-type.application";
};
/* End PBXNativeTarget section */
/* Begin PBXProject section */
18627C6E29052BDF00BD2A04 /* Project object */ = {
isa = PBXProject;
attributes = {
BuildIndependentTargetsInParallel = 1;
LastUpgradeCheck = 1400;
TargetAttributes = {
18627C7529052BDF00BD2A04 = {
CreatedOnToolsVersion = 14.0.1;
};
};
};
buildConfigurationList = 18627C7129052BDF00BD2A04 /* Build configuration list for PBXProject "whisper.objc" */;
compatibilityVersion = "Xcode 14.0";
developmentRegion = en;
hasScannedForEncodings = 0;
knownRegions = (
en,
Base,
);
mainGroup = 18627C6D29052BDF00BD2A04;
productRefGroup = 18627C7729052BDF00BD2A04 /* Products */;
projectDirPath = "";
projectRoot = "";
targets = (
18627C7529052BDF00BD2A04 /* whisper.objc */,
);
};
/* End PBXProject section */
/* Begin PBXResourcesBuildPhase section */
18627C7429052BDF00BD2A04 /* Resources */ = {
isa = PBXResourcesBuildPhase;
buildActionMask = 2147483647;
files = (
18627C8929052BE000BD2A04 /* LaunchScreen.storyboard in Resources */,
7FE3424F2A0C418A0015A058 /* ggml-base.en-encoder.mlmodelc in Resources */,
18627C8629052BE000BD2A04 /* Assets.xcassets in Resources */,
18627C8429052BDF00BD2A04 /* Main.storyboard in Resources */,
18627C9B29052CFF00BD2A04 /* ggml-base.en.bin in Resources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXResourcesBuildPhase section */
/* Begin PBXSourcesBuildPhase section */
18627C7229052BDF00BD2A04 /* Sources */ = {
isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647;
files = (
18627C8129052BDF00BD2A04 /* ViewController.m in Sources */,
18ABE15B2AF556340044A204 /* ggml-quants.c in Sources */,
7FE3424C2A0C3FA20015A058 /* whisper-encoder.mm in Sources */,
18627C9429052C4900BD2A04 /* whisper.cpp in Sources */,
18627C9629052C5800BD2A04 /* ggml.c in Sources */,
18627C7B29052BDF00BD2A04 /* AppDelegate.m in Sources */,
7FE3424D2A0C3FA20015A058 /* whisper-decoder-impl.m in Sources */,
1844471A2AB211A2007D6BFE /* ggml-alloc.c in Sources */,
18ABE15A2AF556340044A204 /* ggml-backend.c in Sources */,
18627C8C29052BE000BD2A04 /* main.m in Sources */,
18627C7E29052BDF00BD2A04 /* SceneDelegate.m in Sources */,
1844471C2AB21655007D6BFE /* ggml-metal.m in Sources */,
7FE3424B2A0C3FA20015A058 /* whisper-encoder-impl.m in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXSourcesBuildPhase section */
/* Begin PBXVariantGroup section */
18627C8229052BDF00BD2A04 /* Main.storyboard */ = {
isa = PBXVariantGroup;
children = (
18627C8329052BDF00BD2A04 /* Base */,
);
name = Main.storyboard;
sourceTree = "<group>";
};
18627C8729052BE000BD2A04 /* LaunchScreen.storyboard */ = {
isa = PBXVariantGroup;
children = (
18627C8829052BE000BD2A04 /* Base */,
);
name = LaunchScreen.storyboard;
sourceTree = "<group>";
};
/* End PBXVariantGroup section */
/* Begin XCBuildConfiguration section */
18627C8D29052BE000BD2A04 /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
CLANG_ANALYZER_NONNULL = YES;
CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
CLANG_CXX_LANGUAGE_STANDARD = "gnu++20";
CLANG_ENABLE_MODULES = YES;
CLANG_ENABLE_OBJC_ARC = YES;
CLANG_ENABLE_OBJC_WEAK = YES;
CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
CLANG_WARN_BOOL_CONVERSION = YES;
CLANG_WARN_COMMA = YES;
CLANG_WARN_CONSTANT_CONVERSION = YES;
CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
CLANG_WARN_EMPTY_BODY = YES;
CLANG_WARN_ENUM_CONVERSION = YES;
CLANG_WARN_INFINITE_RECURSION = YES;
CLANG_WARN_INT_CONVERSION = YES;
CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
CLANG_WARN_STRICT_PROTOTYPES = YES;
CLANG_WARN_SUSPICIOUS_MOVE = YES;
CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
CLANG_WARN_UNREACHABLE_CODE = YES;
CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
COPY_PHASE_STRIP = NO;
DEBUG_INFORMATION_FORMAT = dwarf;
ENABLE_STRICT_OBJC_MSGSEND = YES;
ENABLE_TESTABILITY = YES;
GCC_C_LANGUAGE_STANDARD = gnu11;
GCC_DYNAMIC_NO_PIC = NO;
GCC_NO_COMMON_BLOCKS = YES;
GCC_OPTIMIZATION_LEVEL = 0;
GCC_PREPROCESSOR_DEFINITIONS = (
"DEBUG=1",
"$(inherited)",
);
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
GCC_WARN_UNDECLARED_SELECTOR = YES;
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
GCC_WARN_UNUSED_FUNCTION = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
IPHONEOS_DEPLOYMENT_TARGET = 16.0;
MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE;
MTL_FAST_MATH = YES;
ONLY_ACTIVE_ARCH = YES;
SDKROOT = iphoneos;
};
name = Debug;
};
18627C8E29052BE000BD2A04 /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
CLANG_ANALYZER_NONNULL = YES;
CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
CLANG_CXX_LANGUAGE_STANDARD = "gnu++20";
CLANG_ENABLE_MODULES = YES;
CLANG_ENABLE_OBJC_ARC = YES;
CLANG_ENABLE_OBJC_WEAK = YES;
CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
CLANG_WARN_BOOL_CONVERSION = YES;
CLANG_WARN_COMMA = YES;
CLANG_WARN_CONSTANT_CONVERSION = YES;
CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
CLANG_WARN_EMPTY_BODY = YES;
CLANG_WARN_ENUM_CONVERSION = YES;
CLANG_WARN_INFINITE_RECURSION = YES;
CLANG_WARN_INT_CONVERSION = YES;
CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
CLANG_WARN_STRICT_PROTOTYPES = YES;
CLANG_WARN_SUSPICIOUS_MOVE = YES;
CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
CLANG_WARN_UNREACHABLE_CODE = YES;
CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
COPY_PHASE_STRIP = NO;
DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
ENABLE_NS_ASSERTIONS = NO;
ENABLE_STRICT_OBJC_MSGSEND = YES;
GCC_C_LANGUAGE_STANDARD = gnu11;
GCC_NO_COMMON_BLOCKS = YES;
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
GCC_WARN_UNDECLARED_SELECTOR = YES;
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
GCC_WARN_UNUSED_FUNCTION = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
IPHONEOS_DEPLOYMENT_TARGET = 16.0;
MTL_ENABLE_DEBUG_INFO = NO;
MTL_FAST_MATH = YES;
OTHER_CFLAGS = (
"-O3",
"-DNDEBUG",
);
SDKROOT = iphoneos;
VALIDATE_PRODUCT = YES;
};
name = Release;
};
18627C9029052BE000BD2A04 /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
CODE_SIGN_STYLE = Automatic;
CURRENT_PROJECT_VERSION = 1;
DEVELOPMENT_TEAM = P8JZH34X63;
GCC_WARN_64_TO_32_BIT_CONVERSION = NO;
GENERATE_INFOPLIST_FILE = YES;
INFOPLIST_FILE = whisper.objc/Info.plist;
INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES;
INFOPLIST_KEY_UILaunchStoryboardName = LaunchScreen;
INFOPLIST_KEY_UIMainStoryboardFile = Main;
INFOPLIST_KEY_UISupportedInterfaceOrientations_iPad = "UIInterfaceOrientationPortrait UIInterfaceOrientationPortraitUpsideDown UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
INFOPLIST_KEY_UISupportedInterfaceOrientations_iPhone = "UIInterfaceOrientationPortrait UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
LD_RUNPATH_SEARCH_PATHS = (
"$(inherited)",
"@executable_path/Frameworks",
);
MARKETING_VERSION = 1.0;
PRODUCT_BUNDLE_IDENTIFIER = "com.ggerganov.whisper-objc";
PRODUCT_NAME = "$(TARGET_NAME)";
SWIFT_EMIT_LOC_STRINGS = YES;
TARGETED_DEVICE_FAMILY = "1,2";
};
name = Debug;
};
18627C9129052BE000BD2A04 /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
CODE_SIGN_STYLE = Automatic;
CURRENT_PROJECT_VERSION = 1;
DEVELOPMENT_TEAM = P8JZH34X63;
GCC_WARN_64_TO_32_BIT_CONVERSION = NO;
GENERATE_INFOPLIST_FILE = YES;
INFOPLIST_FILE = whisper.objc/Info.plist;
INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES;
INFOPLIST_KEY_UILaunchStoryboardName = LaunchScreen;
INFOPLIST_KEY_UIMainStoryboardFile = Main;
INFOPLIST_KEY_UISupportedInterfaceOrientations_iPad = "UIInterfaceOrientationPortrait UIInterfaceOrientationPortraitUpsideDown UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
INFOPLIST_KEY_UISupportedInterfaceOrientations_iPhone = "UIInterfaceOrientationPortrait UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
LD_RUNPATH_SEARCH_PATHS = (
"$(inherited)",
"@executable_path/Frameworks",
);
MARKETING_VERSION = 1.0;
PRODUCT_BUNDLE_IDENTIFIER = "com.ggerganov.whisper-objc";
PRODUCT_NAME = "$(TARGET_NAME)";
SWIFT_EMIT_LOC_STRINGS = YES;
TARGETED_DEVICE_FAMILY = "1,2";
};
name = Release;
};
/* End XCBuildConfiguration section */
/* Begin XCConfigurationList section */
18627C7129052BDF00BD2A04 /* Build configuration list for PBXProject "whisper.objc" */ = {
isa = XCConfigurationList;
buildConfigurations = (
18627C8D29052BE000BD2A04 /* Debug */,
18627C8E29052BE000BD2A04 /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
18627C8F29052BE000BD2A04 /* Build configuration list for PBXNativeTarget "whisper.objc" */ = {
isa = XCConfigurationList;
buildConfigurations = (
18627C9029052BE000BD2A04 /* Debug */,
18627C9129052BE000BD2A04 /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
/* End XCConfigurationList section */
};
rootObject = 18627C6E29052BDF00BD2A04 /* Project object */;
}

View File

@@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<Workspace
version = "1.0">
<FileRef
location = "self:">
</FileRef>
</Workspace>

View File

@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>IDEDidComputeMac32BitWarning</key>
<true/>
</dict>
</plist>

View File

@@ -0,0 +1,14 @@
//
// AppDelegate.h
// whisper.objc
//
// Created by Georgi Gerganov on 23.10.22.
//
#import <UIKit/UIKit.h>
@interface AppDelegate : UIResponder <UIApplicationDelegate>
@end

View File

@@ -0,0 +1,40 @@
//
// AppDelegate.m
// whisper.objc
//
// Created by Georgi Gerganov on 23.10.22.
//
#import "AppDelegate.h"
@interface AppDelegate ()
@end
@implementation AppDelegate
- (BOOL)application:(UIApplication *)application didFinishLaunchingWithOptions:(NSDictionary *)launchOptions {
// Override point for customization after application launch.
return YES;
}
#pragma mark - UISceneSession lifecycle
- (UISceneConfiguration *)application:(UIApplication *)application configurationForConnectingSceneSession:(UISceneSession *)connectingSceneSession options:(UISceneConnectionOptions *)options {
// Called when a new scene session is being created.
// Use this method to select a configuration to create the new scene with.
return [[UISceneConfiguration alloc] initWithName:@"Default Configuration" sessionRole:connectingSceneSession.role];
}
- (void)application:(UIApplication *)application didDiscardSceneSessions:(NSSet<UISceneSession *> *)sceneSessions {
// Called when the user discards a scene session.
// If any sessions were discarded while the application was not running, this will be called shortly after application:didFinishLaunchingWithOptions.
// Use this method to release any resources that were specific to the discarded scenes, as they will not return.
}
@end

View File

@@ -0,0 +1,11 @@
{
"colors" : [
{
"idiom" : "universal"
}
],
"info" : {
"author" : "xcode",
"version" : 1
}
}

View File

@@ -0,0 +1,13 @@
{
"images" : [
{
"idiom" : "universal",
"platform" : "ios",
"size" : "1024x1024"
}
],
"info" : {
"author" : "xcode",
"version" : 1
}
}

View File

@@ -0,0 +1,6 @@
{
"info" : {
"author" : "xcode",
"version" : 1
}
}

View File

@@ -0,0 +1,25 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<document type="com.apple.InterfaceBuilder3.CocoaTouch.Storyboard.XIB" version="3.0" toolsVersion="13122.16" targetRuntime="iOS.CocoaTouch" propertyAccessControl="none" useAutolayout="YES" launchScreen="YES" useTraitCollections="YES" useSafeAreas="YES" colorMatched="YES" initialViewController="01J-lp-oVM">
<dependencies>
<plugIn identifier="com.apple.InterfaceBuilder.IBCocoaTouchPlugin" version="13104.12"/>
<capability name="Safe area layout guides" minToolsVersion="9.0"/>
<capability name="documents saved in the Xcode 8 format" minToolsVersion="8.0"/>
</dependencies>
<scenes>
<!--View Controller-->
<scene sceneID="EHf-IW-A2E">
<objects>
<viewController id="01J-lp-oVM" sceneMemberID="viewController">
<view key="view" contentMode="scaleToFill" id="Ze5-6b-2t3">
<rect key="frame" x="0.0" y="0.0" width="375" height="667"/>
<autoresizingMask key="autoresizingMask" widthSizable="YES" heightSizable="YES"/>
<color key="backgroundColor" xcode11CocoaTouchSystemColor="systemBackgroundColor" cocoaTouchSystemColor="whiteColor"/>
<viewLayoutGuide key="safeArea" id="6Tk-OE-BBY"/>
</view>
</viewController>
<placeholder placeholderIdentifier="IBFirstResponder" id="iYj-Kq-Ea1" userLabel="First Responder" sceneMemberID="firstResponder"/>
</objects>
<point key="canvasLocation" x="53" y="375"/>
</scene>
</scenes>
</document>

View File

@@ -0,0 +1,102 @@
<?xml version="1.0" encoding="UTF-8"?>
<document type="com.apple.InterfaceBuilder3.CocoaTouch.Storyboard.XIB" version="3.0" toolsVersion="21507" targetRuntime="iOS.CocoaTouch" propertyAccessControl="none" useAutolayout="YES" useTraitCollections="YES" useSafeAreas="YES" colorMatched="YES" initialViewController="BYZ-38-t0r">
<device id="retina6_0" orientation="portrait" appearance="light"/>
<dependencies>
<plugIn identifier="com.apple.InterfaceBuilder.IBCocoaTouchPlugin" version="21505"/>
<capability name="Safe area layout guides" minToolsVersion="9.0"/>
<capability name="System colors in document resources" minToolsVersion="11.0"/>
<capability name="documents saved in the Xcode 8 format" minToolsVersion="8.0"/>
</dependencies>
<scenes>
<!--View Controller-->
<scene sceneID="tne-QT-ifu">
<objects>
<viewController id="BYZ-38-t0r" customClass="ViewController" sceneMemberID="viewController">
<view key="view" contentMode="scaleToFill" id="8bC-Xf-vdC">
<rect key="frame" x="0.0" y="0.0" width="390" height="844"/>
<autoresizingMask key="autoresizingMask" flexibleMinX="YES" widthSizable="YES" flexibleMinY="YES" heightSizable="YES"/>
<subviews>
<button opaque="NO" contentMode="scaleToFill" contentHorizontalAlignment="center" contentVerticalAlignment="center" lineBreakMode="middleTruncation" id="VOi-PT-Rbu">
<rect key="frame" x="35" y="121" width="156" height="49"/>
<autoresizingMask key="autoresizingMask" flexibleMaxX="YES" flexibleMaxY="YES"/>
<color key="backgroundColor" systemColor="opaqueSeparatorColor"/>
<color key="tintColor" systemColor="opaqueSeparatorColor"/>
<state key="normal" title="Start Capturing">
<color key="titleColor" systemColor="labelColor"/>
</state>
<connections>
<action selector="toggleCapture:" destination="BYZ-38-t0r" eventType="touchUpInside" id="BuO-Wf-RgV"/>
</connections>
</button>
<label opaque="NO" userInteractionEnabled="NO" contentMode="left" horizontalHuggingPriority="251" verticalHuggingPriority="251" fixedFrame="YES" text="Status: Idle" textAlignment="natural" lineBreakMode="tailTruncation" baselineAdjustment="alignBaselines" adjustsFontSizeToFit="NO" translatesAutoresizingMaskIntoConstraints="NO" id="Tgu-2q-eHQ">
<rect key="frame" x="35" y="78" width="232" height="21"/>
<autoresizingMask key="autoresizingMask" flexibleMaxX="YES" flexibleMaxY="YES"/>
<fontDescription key="fontDescription" type="system" pointSize="17"/>
<nil key="textColor"/>
<nil key="highlightedColor"/>
</label>
<textView clipsSubviews="YES" multipleTouchEnabled="YES" contentMode="scaleToFill" fixedFrame="YES" text="Record some speech and press &quot;Transcribe&quot;. The result will be displayed here." textAlignment="natural" translatesAutoresizingMaskIntoConstraints="NO" id="mv2-KD-7jn">
<rect key="frame" x="35" y="248" width="320" height="300"/>
<autoresizingMask key="autoresizingMask" flexibleMaxX="YES" flexibleMaxY="YES"/>
<color key="backgroundColor" systemColor="systemBackgroundColor"/>
<color key="textColor" systemColor="labelColor"/>
<fontDescription key="fontDescription" name="Georgia" family="Georgia" pointSize="16"/>
<textInputTraits key="textInputTraits" autocapitalizationType="sentences"/>
</textView>
<button opaque="NO" contentMode="scaleToFill" contentHorizontalAlignment="center" contentVerticalAlignment="center" lineBreakMode="middleTruncation" id="Brs-xi-o8i">
<rect key="frame" x="35" y="191" width="156" height="49"/>
<autoresizingMask key="autoresizingMask" flexibleMaxX="YES" flexibleMaxY="YES"/>
<color key="backgroundColor" systemColor="opaqueSeparatorColor"/>
<color key="tintColor" systemColor="opaqueSeparatorColor"/>
<state key="normal" title="Transcribe">
<color key="titleColor" systemColor="labelColor"/>
</state>
<connections>
<action selector="onTranscribe:" destination="BYZ-38-t0r" eventType="touchUpInside" id="ond-bx-48O"/>
<action selector="onTranscribePrepare:" destination="BYZ-38-t0r" eventType="touchDown" id="16T-dN-dfB"/>
</connections>
</button>
<button opaque="NO" contentMode="scaleToFill" contentHorizontalAlignment="center" contentVerticalAlignment="center" lineBreakMode="middleTruncation" id="AaW-T2-Ndw">
<rect key="frame" x="199" y="191" width="156" height="49"/>
<autoresizingMask key="autoresizingMask" flexibleMaxX="YES" flexibleMaxY="YES"/>
<color key="backgroundColor" systemColor="opaqueSeparatorColor"/>
<color key="tintColor" systemColor="opaqueSeparatorColor"/>
<state key="normal" title="Real-time">
<color key="titleColor" systemColor="labelColor"/>
</state>
<connections>
<action selector="onRealtime:" destination="BYZ-38-t0r" eventType="touchUpInside" id="nhn-jT-aQJ"/>
</connections>
</button>
</subviews>
<viewLayoutGuide key="safeArea" id="6Tk-OE-BBY"/>
<color key="backgroundColor" systemColor="systemBackgroundColor"/>
<constraints>
<constraint firstItem="Brs-xi-o8i" firstAttribute="trailing" secondItem="VOi-PT-Rbu" secondAttribute="trailing" id="8mF-AW-cbc"/>
</constraints>
</view>
<connections>
<outlet property="buttonRealtime" destination="AaW-T2-Ndw" id="gcU-Ol-BOo"/>
<outlet property="buttonToggleCapture" destination="VOi-PT-Rbu" id="nis-VC-DQO"/>
<outlet property="buttonTranscribe" destination="Brs-xi-o8i" id="N8h-9W-ywb"/>
<outlet property="labelStatusInp" destination="Tgu-2q-eHQ" id="1hH-Ql-K6j"/>
<outlet property="textviewResult" destination="mv2-KD-7jn" id="RBw-0L-iGj"/>
</connections>
</viewController>
<placeholder placeholderIdentifier="IBFirstResponder" id="dkx-z0-nzr" sceneMemberID="firstResponder"/>
</objects>
<point key="canvasLocation" x="30.769230769230766" y="-28.436018957345969"/>
</scene>
</scenes>
<resources>
<systemColor name="labelColor">
<color red="0.0" green="0.0" blue="0.0" alpha="1" colorSpace="custom" customColorSpace="sRGB"/>
</systemColor>
<systemColor name="opaqueSeparatorColor">
<color red="0.77647058823529413" green="0.77647058823529413" blue="0.78431372549019607" alpha="1" colorSpace="custom" customColorSpace="sRGB"/>
</systemColor>
<systemColor name="systemBackgroundColor">
<color white="1" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
</systemColor>
</resources>
</document>

View File

@@ -0,0 +1,27 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>NSMicrophoneUsageDescription</key>
<string>This app requires microphone access in order to transcribe speech</string>
<key>UIApplicationSceneManifest</key>
<dict>
<key>UIApplicationSupportsMultipleScenes</key>
<false/>
<key>UISceneConfigurations</key>
<dict>
<key>UIWindowSceneSessionRoleApplication</key>
<array>
<dict>
<key>UISceneConfigurationName</key>
<string>Default Configuration</string>
<key>UISceneDelegateClassName</key>
<string>SceneDelegate</string>
<key>UISceneStoryboardFile</key>
<string>Main</string>
</dict>
</array>
</dict>
</dict>
</dict>
</plist>

View File

@@ -0,0 +1,15 @@
//
// SceneDelegate.h
// whisper.objc
//
// Created by Georgi Gerganov on 23.10.22.
//
#import <UIKit/UIKit.h>
@interface SceneDelegate : UIResponder <UIWindowSceneDelegate>
@property (strong, nonatomic) UIWindow * window;
@end

View File

@@ -0,0 +1,57 @@
//
// SceneDelegate.m
// whisper.objc
//
// Created by Georgi Gerganov on 23.10.22.
//
#import "SceneDelegate.h"
@interface SceneDelegate ()
@end
@implementation SceneDelegate
- (void)scene:(UIScene *)scene willConnectToSession:(UISceneSession *)session options:(UISceneConnectionOptions *)connectionOptions {
// Use this method to optionally configure and attach the UIWindow `window` to the provided UIWindowScene `scene`.
// If using a storyboard, the `window` property will automatically be initialized and attached to the scene.
// This delegate does not imply the connecting scene or session are new (see `application:configurationForConnectingSceneSession` instead).
}
- (void)sceneDidDisconnect:(UIScene *)scene {
// Called as the scene is being released by the system.
// This occurs shortly after the scene enters the background, or when its session is discarded.
// Release any resources associated with this scene that can be re-created the next time the scene connects.
// The scene may re-connect later, as its session was not necessarily discarded (see `application:didDiscardSceneSessions` instead).
}
- (void)sceneDidBecomeActive:(UIScene *)scene {
// Called when the scene has moved from an inactive state to an active state.
// Use this method to restart any tasks that were paused (or not yet started) when the scene was inactive.
}
- (void)sceneWillResignActive:(UIScene *)scene {
// Called when the scene will move from an active state to an inactive state.
// This may occur due to temporary interruptions (ex. an incoming phone call).
}
- (void)sceneWillEnterForeground:(UIScene *)scene {
// Called as the scene transitions from the background to the foreground.
// Use this method to undo the changes made on entering the background.
}
- (void)sceneDidEnterBackground:(UIScene *)scene {
// Called as the scene transitions from the foreground to the background.
// Use this method to save data, release shared resources, and store enough scene-specific state information
// to restore the scene back to its current state.
}
@end

View File

@@ -0,0 +1,45 @@
//
// ViewController.h
// whisper.objc
//
// Created by Georgi Gerganov on 23.10.22.
//
#import <UIKit/UIKit.h>
#import <AVFoundation/AVFoundation.h>
#import <AudioToolbox/AudioQueue.h>
#define NUM_BUFFERS 3
#define MAX_AUDIO_SEC 30
#define SAMPLE_RATE 16000
struct whisper_context;
typedef struct
{
int ggwaveId;
bool isCapturing;
bool isTranscribing;
bool isRealtime;
UILabel * labelReceived;
AudioQueueRef queue;
AudioStreamBasicDescription dataFormat;
AudioQueueBufferRef buffers[NUM_BUFFERS];
int n_samples;
int16_t * audioBufferI16;
float * audioBufferF32;
struct whisper_context * ctx;
void * vc;
} StateInp;
@interface ViewController : UIViewController
{
StateInp stateInp;
}
@end

View File

@@ -0,0 +1,304 @@
//
// ViewController.m
// whisper.objc
//
// Created by Georgi Gerganov on 23.10.22.
//
#import "ViewController.h"
#import "whisper.h"
#define NUM_BYTES_PER_BUFFER 16*1024
// callback used to process captured audio
void AudioInputCallback(void * inUserData,
AudioQueueRef inAQ,
AudioQueueBufferRef inBuffer,
const AudioTimeStamp * inStartTime,
UInt32 inNumberPacketDescriptions,
const AudioStreamPacketDescription * inPacketDescs);
@interface ViewController ()
@property (weak, nonatomic) IBOutlet UILabel *labelStatusInp;
@property (weak, nonatomic) IBOutlet UIButton *buttonToggleCapture;
@property (weak, nonatomic) IBOutlet UIButton *buttonTranscribe;
@property (weak, nonatomic) IBOutlet UIButton *buttonRealtime;
@property (weak, nonatomic) IBOutlet UITextView *textviewResult;
@end
@implementation ViewController
- (void)setupAudioFormat:(AudioStreamBasicDescription*)format
{
format->mSampleRate = WHISPER_SAMPLE_RATE;
format->mFormatID = kAudioFormatLinearPCM;
format->mFramesPerPacket = 1;
format->mChannelsPerFrame = 1;
format->mBytesPerFrame = 2;
format->mBytesPerPacket = 2;
format->mBitsPerChannel = 16;
format->mReserved = 0;
format->mFormatFlags = kLinearPCMFormatFlagIsSignedInteger;
}
- (void)viewDidLoad {
[super viewDidLoad];
// whisper.cpp initialization
{
// load the model
NSString *modelPath = [[NSBundle mainBundle] pathForResource:@"ggml-base.en" ofType:@"bin"];
// check if the model exists
if (![[NSFileManager defaultManager] fileExistsAtPath:modelPath]) {
NSLog(@"Model file not found");
return;
}
NSLog(@"Loading model from %@", modelPath);
// create ggml context
struct whisper_context_params cparams = whisper_context_default_params();
#if TARGET_OS_SIMULATOR
cparams.use_gpu = false;
NSLog(@"Running on simulator, using CPU");
#endif
stateInp.ctx = whisper_init_from_file_with_params([modelPath UTF8String], cparams);
// check if the model was loaded successfully
if (stateInp.ctx == NULL) {
NSLog(@"Failed to load model");
return;
}
}
// initialize audio format and buffers
{
[self setupAudioFormat:&stateInp.dataFormat];
stateInp.n_samples = 0;
stateInp.audioBufferI16 = malloc(MAX_AUDIO_SEC*SAMPLE_RATE*sizeof(int16_t));
stateInp.audioBufferF32 = malloc(MAX_AUDIO_SEC*SAMPLE_RATE*sizeof(float));
}
stateInp.isTranscribing = false;
stateInp.isRealtime = false;
}
-(IBAction) stopCapturing {
NSLog(@"Stop capturing");
_labelStatusInp.text = @"Status: Idle";
[_buttonToggleCapture setTitle:@"Start capturing" forState:UIControlStateNormal];
[_buttonToggleCapture setBackgroundColor:[UIColor grayColor]];
stateInp.isCapturing = false;
AudioQueueStop(stateInp.queue, true);
for (int i = 0; i < NUM_BUFFERS; i++) {
AudioQueueFreeBuffer(stateInp.queue, stateInp.buffers[i]);
}
AudioQueueDispose(stateInp.queue, true);
}
- (IBAction)toggleCapture:(id)sender {
if (stateInp.isCapturing) {
// stop capturing
[self stopCapturing];
return;
}
// initiate audio capturing
NSLog(@"Start capturing");
stateInp.n_samples = 0;
stateInp.vc = (__bridge void *)(self);
OSStatus status = AudioQueueNewInput(&stateInp.dataFormat,
AudioInputCallback,
&stateInp,
CFRunLoopGetCurrent(),
kCFRunLoopCommonModes,
0,
&stateInp.queue);
if (status == 0) {
for (int i = 0; i < NUM_BUFFERS; i++) {
AudioQueueAllocateBuffer(stateInp.queue, NUM_BYTES_PER_BUFFER, &stateInp.buffers[i]);
AudioQueueEnqueueBuffer (stateInp.queue, stateInp.buffers[i], 0, NULL);
}
stateInp.isCapturing = true;
status = AudioQueueStart(stateInp.queue, NULL);
if (status == 0) {
_labelStatusInp.text = @"Status: Capturing";
[sender setTitle:@"Stop Capturing" forState:UIControlStateNormal];
[_buttonToggleCapture setBackgroundColor:[UIColor redColor]];
}
}
if (status != 0) {
[self stopCapturing];
}
}
- (IBAction)onTranscribePrepare:(id)sender {
_textviewResult.text = @"Processing - please wait ...";
if (stateInp.isRealtime) {
[self onRealtime:(id)sender];
}
if (stateInp.isCapturing) {
[self stopCapturing];
}
}
- (IBAction)onRealtime:(id)sender {
stateInp.isRealtime = !stateInp.isRealtime;
if (stateInp.isRealtime) {
[_buttonRealtime setBackgroundColor:[UIColor greenColor]];
} else {
[_buttonRealtime setBackgroundColor:[UIColor grayColor]];
}
NSLog(@"Realtime: %@", stateInp.isRealtime ? @"ON" : @"OFF");
}
- (IBAction)onTranscribe:(id)sender {
if (stateInp.isTranscribing) {
return;
}
NSLog(@"Processing %d samples", stateInp.n_samples);
stateInp.isTranscribing = true;
// dispatch the model to a background thread
dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
// process captured audio
// convert I16 to F32
for (int i = 0; i < self->stateInp.n_samples; i++) {
self->stateInp.audioBufferF32[i] = (float)self->stateInp.audioBufferI16[i] / 32768.0f;
}
// run the model
struct whisper_full_params params = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
// get maximum number of threads on this device (max 8)
const int max_threads = MIN(8, (int)[[NSProcessInfo processInfo] processorCount]);
params.print_realtime = true;
params.print_progress = false;
params.print_timestamps = true;
params.print_special = false;
params.translate = false;
params.language = "en";
params.n_threads = max_threads;
params.offset_ms = 0;
params.no_context = true;
params.single_segment = self->stateInp.isRealtime;
params.no_timestamps = params.single_segment;
CFTimeInterval startTime = CACurrentMediaTime();
whisper_reset_timings(self->stateInp.ctx);
if (whisper_full(self->stateInp.ctx, params, self->stateInp.audioBufferF32, self->stateInp.n_samples) != 0) {
NSLog(@"Failed to run the model");
self->_textviewResult.text = @"Failed to run the model";
return;
}
whisper_print_timings(self->stateInp.ctx);
CFTimeInterval endTime = CACurrentMediaTime();
NSLog(@"\nProcessing time: %5.3f, on %d threads", endTime - startTime, params.n_threads);
// result text
NSString *result = @"";
int n_segments = whisper_full_n_segments(self->stateInp.ctx);
for (int i = 0; i < n_segments; i++) {
const char * text_cur = whisper_full_get_segment_text(self->stateInp.ctx, i);
// append the text to the result
result = [result stringByAppendingString:[NSString stringWithUTF8String:text_cur]];
}
const float tRecording = (float)self->stateInp.n_samples / (float)self->stateInp.dataFormat.mSampleRate;
// append processing time
result = [result stringByAppendingString:[NSString stringWithFormat:@"\n\n[recording time: %5.3f s]", tRecording]];
result = [result stringByAppendingString:[NSString stringWithFormat:@" \n[processing time: %5.3f s]", endTime - startTime]];
// dispatch the result to the main thread
dispatch_async(dispatch_get_main_queue(), ^{
self->_textviewResult.text = result;
self->stateInp.isTranscribing = false;
});
});
}
//
// Callback implementation
//
void AudioInputCallback(void * inUserData,
AudioQueueRef inAQ,
AudioQueueBufferRef inBuffer,
const AudioTimeStamp * inStartTime,
UInt32 inNumberPacketDescriptions,
const AudioStreamPacketDescription * inPacketDescs)
{
StateInp * stateInp = (StateInp*)inUserData;
if (!stateInp->isCapturing) {
NSLog(@"Not capturing, ignoring audio");
return;
}
const int n = inBuffer->mAudioDataByteSize / 2;
NSLog(@"Captured %d new samples", n);
if (stateInp->n_samples + n > MAX_AUDIO_SEC*SAMPLE_RATE) {
NSLog(@"Too much audio data, ignoring");
dispatch_async(dispatch_get_main_queue(), ^{
ViewController * vc = (__bridge ViewController *)(stateInp->vc);
[vc stopCapturing];
});
return;
}
for (int i = 0; i < n; i++) {
stateInp->audioBufferI16[stateInp->n_samples + i] = ((short*)inBuffer->mAudioData)[i];
}
stateInp->n_samples += n;
// put the buffer back in the queue
AudioQueueEnqueueBuffer(stateInp->queue, inBuffer, 0, NULL);
if (stateInp->isRealtime) {
// dipatch onTranscribe() to the main thread
dispatch_async(dispatch_get_main_queue(), ^{
ViewController * vc = (__bridge ViewController *)(stateInp->vc);
[vc onTranscribe:nil];
});
}
}
@end

View File

@@ -0,0 +1,18 @@
//
// main.m
// whisper.objc
//
// Created by Georgi Gerganov on 23.10.22.
//
#import <UIKit/UIKit.h>
#import "AppDelegate.h"
int main(int argc, char * argv[]) {
NSString * appDelegateClassName;
@autoreleasepool {
// Setup code that might create autoreleased objects goes here.
appDelegateClassName = NSStringFromClass([AppDelegate class]);
}
return UIApplicationMain(argc, argv, nil, appDelegateClassName);
}