summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authormatthewsotoudeh <matthewsot@outlook.com>2016-09-05 19:06:29 -0700
committermatthewsotoudeh <matthewsot@outlook.com>2016-09-05 19:06:29 -0700
commit00c79b6c8f5834f5cee51140b26aa3c3da64b832 (patch)
tree21896ccafa2ea49fc466088f6cecebd406dbdc52
parent256a96c01b193274cb6c2076f757d9de6fd00731 (diff)
updated the READMEHEADmaster
-rw-r--r--README.md60
-rw-r--r--RollingWavReader/RollingWavReader/RollingWavReader.cs6
-rw-r--r--RollingWavReader/RollingWavReader/RollingWavReader.csproj8
3 files changed, 65 insertions, 9 deletions
diff --git a/README.md b/README.md
index 1f9ce83..915f9ef 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,64 @@
# RollingWavReader
-A C#/.NET WAV reader that supports reading from a stream that's being updated in real-time.
+A C#/.NET WAV reader that supports reading from a stream while it is being updated.
+
+RollingWavReader was designed primarily for reading samples and extracting audio features from smartphone microphone input in near-real-time, with the audio captured through the UWP MediaCapture API for use in speech and speaker recognition systems.
# Usage
Once you've created a ``RollingWavReader`` instance, simply call the ``Update()`` function at regular intervals and ``FinalizeData()`` once the stream has been closed.
+In a UWP application, this would look something like the following (edited for clarity):
+
+```
+protected async override void OnNavigatedTo(NavigationEventArgs e)
+{
+ stream = new InMemoryRandomAccessStream();
+ await mediaCapture.StartRecordToStreamAsync(MediaEncodingProfile.CreateWav(AudioEncodingQuality.Auto), stream);
+
+ reader = new RollingWavReader(stream);
+
+ readTimer.Tick += ReadTimer_Tick;
+ readTimer.Start();
+}
+
+private async void ReadTimer_Tick(object sender, object e)
+{
+ await reader.Update();
+}
+
+private async void end_Click(object sender, RoutedEventArgs e)
+{
+ await mediaCapture.StopRecordAsync();
+
+ readTimer.Stop();
+
+ await reader.FinalizeData(); //Samples are in reader.Samples
+
+ mediaCapture.Dispose();
+}
+```
+
+# Real-time feature extraction
+RollingWavReader also supports extracting audio features in real-time. Assuming the previous example, you could incorporate a feature extraction ``Func`` that takes a ``double[]`` of samples and returns a ``double[]`` feature vector like so:
+
+```
+protected async override void OnNavigatedTo(NavigationEventArgs e)
+{
+ ...
+
+ extractionTimer.Tick += ExtractionTimer_Tick;
+ extractionTimer.Start();
+}
+
+private void ExtractionTimer_Tick(object sender, object e)
+{
+ //20 ms window and a 10 ms window offset
+ reader.FilterAndExtractRollingSamples(20, 10, featureExtractor);
+}
+
+private async void end_Click(object sender, RoutedEventArgs e)
+{
+ ...
+
+ var features = await reader.FinishMFCCSamples(20, 10, featureExtractor);
+}
+``` \ No newline at end of file
diff --git a/RollingWavReader/RollingWavReader/RollingWavReader.cs b/RollingWavReader/RollingWavReader/RollingWavReader.cs
index cb5910f..7620a49 100644
--- a/RollingWavReader/RollingWavReader/RollingWavReader.cs
+++ b/RollingWavReader/RollingWavReader/RollingWavReader.cs
@@ -248,13 +248,11 @@ namespace VoicePrint
/// <param name="windowWidthMs">The width of a single window (in milliseconds)</param>
/// <param name="windowOffsetMs">The offset between windows (in milliseconds)</param>
/// <param name="featureExtractor">The feature extraction function</param>
- /// <param name="finishing">A value indicating whether this is the final feature extraction or not</param>
/// <param name="thresholdAmplitude">The threshold amplitude used to filter out noise</param>
/// <param name="thresholdOfSample">The proportion of samples in a single window that must be below thresholdAmplitude to cause the window to be discarded</param>
/// <param name="channel">The channel of audio to be sent to the feature extraction function</param>
public void FilterAndExtractRollingSamples(int windowWidthMs, int windowOffsetMs,
- Func<double[], double[]> featureExtractor,
- bool finishing = false, int thresholdAmplitude = 250, double thresholdOfSample = 0.3,
+ Func<double[], double[]> featureExtractor, int thresholdAmplitude = 250, double thresholdOfSample = 0.3,
int channel = 0)
{
var windowWidth = windowWidthMs * (SampleRate / 1000); //(in samples)
@@ -314,7 +312,7 @@ namespace VoicePrint
public async Task<double[][]> FinishMFCCSamples(int windowWidthMs, int windowOffsetMs,
Func<double[], double[]> featureExtractor, int checkingDelayMs = 250)
{
- FilterAndExtractRollingSamples(windowWidthMs, windowOffsetMs, featureExtractor, true);
+ FilterAndExtractRollingSamples(windowWidthMs, windowOffsetMs, featureExtractor);
while (AreExtractionsOngoing.Any(b => b))
{
await Task.Delay(TimeSpan.FromMilliseconds(checkingDelayMs));
diff --git a/RollingWavReader/RollingWavReader/RollingWavReader.csproj b/RollingWavReader/RollingWavReader/RollingWavReader.csproj
index c5918fc..2df8176 100644
--- a/RollingWavReader/RollingWavReader/RollingWavReader.csproj
+++ b/RollingWavReader/RollingWavReader/RollingWavReader.csproj
@@ -1,10 +1,10 @@
-<?xml version="1.0" encoding="utf-8"?>
+<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
<PropertyGroup>
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
- <ProjectGuid>{399223df-43a8-4238-84bb-c3df47034ce4}</ProjectGuid>
+ <ProjectGuid>{399223DF-43A8-4238-84BB-C3DF47034CE4}</ProjectGuid>
<OutputType>Library</OutputType>
<AppDesignerFolder>Properties</AppDesignerFolder>
<RootNamespace>RollingWavReader</RootNamespace>
@@ -107,7 +107,7 @@
<None Include="project.json" />
</ItemGroup>
<ItemGroup>
- <Compile Include="Class1.cs" />
+ <Compile Include="RollingWavReader.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
<EmbeddedResource Include="Properties\RollingWavReader.rd.xml" />
</ItemGroup>
@@ -122,4 +122,4 @@
<Target Name="AfterBuild">
</Target>
-->
-</Project>
+</Project> \ No newline at end of file
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback