Tool to identify commercial MP3s

Introduction

Media files have become an important part of business content. Since in some environments we cannot simply block MP3s outright, how do we identify which files are business recordings and which are (possibly illegal) music?

IsMP3Song.exe is my attempt to address this issue. 

The idea is very simple: pass the filename to the tool, and it returns a score indicating how confident it is that the file is a commercial MP3.

The tool achieves this by making a web-service call to iTunes and then scoring the iTunes results against the filename of the MP3.

Demo Execution

As you can see, the first couple of executions return a score because these are identified as commercial songs and the last one is scored as not commercial.

These scores are also returned by the application as an exit code, allowing return values to be used for other logic, such as email notifications or file classifications.

USING WITH POWERSHELL

Going a step further, the PowerShell snippet below can be used as a starting point to loop through folders and score all the MP3 files found.

# Folder to scan (recursively) and the file-name pattern to look for.
$inputDir = "E:\SomeDataFolder"
$filterExt = "*.mp3"

# Collect every matching file below $inputDir.
$files = Get-ChildItem -Path $inputDir -Recurse -Filter $filterExt

# Run IsMP3Song.exe once per file (hidden window, wait for it to finish) and
# print the file name followed by the exit code the tool returned.
$files | ForEach-Object {
    $process = Start-Process -FilePath .\IsMP3Song.exe -ArgumentList """$($_.Name)""" -WindowStyle Hidden -PassThru -Wait
    "$($_.Name) $($process.ExitCode)"
}

The C# code

using Newtonsoft.Json.Linq;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;

namespace IsMP3Song
{
/// <summary>
/// One track returned by the iTunes search, together with the score it
/// received when compared against the file name given on the command line.
/// </summary>
public class Song
{
/// <summary>Value of the result's <c>artistName</c> field.</summary>
public string ArtistName;
/// <summary>Value of the result's <c>trackName</c> field.</summary>
public string TrackName;
/// <summary>Similarity to the input file name, multiplied by 100 and truncated to an integer.</summary>
public int Match;
}
class Program
{
/// <summary>
/// Entry point. Expects exactly one argument: the MP3 file name (or path) to check.
/// Searches iTunes for the cleaned-up file name, scores every returned result against
/// the argument, prints the best match and exits with its score as the process exit
/// code. Exits with 0 when iTunes returns no results, and with -1 when the arguments
/// are wrong or the lookup fails, so that a failure is not mistaken for a
/// "not commercial" verdict.
/// </summary>
/// <param name="args">Command-line arguments; a single file name or path.</param>
static void Main(string[] args)
{
    if (args.Length != 1)
    {
        Console.Error.WriteLine("Usage: IsMP3Song.exe <filename>");
        Environment.Exit(-1);
        return;
    }

    int exitCode;
    try
    {
        string searchTerm = FormatSearchTerm(args[0]);
        string jsonReturn = GetRequest($"https://itunes.apple.com/search?term={searchTerm}");

        List<Song> songs = new List<Song>();

        // Read the "results" array directly instead of serialising and re-parsing each element.
        JArray results = JObject.Parse(jsonReturn)["results"] as JArray;
        if (results != null)
        {
            foreach (JToken result in results)
            {
                // A missing field converts to null and is rendered as an empty string below.
                string artistName = (string)result["artistName"];
                string trackName = (string)result["trackName"];
                double match = CalculateSimilarity(args[0], $"{artistName} - {trackName}") * 100;

                songs.Add(new Song
                {
                    ArtistName = artistName,
                    TrackName = trackName,
                    Match = (int)match
                });
            }
        }

        if (songs.Count > 0)
        {
            Song bestMatch = songs.OrderByDescending(s => s.Match).First();
            Console.WriteLine($"{bestMatch.ArtistName} - {bestMatch.TrackName} [{bestMatch.Match}]");
            exitCode = bestMatch.Match;
        }
        else
        {
            Console.WriteLine($"Seems that is not a commercial MP3...");
            exitCode = 0;
        }
    }
    catch (Exception ex)
    {
        // Report the failure instead of swallowing it; -1 can never be a valid score.
        Console.Error.WriteLine($"Lookup failed: {ex.Message}");
        exitCode = -1;
    }

    Environment.Exit(exitCode);
}
/// <summary>
/// Performs an HTTP GET against <paramref name="url"/> and returns the response body
/// decoded as UTF-8. For the iTunes search endpoint this is a JSON document.
/// </summary>
/// <param name="url">Absolute URL to request.</param>
/// <returns>The complete response body as a string.</returns>
/// <exception cref="WebException">
/// The request failed. If the server sent an error body it is written to standard
/// error before the exception is rethrown.
/// </exception>
static string GetRequest(string url)
{
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
    try
    {
        // Dispose the response, stream and reader so the connection is released promptly.
        using (WebResponse response = request.GetResponse())
        using (Stream responseStream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(responseStream, System.Text.Encoding.UTF8))
        {
            return reader.ReadToEnd();
        }
    }
    catch (WebException ex)
    {
        // ex.Response is null when no HTTP response was received at all
        // (DNS failure, timeout, connection refused); only read it when present
        // so the original WebException is not masked by a NullReferenceException.
        if (ex.Response != null)
        {
            using (WebResponse errorResponse = ex.Response)
            using (Stream errorStream = errorResponse.GetResponseStream())
            using (StreamReader reader = new StreamReader(errorStream, System.Text.Encoding.GetEncoding("utf-8")))
            {
                Console.Error.WriteLine(reader.ReadToEnd());
            }
        }
        throw;
    }
}
/// <summary>
/// Scores how alike two strings are, based on their Levenshtein distance.
/// </summary>
/// <param name="source">First string to compare.</param>
/// <param name="target">Second string to compare.</param>
/// <returns>
/// 0.0 when either string is null or empty, 1.0 when the strings are equal, otherwise
/// one minus the edit distance divided by the length of the longer string.
/// </returns>
public static double CalculateSimilarity(string source, string target)
{
    if (string.IsNullOrEmpty(source) || string.IsNullOrEmpty(target))
    {
        return 0.0;
    }

    if (string.Equals(source, target))
    {
        return 1.0;
    }

    int edits = ComputeLevenshteinDistance(source, target);
    int longest = Math.Max(source.Length, target.Length);

    return 1.0 - ((double)edits / (double)longest);
}
/// <summary>
/// Computes the Levenshtein (edit) distance between two strings: the minimum number of
/// single-character insertions, deletions and substitutions needed to turn
/// <paramref name="source"/> into <paramref name="target"/>. The comparison is
/// case-sensitive and works on UTF-16 code units.
/// </summary>
/// <param name="source">First string; null is treated as an empty string.</param>
/// <param name="target">Second string; null is treated as an empty string.</param>
/// <returns>
/// 0 for identical strings; the length of the other string when one of them is empty;
/// otherwise the edit distance.
/// </returns>
public static int ComputeLevenshteinDistance(string source, string target)
{
    source = source ?? string.Empty;
    target = target ?? string.Empty;

    // Turning an empty string into the other one takes one insertion per character.
    if (source.Length == 0)
    {
        return target.Length;
    }

    if (target.Length == 0)
    {
        return source.Length;
    }

    if (source == target)
    {
        return 0;
    }

    // Only the previous and the current row of the distance matrix are ever read,
    // so two rows are enough.
    int[] previousRow = new int[target.Length + 1];
    int[] currentRow = new int[target.Length + 1];

    for (int j = 0; j <= target.Length; j++)
    {
        previousRow[j] = j;
    }

    for (int i = 1; i <= source.Length; i++)
    {
        currentRow[0] = i;

        for (int j = 1; j <= target.Length; j++)
        {
            int cost = (source[i - 1] == target[j - 1]) ? 0 : 1;

            int deletion = previousRow[j] + 1;
            int insertion = currentRow[j - 1] + 1;
            int substitution = previousRow[j - 1] + cost;

            currentRow[j] = Math.Min(Math.Min(deletion, insertion), substitution);
        }

        int[] swap = previousRow;
        previousRow = currentRow;
        currentRow = swap;
    }

    // After the final swap the last computed row is in previousRow.
    return previousRow[target.Length];
}
/// <summary>
/// Turns a file name or path into a search term for the iTunes query string: keeps
/// only the part after the last path separator, drops a trailing ".mp3" extension,
/// removes everything except ASCII letters and spaces, collapses runs of spaces and
/// joins the remaining words with '+'.
/// </summary>
/// <param name="fileName">File name or full path; both '\' and '/' count as separators.</param>
/// <returns>The '+'-separated search term; empty when the name contains no letters.</returns>
/// <exception cref="ArgumentNullException"><paramref name="fileName"/> is null.</exception>
public static string FormatSearchTerm(string fileName)
{
    if (fileName == null)
    {
        throw new ArgumentNullException(nameof(fileName));
    }

    // Keep only the file name; LastIndexOfAny returns -1 when there is no separator,
    // which makes Substring(0) return the whole string.
    int lastSeparator = fileName.LastIndexOfAny(new[] { '\\', '/' });
    string searchTerm = fileName.Substring(lastSeparator + 1);

    // Remove the extension only: the dot is escaped and the pattern is anchored to the
    // end, so a name such as "Jump3r" is no longer mangled by a match in the middle.
    searchTerm = Regex.Replace(searchTerm, @"\.mp3\s*$", "", RegexOptions.IgnoreCase);

    // Remove all non-alpha characters
    searchTerm = Regex.Replace(searchTerm, "[^a-zA-Z ]", "");

    // Remove duplicate spaces
    searchTerm = Regex.Replace(searchTerm, "[ ]{2,}", " ");

    return searchTerm.Trim().Replace(" ", "+");
}
}
}

Download

Visual Studio project download

https://bitbucket.org/svermaak/ismp3song/downloads/

Binary download

https://e8eaed.a2cdn1.secureserver.net/wp-content/uploads/2018/03/IsMP3Song.zip