diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..02dc534 --- /dev/null +++ b/Makefile @@ -0,0 +1,20 @@ +CINC = -Iinclude +SRC = src +BIN = bin +CC = g++ -Wall -O3 -Wno-unused-result $(CINC) + +all: ${BIN}/build ${BIN}/recog + +# Generates a compiler for an d_lang language, +${BIN}/build: $(SRC)/build.cpp $(SRC)/hash.cpp $(SRC)/fingerprint.cpp $(SRC)/wav.cpp $(SRC)/bmp.cpp + mkdir -p ${BIN} + $(CC) -o $(BIN)/build $(SRC)/build.cpp $(SRC)/hash.cpp $(SRC)/fingerprint.cpp $(SRC)/bmp.cpp $(SRC)/wav.cpp -ll -lm -lfftw3 + +${BIN}/recog: $(SRC)/recog.cpp $(SRC)/hash.cpp $(SRC)/fingerprint.cpp $(SRC)/wav.cpp $(SRC)/bmp.cpp + mkdir -p ${BIN} + $(CC) -o $(BIN)/recog $(SRC)/recog.cpp $(SRC)/hash.cpp $(SRC)/fingerprint.cpp $(SRC)/bmp.cpp $(SRC)/wav.cpp -ll -lm -lfftw3 + +clean: + rm -f *.BAK *.o core *~* *.a + rm -f $(BIN)/build + rm -f $(BIN)/recog diff --git a/README.md b/README.md index cf2cf36..7c7d65b 100644 --- a/README.md +++ b/README.md @@ -1,17 +1,40 @@ -Robust Music/Audio Recognition Algorithm in C++ +Music/Audio Recognition Application written in C++ =============================================== +Features: +----- + * Robust Audio Recognition + * High efficiency (recognize in less than 0.1 second per song) + * Memory reduction ( 4G is enough for 10000 songs) + * The Data Structure now support 2,500,000 songs and each song less than 7 minutes. + * return the TIME POINT of the cut song in original song. + +Dependencies: +----- + * fftw3: + If under Debian/Ubuntu, run `apt-get install libfftw3-dev`. + +Compile & Run: +----- + * extract songs to ./bin/ dir. + * `make` to produce two excutive files in ./bin/ + * The songs should be .wav format. + * The songs should be contained in ./bin/samples/ + * You may download the songs. + link: + password: + * run `cd bin`, `./build` and it'll produce a file called `database` + * run `./recog ${filename}` to recog the songs that in samples list. + TODO: ----- - - Transfer the codes from windows to linux. - - Android app to test the algorithm interactively. - - The Http request/response and such hustle and bustle. - - ... + - Docs DONE: ----- - - Solve the BaiduMusic download tools. - All the codes in Windows. + - Transfer the codes from windows to linux. + - Solve the BaiduMusic download tools to update new songs automatically. -My partner and I finshed this project on Windows8 and got the *First Prize* delivered by [National Engineering Laboratory for Speech and Language Informatinon Processing](http://nelslip.ustc.edu.cn/html/yunews/detail_2014_05/30/191.shtml). +At the begining, it's a COMPETITION. My partner and I finshed this project on Windows8 and won the *First Prize* delivered by [National Engineering Laboratory for Speech and Language Informatinon Processing](http://nelslip.ustc.edu.cn/html/yunews/detail_2014_05/30/191.shtml). Feel free to contact me yjh199511 at gmail diff --git a/include/bmp.h b/include/bmp.h new file mode 100644 index 0000000..6d5e8d4 --- /dev/null +++ b/include/bmp.h @@ -0,0 +1,30 @@ +#ifndef BMP_H +#define BMP_H + +#define BMP_TEST + +#include +#include +#include +#include +#define checkborder(x,y) (x>=0 && x=0 && y #include #include -#include +#include #include #include diff --git a/src/Hash.h b/include/hash.h similarity index 71% rename from src/Hash.h rename to include/hash.h index 1253010..7ff4e23 100644 --- a/src/Hash.h +++ b/include/hash.h @@ -18,14 +18,14 @@ #define OverFlowThreshold 1<<28 using namespace std; -//Hash的key类型,用于iFluBuild +//Hash的key类型,用于Build //(f1, f2_f1, t) struct HashKeyInfo{ size_t* start; size_t length; HashKeyInfo* next; //指针用于扩容 }; -//Hash的key类型,用于iFluSelect +//Hash的key类型,用于Recog struct HashKeyTable{ size_t *start; size_t length; @@ -43,26 +43,26 @@ class THash{ HashKeyInfo *key_info; HashKeyTable *key_table; - THash::THash(); - THash::~THash(); - void THash::ReBuildInit(); + THash(); + ~THash(); + void ReBuildInit(); /************************************ Functions for build tracks.(iFlyBuild) ************************************************/ //此函数用于对于iFlyBuild的情况,从wav到Hash_Table. //Finished. - void THash::BuildInit(); - void THash::BuildUnInit(); + void BuildInit(); + void BuildUnInit(); //加歌名,更新歌曲数。 //Finished. - void THash::AddSongList(const char *filename); + void AddSongList(const char *filename); //往Value的内存块里加数据,更新Key_table. //Finished. - void THash::InsertHash(size_t f1, size_t f2_f1, size_t t, size_t id, size_t offset); + void InsertHash(size_t f1, size_t f2_f1, size_t t, size_t id, size_t offset); //将Hash表往文件里刷(不是刷整个内存,这样会在iFlySelect里浪费内存空间)//Finished - void THash::Hash2File(const char* filename); + void Hash2File(const char* filename); /************************************ Functions for select tracks.(iFlySelect) ************************************************/ - size_t* THash::GetHash(size_t f1, size_t f2_f1, size_t t); - void THash::File2Hash(const char* filename); + size_t* GetHash(size_t f1, size_t f2_f1, size_t t); + void File2Hash(const char* filename); //Functions for vote and save the top voted id to QueryId. - void THash::VoteInit(); - void THash::Vote(size_t f1, size_t f2_f1, size_t t, size_t offset); - size_t THash::VoteResult(size_t &offset); + void VoteInit(); + void Vote(size_t f1, size_t f2_f1, size_t t, size_t offset); + size_t VoteResult(size_t &offset); }; #endif // _HashFunc_h_ diff --git a/src/recog.h b/include/recog.h similarity index 100% rename from src/recog.h rename to include/recog.h diff --git a/src/wav.h b/include/wav.h similarity index 90% rename from src/wav.h rename to include/wav.h index 8b6c44f..94b16c8 100644 --- a/src/wav.h +++ b/include/wav.h @@ -4,7 +4,7 @@ #include #include #include -#include "stdafx.h" +#include "stdio.h" //#define WAV_TEST using namespace std; diff --git a/references/Audio Fingerprinting.pdf b/references/Audio Fingerprinting.pdf new file mode 100644 index 0000000..6abcf15 Binary files /dev/null and b/references/Audio Fingerprinting.pdf differ diff --git a/references/Wang03-shazam.pdf b/references/Wang03-shazam.pdf new file mode 100644 index 0000000..c90ebb9 Binary files /dev/null and b/references/Wang03-shazam.pdf differ diff --git a/src/bmp.cpp b/src/bmp.cpp new file mode 100644 index 0000000..34376a7 --- /dev/null +++ b/src/bmp.cpp @@ -0,0 +1,165 @@ +#include "bmp.h" + +TBMP::TBMP(const char *filename, int w, int h, int mode){ +#ifdef BMP_TEST + char bufferc[4]; + /* char ch = 255; */ + unsigned long buffer4; + unsigned short buffer2; + + fp = fopen(filename, "wb"); + if (fp == NULL) printf("!!"); + W = w; + H = h; + MODE = mode%2; + if (W%4!=0) W=(W/4+1)*4; + + int i, j; + bitmap = (char **)malloc(H * sizeof(char *)); + if (bitmap == NULL) printf("00"); + for(i=0; i=0; j--) + fwrite(bitmap[j], 1, W, fp); + fclose(fp); + //realease resources +#endif +} + +int TBMP::setcolor(int grey){ +#ifdef BMP_TEST + if (grey<0 || grey>255) + return 1; + + Grey = grey; + return 0; +#endif +} + +int TBMP::drawpoint(int x, int y){ +#ifdef BMP_TEST + if (!checkborder(x,y)) + return 1; + + if (MODE) + bitmap[H-1-y][x] = Grey; + else + bitmap[y][x] = Grey; + return 0; +#endif +} + +int TBMP::drawline(int x0, int y0, int x1, int y1){ +#ifdef BMP_TEST + if (!checkborder(x0, y0) || !checkborder(x1, y1)) + return 1; + if (x0==x1 && y0==y1) + return drawpoint(x0, y0); + + int xs, ys, xe, ye; + int i, j; + double t; + if (abs(x0-x1) >= abs(y0-y1)){ + if (x0 < x1){ + xs = x0; + ys = y0; + xe = x1; + ye = y1; + } + else{ + xs = x1; + ys = y1; + xe = x0; + ye = y0; + } + t = (double)(ye - ys) / (double)(xe - xs); + for (i=xs; i<=xe; i++){ + j = (int)((i - xs) * t + ys); + drawpoint(i,j); + //bitmap[j][i] = Grey; + } + } + else{ + if (y0 < y1){ + xs = x0; + ys = y0; + xe = x1; + ye = y1; + } + else{ + xs = x1; + ys = y1; + xe = x0; + ye = y0; + } + t = (double)(xe - xs) / (double)(ye - ys); + for (j=ys; j<=ye; j++){ + i = (int)((j-ys) * t + xs); + drawpoint(i,j); + //bitmap[j][i] =Grey; + } + } + return 0; +#endif +} + +int TBMP::drawrect(int x0, int y0, int x1, int y1){ +#ifdef BMP_TEST + return drawline(x0, y0, x0, y1) || drawline(x0, y0, x1, y0) \ + || drawline(x1, y0, x1, y1) || drawline(x0, y1, x1, y1); +#endif +} diff --git a/src/build.cpp b/src/build.cpp index 83879be..22fdb0c 100644 --- a/src/build.cpp +++ b/src/build.cpp @@ -1,6 +1,10 @@ -#include "iFlyBuild.h" #include "hash.h" #include "fingerprint.h" +#include // 提供open()函数 +#include // 提供目录流操作函数 +#include // 提供属性操作函数 +#include // 提供mode_t 类型 +#include using namespace std; @@ -8,17 +12,45 @@ THash myhash; TExtractFeature fingerprint; int Initialize(){ - myhash.BuildInit(); + myhash.BuildInit(); return 0; } int ProcessBuildRes(const char *szResDir, const char *szResPath){ - myhash.AddSongList(T2A(FileName)); - fingerprint.ExtractPair(T2A(finder.GetFilePath()), myhash, 0, (float)0.98, index, 0); - fingerprint.UnInit(); - myhash.Hash2File(szResPath); + int index = 0; + char FileName[80]; + DIR *dp; // 定义子目录流指针 + struct dirent *entry; // 定义dirent结构指针保存后续目录 + struct stat statbuf; // 定义statbuf结构保存文件属性 + if((dp = opendir(szResDir)) == NULL) // 打开目录,获取子目录流指针,判断操作是否成功 + { + printf("Can't open dir.\n"); + return 1; + } + chdir(szResDir); // 切换到当前目录 + while ( (entry = readdir(dp)) != NULL ) //获取下一级目录信息,如果未否则循环 + { + lstat(entry->d_name, &statbuf); // 获取下一级成员属性 + if (!(S_IFDIR &statbuf.st_mode)) // 判断下一级成员不是目录 + { + strcpy(FileName, entry->d_name); + printf("%d: %s ",index , FileName); // 输出属性不是目录的成员 + myhash.AddSongList(FileName); + char FilePath[80] = ""; + strcat(FilePath, szResDir); + strcat(FilePath, FileName); + printf("%s\n",FilePath); // 输出属性不是目录的成员 + fingerprint.ExtractPair(FilePath, myhash, 0, (float)0.98, index, 0); + fingerprint.UnInit(); + index ++; + } + } printf("%d.\n", index); - system("pause"); + chdir(".."); + closedir(dp); // 关闭子目录流 + //write to the database + myhash.Hash2File(szResPath); + return 0; } @@ -26,3 +58,12 @@ int unInitialize(){ myhash.BuildUnInit(); return 0; } + +int +main( int argc, char **argv ) +{ + Initialize(); + ProcessBuildRes("/home/jiah/WS/git/AudioRecognition/bin/samples/", "./database"); + unInitialize(); + return 0; +} diff --git a/src/fingerprint.cpp b/src/fingerprint.cpp index 138dd14..a8a2c0d 100644 --- a/src/fingerprint.cpp +++ b/src/fingerprint.cpp @@ -1,4 +1,5 @@ #include "fingerprint.h" +using namespace std; TExtractFeature::TExtractFeature(){ fftw_in = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)* FRAME_SIZE); @@ -57,9 +58,9 @@ int TExtractFeature::NotExtremum(int j, float Arr[]){ void TExtractFeature::ExtractPair(const char *filename, THash &hash, int offset, float decay, int Id, int mode){ decay_factor = decay; Wav2Spectrum(filename, offset); - //printf("w2s finish\n"); + /* printf("w2s finish\n"); */ Spectrum2Feature(); - //printf("s2f finish\n"); + /* printf("s2f finish\n"); */ if (mode == 0) Feature2PairInsert(hash, Id); @@ -174,7 +175,7 @@ int TExtractFeature::Feature2PairInsert(THash &hash, int id){ int pi, pj; int count; int flag; - int index = 0; + // [> int index = 0; <] for (int i = 0; i= MAX_PAIR_PER_FEATURE) flag = 0; + /* if (count >= MAX_PAIR_PER_FEATURE) flag = 0; */ } } return 0; diff --git a/src/Hash.cpp b/src/hash.cpp similarity index 85% rename from src/Hash.cpp rename to src/hash.cpp index 31bf473..2a6398b 100644 --- a/src/Hash.cpp +++ b/src/hash.cpp @@ -1,5 +1,4 @@ #include "hash.h" -#include "stdafx.h" //外部函数 inline size_t HashTableOffset(size_t f1, size_t f2_f1, size_t t){ @@ -24,7 +23,7 @@ void THash::BuildInit(){ //初始化HashKey表 key_info = (HashKeyInfo*)malloc(sizeof(HashKeyInfo) * HashKeyNum); for (int i = 0; i < HashKeyNum; i++){ - key_info[i].next = nullptr; + key_info[i].next = NULL; key_info[i].length = 0; } } @@ -45,7 +44,7 @@ void THash::BuildUnInit(){ //加歌名,更新歌曲数。 void THash::AddSongList(const char *filename){ - strcpy_s(song_list[song_num], strlen(filename) + 1, filename); + strncpy(song_list[song_num], filename, strlen(filename) + 1); song_num++; } @@ -83,13 +82,13 @@ void THash::InsertHash(size_t f1, size_t f2_f1, size_t t, size_t id, size_t offs //将Hash表往文件里刷(不是刷整个内存,这样会在iFlySelect里浪费内存空间) void THash::Hash2File(const char* filename){ FILE *fp; - fopen_s(&fp, filename, "wb"); + fp = fopen(filename, "wb"); if (fp == NULL){ printf("File open WRONG.\n"); } //Write SongName fwrite(&song_num, sizeof(size_t), 1, fp); - printf("共%d首歌\n", song_num); + printf("共%zu首歌\n", song_num); for (size_t i = 0; ilength; while (length){ length--; - size_t offset_value = (*(pKey->start + length) << ID_BITS) >> ID_BITS; + /* size_t offset_value = (*(pKey->start + length) << ID_BITS) >> ID_BITS; */ + size_t offset_value = (*(pKey->start + length)) & 0x00003FFF ; // 0x00003FFF means the first 14 bits in total 32 bits. + /* printf("Here5,%zu, %zu\n",*(pKey->start + length), offset_value); */ if (offset_value < offset) continue; // 为失效投票,这种情况的投票结果为错的 + /* printf("Here6,%zu, %zu\n",(*(pKey->start + length)) >> OFFSET_BITS, offset_value - offset); */ vote_table[(*(pKey->start + length)) >> OFFSET_BITS][offset_value - offset]++; } return; @@ -178,12 +180,12 @@ size_t THash::VoteResult(size_t &offset){ } THash::THash(){ - pValueStart = nullptr; - pValueEnd = nullptr; - vote_table = nullptr; + pValueStart = NULL; + pValueEnd = NULL; + vote_table = NULL; data_num = 0; song_num = 0; - key_info = nullptr; + key_info = NULL; song_list = (char **)malloc(MAX_SONG_NUM*sizeof(char*)); for (int i=0; i