I developed the below code within the environment:

  • OpenCV 2.2
  • Mac OS X 10.6.6
  • Xcode 3.2.5 64 bits
  • GCC 4.2
/*
* Hierchical K-mean algorithm for GPS spatial clustering
* 
* Developed by Pil Ho Kim, 2011.
* 
* History: 
* 2011-02-11 First revision
*
*/
 
#include <iostream>
#include <opencv.hpp>
#include <sstream>
#include <fstream>
#include <vector>
#include <string>
#include <unistd.h>
 
using namespace std;
 
char m_sSourceFile[1024];
char m_sTargetFile[1024];
char m_sRegionFile[1024];
 
#define ID_MAX 0
#define ID_MIN 1
#define ID_LAT 0
#define ID_LOG 1
 
int m_iRegionID = 0;
 
#define CLUSTER_MIN_RADIUS 0.01 // Minimum cluster radius in Kilometer
#define CLUSTER_MIN_POINTS 2 // Minimum number of GPS points contained in the cluster
 
int getSampleCount(char *sFile) {
    FILE *fp;
    char fstr[200];
    int k;
 
    if (!(fp = fopen(sFile, "r"))) return 0;
 
    k = 0;
    while(fgets(fstr,200,fp)){                
        k++;
    }
    fclose(fp);
 
    return k;
}
 
#define d2r (M_PI / 180.0)
 
// /calculate haversine distance for linear distance
// http://stackoverflow.com/questions/365826/calculate-distance-between-2-gps-coordinates
double getDistance(CvPoint2D32f pt_max, CvPoint2D32f pt_min) {
    double lat1 = pt_max.x;
    double long1 = pt_max.y;
    double lat2 = pt_min.x;
    double long2 = pt_min.y;
 
    double dlong = (long2 - long1) * d2r;
    double dlat = (lat2 - lat1) * d2r;
    double a = pow(sin(dlat/2.0), 2) + cos(lat1*d2r) * cos(lat2*d2r) * pow(sin(dlong/2.0), 2);
    double c = 2 * atan2(sqrt(a), sqrt(1-a));
    double d = 6367 * c;
 
    return d;
}
 
int classifyRegion(CvMat* points, CvMat* clusters, int iRegionID) {
    FILE *fp_point_cluster;
    FILE *fp_point_region;
    int m_iRegionCount[2];
    CvPoint2D32f pt_maxmin[2][2];
    CvPoint2D32f pt, pt_center[2];
    CvPoint2D32f* pt_target;
    double lat, log, region_radius;
    int cluster_idx;
    int i, j, k, iResult;
    int points_count = cvGetDimSize(points, 0);
    int m_iSubRegionID[2];
 
    // i is a cluster id
    for (i = 0; i < 2; ++i) {
        pt_maxmin[ID_MAX][i].x = -1000;
        pt_maxmin[ID_MIN][i].x = 1000;
        pt_maxmin[ID_MAX][i].y = -1000;
        pt_maxmin[ID_MIN][i].y = 1000;
 
        m_iRegionCount[i] = 0;
        m_iSubRegionID[i] = ++m_iRegionID;
 
        pt_center[i].x = pt_center[i].y = 0;
    }
 
    iResult = cvKMeans2( points, 2, clusters,
            cvTermCriteria( CV_TERMCRIT_EPS+CV_TERMCRIT_ITER, 10, 1.0));
 
    // Write down the cluster
    if (!(fp_point_cluster = fopen(m_sTargetFile, "a"))) return 0;
    for( i = 0; i < points_count; i++ )
    {
        pt = ((CvPoint2D32f*)points->data.fl)[i];
        lat = pt.x;
        log = pt.y;
 
        cluster_idx = clusters->data.i[i];
 
        // Compute statistics
 
        // Weighted average to calculate the region center
        pt_center[cluster_idx].x += pt.x;
        pt_center[cluster_idx].y += pt.y;
 
        if (lat > pt_maxmin[ID_MAX][cluster_idx].x)
            pt_maxmin[ID_MAX][cluster_idx].x = lat;
        if (lat < pt_maxmin[ID_MIN][cluster_idx].x)
            pt_maxmin[ID_MIN][cluster_idx].x = lat;
        if (log > pt_maxmin[ID_MAX][cluster_idx].y)
            pt_maxmin[ID_MAX][cluster_idx].y= log;
        if (log < pt_maxmin[ID_MIN][cluster_idx].y)
            pt_maxmin[ID_MIN][cluster_idx].y = log;
 
        m_iRegionCount[cluster_idx] = m_iRegionCount[cluster_idx] + 1;
 
        fprintf(fp_point_cluster, "%d\t%d\t%f\t%f\n", 
            iRegionID,
            m_iSubRegionID[cluster_idx],
            pt.x,
            pt.y
        );
    }
    fclose(fp_point_cluster);
 
    printf("Region\t%d:\tClustered into\t%d\tand\t%d.\n", iRegionID, m_iRegionCount[0], m_iRegionCount[1]);
 
    // Check the distribution
    for (i = 0; i < 2; ++i) {
        // If the radius is bigger than 10 meter, then split.
        region_radius = getDistance(pt_maxmin[ID_MAX][i], pt_maxmin[ID_MIN][i]);
 
        // Log the region information
        if (m_iRegionCount[i] > 0) {
            if (!(fp_point_region = fopen(m_sRegionFile, "a"))) return 0;
            fprintf(fp_point_region, "%d\t%d\t%f\t%f\t%f\t%f\t%f\t%f\t%f\n", 
                m_iSubRegionID[i],
                m_iRegionCount[i],
                region_radius,
                pt_center[i].x / m_iRegionCount[i],
                pt_center[i].y / m_iRegionCount[i],
                pt_maxmin[ID_MAX][i].x,
                pt_maxmin[ID_MAX][i].y,
                pt_maxmin[ID_MIN][i].x,
                pt_maxmin[ID_MIN][i].y
            );
            fclose(fp_point_region);
        }
 
        if (region_radius > CLUSTER_MIN_RADIUS && m_iRegionCount[i] > CLUSTER_MIN_POINTS) {
            // Recursive iteration
            CvMat* subpoints = cvCreateMat( m_iRegionCount[i], 1, CV_32FC2 );
            CvMat* subclusters = cvCreateMat( m_iRegionCount[i], 1, CV_32SC1 );
 
            // Copy points
            k = 0;
 
            for (j = 0; j < points_count; ++j) {
                if (clusters->data.i[j] == i) {
                    pt = ((CvPoint2D32f*)points->data.fl)[j];
                    pt_target = (CvPoint2D32f*)subpoints->data.fl + k;
 
                    pt_target->x = pt.x;
                    pt_target->y = pt.y;
 
                    ++k;
                } 
            }
 
            classifyRegion(subpoints, subclusters, m_iSubRegionID[i]);
 
            cvReleaseMat( &subpoints );
            cvReleaseMat( &subclusters );
        }
    }
 
    return 1;
}
 
int main (int argc, char * const argv[]) {
    // insert code here...
    int i;
    int iResult;
    CvPoint2D32f* pt;
    char fstr[200];
 
    FILE *fp;
    FILE *fp_point_cluster;
 
    sprintf(m_sSourceFile, "iphone_gps.txt");
    sprintf(m_sTargetFile, "iphone_gps_clusters.txt");
    sprintf(m_sRegionFile, "iphone_gps_regions.txt");
 
    int sample_count = getSampleCount(m_sSourceFile);
 
    CvMat* points = cvCreateMat( sample_count, 1, CV_32FC2 );
    CvMat* clusters = cvCreateMat( sample_count, 1, CV_32SC1 );
 
    i = 0;
 
    if (!(fp = fopen(m_sSourceFile, "r"))) return 0;
    while(fgets(fstr,200,fp)){
        pt = (CvPoint2D32f*)points->data.fl +i;
 
        sscanf(fstr,"%f %f", &(pt->x), &(pt->y));
        ++i;
    }
    fclose(fp);
 
    // Initialize region ID
    m_iRegionID = 0;    
    if (!(fp_point_cluster = fopen(m_sTargetFile, "w"))) return 0;
    fclose(fp_point_cluster);
    if (!(fp_point_cluster = fopen(m_sRegionFile, "w"))) return 0;
    fclose(fp_point_cluster);
 
    iResult = classifyRegion(points, clusters, m_iRegionID);
 
    cvReleaseMat( &points );
    cvReleaseMat( &clusters );
 
    return 0;
}