Implementing FAST
Jordan S. Ford

  
  /* jsf_fast.h - v0.0 - public domain FAST feature detection -
http://www.jrdnfrd.com/jsf no warranty implied; use at your own risk

   Do this:
       #define JSF_FAST_IMPLEMENTATION
   before you include this file in *one* C or C++ file to create the
implementation.

   // i.e. it should look like this:
   #include ...
   #include ...
   #include ...
   #define JSF_FAST_IMPLEMENTATION
   #include "jsf_fast.h"

LICENSE

    Please contact the author at jsford94@gmail.com for license information.

 ==================== Contributors ====================

 FAST Detectors
    Jordan Ford (FAST_9_16)

*/

#pragma once
#ifndef JSF_INCLUDE_JSF_FAST_H
#define JSF_INCLUDE_JSF_FAST_H

#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#define JSFDEF

#ifdef __cplusplus
extern "C" {
#endif

////////////////////////////////////////////////////////////////////////
//
// PRIMARY API
//

typedef struct {
    int row;
    int col;
} jsf_feat_t;

// FAST_9_16 feature point detection
JSFDEF int jsf_detect_fast_9_16(uint8_t const *data, int rows, int cols, uint8_t threshold,
                                bool nonmax_suppression, jsf_feat_t **features);

// free the detected features -- this is just free()
JSFDEF void jsf_features_free(void *features);

#ifdef __cplusplus
}
#endif

//
//
////  end header file  /////////////////////////////////////////////////
#endif // JSF_INCLUDE_JSF_FAST_H

#ifdef JSF_FAST_IMPLEMENTATION

// x86/64 detection
#if defined(__x86_64__) || defined(_M_X64)
#define JSF__X64_TARGET
#elif defined(i386) || defined(_M_IX86)
#define JSF__X86_TARGET
#endif

#if defined(__GNUC__) && defined(JSF__X86_TARGET) && !defined(__SSE2__) && !defined(JSF_NO_SIMD)
#define JSF_NO_SIMD
#endif

JSFDEF void jsf_make_offsets(int *offsets, int row_stride, int pattern_size) {
    const char offset16[16][2] = {{0, 3},  {1, 3},  {2, 2},  {3, 1},   {3, 0},   {3, -1},
                                  {2, -2}, {1, -3}, {0, -3}, {-1, -3}, {-2, -2}, {-3, -1},
                                  {-3, 0}, {-3, 1}, {-2, 2}, {-1, 3}};

    for (int i = 0; i < pattern_size; ++i) {
        offsets[i] = offset16[i][0] + row_stride * offset16[i][1];
    }

    for (int i = pattern_size; i < 25; ++i) {
        offsets[i] = offsets[i - pattern_size];
    }
}

int jsf_min(int a, int b) { return (a < b) ? a : b; }

int jsf_max(int a, int b) { return (a > b) ? a : b; }

int jsf_clamp(int v, int min, int max) { return jsf_min(jsf_max(v, min), max); }

int16_t jsf_fast_score_16(const uint8_t *pix, const int offsets[25], int threshold) {
    int diff[25];
    uint8_t v = pix[0];

    for (int i = 0; i < 25; ++i) {
        diff[i] = (int)v - (int)pix[offsets[i]];
    }

    int a0 = threshold;
    for (int i = 0; i < 16; i += 2) {
        int a = jsf_min(diff[i + 1], diff[i + 2]);
        a = jsf_min(a, diff[i + 3]);
        if (a <= a0) {
            continue;
        }
        a = jsf_min(a, diff[i + 4]);
        a = jsf_min(a, diff[i + 5]);
        a = jsf_min(a, diff[i + 6]);
        a = jsf_min(a, diff[i + 7]);
        a = jsf_min(a, diff[i + 8]);
        a0 = jsf_max(a0, jsf_min(a, diff[i]));
        a0 = jsf_max(a0, jsf_min(a, diff[i + 9]));
    }

    int b0 = -a0;
    for (int i = 0; i < 16; i += 2) {
        int b = jsf_max(diff[i + 1], diff[i + 2]);
        b = jsf_max(b, diff[i + 3]);
        b = jsf_max(b, diff[i + 4]);
        b = jsf_max(b, diff[i + 5]);
        if (b >= b0) {
            continue;
        }
        b = jsf_max(b, diff[i + 6]);
        b = jsf_max(b, diff[i + 7]);
        b = jsf_max(b, diff[i + 8]);
        b0 = jsf_min(b0, jsf_max(b, diff[i]));
        b0 = jsf_min(b0, jsf_max(b, diff[i + 9]));
    }

    threshold = -b0 - 1;
    return threshold;
}

JSFDEF int jsf_detect_fast_9_16(uint8_t const *data, int rows, int cols, uint8_t threshold,
                                bool nonmax_suppression, jsf_feat_t **features) {
    int num_features = 0;
    int num_features_capacity = 10;
    *features = (jsf_feat_t *)malloc(num_features_capacity * sizeof(jsf_feat_t));

    // This is a lookup table to speed up
    // comparisons against the threshold window.
    // If x < -threshold, lut[x+255] = 0x01;
    // If x > threshold, lut[x+255] = 0x02;
    // Else, lut[x+255] = 0x00;
    unsigned char threshold_lut[512];
    for (int i = -255; i < 256; ++i) {
        threshold_lut[i + 255] =
            (unsigned char)(i < -threshold ? 0x01 : i > threshold ? 0x02 : 0x00);
    }

    // These offsets define the FAST feature pattern.
    int offsets[25];
    jsf_make_offsets(offsets, cols, 16);

    // Iterate over rows of the image.
    for (int i = 3; i < rows - 3; ++i) {
        uint8_t const *pix = &data[i * cols] + 3;

        // Iterate over columns of the image.
        for (int j = 3; j < cols - 3; ++j, ++pix) {

            // Offset your view into the threshold lut.
            const unsigned char *lut = (&threshold_lut[0] + 255) - pix[0];

            int d = lut[pix[offsets[0]]] | lut[pix[offsets[8]]];

            if (d == 0) {
                continue;
            }

            d &= lut[pix[offsets[2]]] | lut[pix[offsets[10]]];
            d &= lut[pix[offsets[4]]] | lut[pix[offsets[12]]];
            d &= lut[pix[offsets[6]]] | lut[pix[offsets[14]]];

            if (d == 0) {
                continue;
            }

            d &= lut[pix[offsets[1]]] | lut[pix[offsets[9]]];
            d &= lut[pix[offsets[3]]] | lut[pix[offsets[11]]];
            d &= lut[pix[offsets[5]]] | lut[pix[offsets[13]]];
            d &= lut[pix[offsets[7]]] | lut[pix[offsets[15]]];

            if (d & 0x01) {
                // This might be a corner with darker surroundings.
                const int vt = pix[0] - threshold;
                int count = 0;

                // Scan around the circle and check for a run of dark points.
                for (int k = 0; k < 16 + 8 + 1; ++k) {
                    if (pix[offsets[k]] < vt) {
                        if (++count > 8) {
                            // Add this point to the accumulated feature points.
                            if (num_features == num_features_capacity) {
                                num_features_capacity *= 2;
                                *features = (jsf_feat_t *)realloc(
                                    *features, num_features_capacity * sizeof(jsf_feat_t));
                            }
                            (*features)[num_features].row = i;
                            (*features)[num_features].col = j;
                            num_features++;
                            break;
                        }
                    } else {
                        count = 0;
                    }
                }
            }
            if (d & 0x02) {
                // This might be a corner with lighter surroundings.
                const int vt = pix[0] + threshold;
                int count = 0;

                // Scan around the circle and check for a run of light points.
                for (int k = 0; k < 16 + 8 + 1; ++k) {
                    if (pix[offsets[k]] > vt) {
                        if (++count > 8) {
                            // Add this point to the accumulated feature points.
                            if (num_features == num_features_capacity) {
                                num_features_capacity *= 2;
                                *features = (jsf_feat_t *)realloc(
                                    *features, num_features_capacity * sizeof(jsf_feat_t));
                            }
                            (*features)[num_features].row = i;
                            (*features)[num_features].col = j;
                            num_features++;
                            break;
                        }
                    } else {
                        count = 0;
                    }
                }
            }
        }
    }

    if (nonmax_suppression) {
        jsf_feat_t *filtered_features = (jsf_feat_t *)malloc(num_features * sizeof(jsf_feat_t));
        int num_filtered_features = 0;

        jsf_feat_t *prev_row = features[0];
        jsf_feat_t *curr_row = prev_row;
        while ((curr_row - features[0]) < num_features && curr_row->row <= prev_row->row) {
            curr_row++;
        }
        jsf_feat_t *next_row = curr_row;
        while ((next_row - features[0]) < num_features && next_row->row <= curr_row->row) {
            next_row++;
        }

        enum { CACHE_ZERO = 0, CACHE_PREV = 1, CACHE_CURR = 2, CACHE_NEXT = 3, CACHE_ROWS = 4 };
        int16_t cache[CACHE_ROWS][cols];
        memset(cache, 0, CACHE_ROWS * cols * sizeof(int16_t));

        int16_t *zero_row = &cache[CACHE_ZERO][0];
        int16_t *prev_row_scores = &cache[CACHE_ZERO][0];
        int16_t *curr_row_scores = &cache[CACHE_ZERO][0];
        int16_t *next_row_scores = &cache[CACHE_ZERO][0];

        // This loops over feature points grouped by row.
        while ((next_row - features[0]) < num_features) {
            // Find next_next_row, the end of the next row.
            jsf_feat_t *next_next_row = next_row;
            while ((next_next_row - features[0]) < num_features &&
                   next_next_row->row <= next_row->row) {
                next_next_row++;
            }

            if ((curr_row->row - prev_row->row) == 1) {
                if (prev_row_scores == zero_row) {
                    prev_row_scores = cache[CACHE_PREV];
                    memset(prev_row_scores, 0, cols * sizeof(prev_row_scores[0]));
                    for (jsf_feat_t *p = prev_row; p != curr_row; ++p) {
                        const uint8_t *pix = &data[p->col + p->row * cols];
                        prev_row_scores[p->col] = jsf_fast_score_16(pix, offsets, threshold);
                    }
                }
            } else {
                prev_row_scores = zero_row;
            }

            if (curr_row_scores == zero_row) {
                curr_row_scores = cache[CACHE_CURR];
                memset(curr_row_scores, 0, cols * sizeof(curr_row_scores[0]));
                for (jsf_feat_t *f = curr_row; f != next_row; ++f) {
                    const uint8_t *pix = &data[f->col + f->row * cols];
                    curr_row_scores[f->col] = jsf_fast_score_16(pix, offsets, threshold);
                }
            }

            if ((next_row->row - curr_row->row) == 1) {
                next_row_scores = cache[CACHE_NEXT];
                memset(next_row_scores, 0, cols * sizeof(next_row_scores[0]));
                for (jsf_feat_t *n = next_row; n != next_next_row; ++n) {
                    const uint8_t *pix = &data[n->col + n->row * cols];
                    next_row_scores[n->col] = jsf_fast_score_16(pix, offsets, threshold);
                }
            } else {
                next_row_scores = zero_row;
            }

            for (jsf_feat_t *f = curr_row; f != next_row; ++f) {
                const int row = f->row;
                const int col = f->col;
                const int16_t score = curr_row_scores[col];

                if (score > prev_row_scores[col - 1] && score > prev_row_scores[col] &&
                    score > prev_row_scores[col + 1] && score > curr_row_scores[col - 1] &&
                    score > curr_row_scores[col + 1] && score > next_row_scores[col - 1] &&
                    score > next_row_scores[col] && score > next_row_scores[col + 1]) {
                    filtered_features[num_filtered_features].row = row;
                    filtered_features[num_filtered_features].col = col;
                    num_filtered_features++;
                }
            }

            // If the current row is still useful, copy it to the previous row
            // for next time.
            if ((curr_row->row - prev_row->row) == 1) {
                memcpy(prev_row_scores, curr_row_scores, cols * sizeof(curr_row_scores[0]));
            } else {
                prev_row_scores = zero_row;
            }
            // If the next row is still useful, copy it to the current row for
            // next time.
            if ((next_row->row - curr_row->row) == 1) {
                memcpy(curr_row_scores, next_row_scores, cols * sizeof(curr_row_scores[0]));
            } else {
                curr_row_scores = zero_row;
            }

            prev_row = curr_row;
            curr_row = next_row;
            next_row = next_next_row;
        }
        // Free the original features.
        jsf_features_free(*features);
        // Resize the filtered features to fit.
        *features =
            (jsf_feat_t *)realloc(filtered_features, num_filtered_features * sizeof(jsf_feat_t));

        // Return the filtered features.
        return num_filtered_features;
    }

    *features = (jsf_feat_t *)realloc(*features, num_features * sizeof(jsf_feat_t));
    return num_features;
}

JSFDEF void jsf_features_free(void *features) {
    free(features);
    features = NULL;
}

#endif // JSF_FAST_IMPLEMENTATION