后缀自动机

Description

后缀自动机是一个可以识别一个字符串所有的后缀的自动机,同时这个自动机满足最简的性质,即结点数最少。

可以参考 陈立杰的ppt https://wenku.baidu.com/view/fa02d3fff111f18582d05a81.html 以及 http://blog.csdn.net/qq_35649707/article/details/66473069 学习

设有一个字符串ababcc,现求出它的所以子串和其在原字符串中出现的位置:a{0,2}、b{1,3}、c{4,5}、ab{1,3}、ba{2}、bc{4}、cc{5}、aba{2}、bab{3}、abc{4}、bcc{5}、abab{3}、babc{4}、abcc{5}、ababc{4}、babcc{5}、ababcc{5}

将位置相同的子串分为同一组:

{0,2} - {a}

{1,3} - {b、ab}

{4,5} - {c}

{2} - {ba、aba}

{3} - {bab、abab}

{4} - {bc、abc、babc、ababc}

{5} - {cc、bcc、abcc、babcc、ababcc}

将其作为自动机的状态,再加一个初始状态,画出这个自动机,就是这样

                              a    +----------+     b     +-------------+
                 +----------------->{2}|ba aba+----------->{3}|bab abab |
                 |                 +----------+           +----+--------+
         b  +----+-----+              c                        |
  +--------->{1,3}|b ab+------------------------+              |
  |         +---^------+                        |             c|
  |             |                               |              |
  |            b|                               |              |
  |             |                               |              |
+-+--+   a  +---+----+                        +-v--------------v----+
|root+------>{0,2}|a |                        |{4}|bc abc babc ababc|
+-+--+      +--------+                        +----------+----------+
  |                                                      |
  |                                                     c|
  |                                                      |
  |      c  +-------+          c        +----------------v-----------+
  +--------->{4,5}|c+------------------->{5}|cc bcc abcc babcc ababcc|
            +-------+                   +----------------------------+

还有与之相关的一个parent树

                  +------------------+
                  |{0,1,2,3,4,5}|root|
                  +-------+----------+
                          |
      +---------------------------------------------------+
      |                   |                               |
      |                   |                               |
  +---v----+         +----v-----+                     +---v---+
  |{0,2}|a |         |{1,3}|b ab|                     |{4,5}|c|
  +---+----+         +---+------+                     +--+----+
      |                  |                               |
      |                  |                               |
      |                  |                     +---------+------------------+
      |                  |                     |                            |
      |                  |                     |                            |
+-----v----+      +------v------+   +----------v----------+    +------------v---------------+
|{2}|ba aba|      |{3}|bab abab |   |{4}|bc abc babc ababc|    |{5}|cc bcc abcc babcc ababcc|
+----------+      +-------------+   +---------------------+    +----------------------------+

Template

const int N = 2e5 + 100;
const int M = 26;
struct SAM {
    int ch[N][M];
    int par[N];
    int max_len[N];
    int root, last, sz;
    void init() {
        sz = 0;
        root = sz ++;
        mem(ch[root], -1);
        par[root] = -1;
        max_len[root] = 0;
        last = root;
    }
    void extend(int x) {
        int p = last, np = sz ++;
        mem(ch[np], -1);
        max_len[np] = max_len[p] + 1;
        while (p != -1 && ch[p][x] == -1) 
            ch[p][x] = np, p = par[p];
        if (p == -1) 
            par[np] = root;
        else {
            int q = ch[p][x];
            if (max_len[q] == max_len[p] + 1) 
                par[np] = q;
            else {
                int nq = sz ++;
                memcpy(ch[nq], ch[q], sizeof(ch[nq]));
                max_len[nq] = max_len[p] + 1;
                par[nq] = par[q];
                par[q] = nq;
                par[np] = nq;
                while (p != -1 && ch[p][x] == q) 
                    ch[p][x] = nq, p = par[p];
            }
        }
        last = np;
    }
    int l[N];
    int ra[N];
    void count() {
        for (int i = 0; i <= max_len[last]; i++) 
            l[i] = 0;
        for (int i = 0; i < sz; i++) 
            l[max_len[i]] ++;
        for (int i = 1; i <= max_len[last]; i++) 
            l[i] += l[i - 1];
        for (int i = sz - 1; i >= 0; i--) 
            ra[--l[max_len[i]]] = i;
        for (int i = 0; i < sz; i++) 
            printf("%d%c", ra[i], " \n"[i==sz-1]);
    }
    void debug() {
        for (int i = 0; i < sz; i++) 
            for (int j = 0; j < M; j++) if (ch[i][j] != -1) 
                printf("%d %c %d\n", i, j + 'a', ch[i][j]);
    }
} t;

Problems

http://acm.hdu.edu.cn/showproblem.php?pid=1403

对于串s和t,先对s建立后缀自动机,用后缀自动机读入串t,同时维护一下当前匹配的长度,如果失配就顺着parent树走,直到root或有相应的后继,同时用当前结点 的max_len来更新当前匹配的长度。

代码

# include <bits/stdc++.h>
using namespace std;

# define x first
# define y second
# define sq(x) ((x)*(x))
# define cmin(x,y) ((x)=min(x,y))
# define cmax(x,y) ((x)=max(x,y))
# define mem(x,y) memset(x,y,sizeof(x))
# define show(x) cerr << #x << " = " << x << endl
# define time cerr<<"\n\n"<<"Time Spec = "<<clock()/1000.<<"s\n"

typedef double DB;
typedef long long LL;
typedef pair<int, int> Pr;
typedef unsigned long long U;

const LL INF = 0x3f3f3f3f;
const DB EPS = 1e-8;
const int N = 2e5 + 100;
const int M = 26;
const int MOD = 1e9 + 7;

struct SAM {
    int ch[N][M];
    int par[N];
    int max_len[N];
    int root, last, sz;

    void init() {
        sz = 0;
        root = sz ++;
        mem(ch[root], -1);
        par[root] = -1;
        max_len[root] = 0;
        last = root;
    }

    void extend(int x) {
        int p = last, np = sz ++;
        mem(ch[np], -1);
        max_len[np] = max_len[p] + 1;
        while (p != -1 && ch[p][x] == -1) 
            ch[p][x] = np, p = par[p];
        if (p == -1) 
            par[np] = root;
        else {
            int q = ch[p][x];
            if (max_len[q] == max_len[p] + 1) 
                par[np] = q;
            else {
                int nq = sz ++;
                memcpy(ch[nq], ch[q], sizeof(ch[nq]));
                max_len[nq] = max_len[p] + 1;
                par[nq] = par[q];
                par[q] = nq;
                par[np] = nq;
                while (p != -1 && ch[p][x] == q) 
                    ch[p][x] = nq, p = par[p];
            }
        }
        last = np;
    }

    int l[N];
    int ra[N];
    void count() {
        for (int i = 0; i <= max_len[last]; i++) 
            l[i] = 0;
        for (int i = 0; i < sz; i++) 
            l[max_len[i]] ++;
        for (int i = 1; i <= max_len[last]; i++) 
            l[i] += l[i - 1];
        for (int i = sz - 1; i >= 0; i--) 
            ra[--l[max_len[i]]] = i;

        for (int i = 0; i < sz; i++) 
            printf("%d%c", ra[i], " \n"[i==sz-1]);
    }

    void debug() {
        for (int i = 0; i < sz; i++) 
            for (int j = 0; j < M; j++) if (ch[i][j] != -1) 
                printf("%d %c %d\n", i, j + 'a', ch[i][j]);
    }

    int lcs(char *s) {
        int len = 0, ret = 0;;
        int cur = root;
        for (int i = 0; s[i]; i++) {
            int x = s[i] - 'a';
            while (cur != root && ch[cur][x] == -1) 
                cur = par[cur], cmin(len, max_len[cur]);
            if (ch[cur][x] != -1) 
                cur = ch[cur][x], len ++;

            cmax(ret, len);
        }
        return ret;
    }
} t;

char str1[N], str2[N];

int main() {
# ifdef owly
    freopen("in.txt", "r", stdin);
    // freopen("ou.txt", "w", stdout);
# endif

    while (~scanf("%s%s", str1, str2)) {
        t.init();
        for (int i = 0; str1[i]; i++) 
            t.extend(str1[i] - 'a');

        printf("%d\n", t.lcs(str2));
    }

    return 0;
}

results matching ""

    No results matching ""