天天看點

HDU 4691 Front compression(字尾數組+線段樹/ST表)

Frontcompression

Time Limit: 5000/5000 MS(Java/Others)    Memory Limit: 102400/102400 K(Java/Others)

Total Submission(s): 2609    Accepted Submission(s): 880

Problem Description

Front compressionis a type of delta encoding compression algorithm whereby common prefixes andtheir lengths are recorded so that they need not be duplicated. For example:

HDU 4691 Front compression(字尾數組+線段樹/ST表)

The size of the input is 43 bytes, while the size of the compressed output is 40.Here, every space and newline is also counted as 1 byte.

Given the input, each line of which is a substring of a long string, what aresizes of it and corresponding compressed output? 

Input

There are multipletest cases. Process to the End of File.

The first line of each test case is a long string S made up of lowercaseletters, whose length doesn't exceed 100,000. The second line contains ainteger 1 ≤ N ≤ 100,000, which is the number of lines in the input. Each of thefollowing N lines contains two integers 0 ≤ A < B ≤ length(S), indicatingthat that line of the input is substring [A, B) of S. 

Output

For each testcase, output the sizes of the input and corresponding compressed output. 

Sample Input

frcode

2

0 6

0 6

unitedstatesofamerica

3

0 6

0 12

0 21

myxophytamyxopodnabnabbednabbingnabit

6

0 9

9 16

16 19

19 25

25 32

32 37 

Sample Output

14 12

42 31

43 40 

Author

Zejun Wu (watashi)

Source

​​2013 Multi-University Training Contest 9 ​​

      大緻題意,就是把一些字元串壓縮,如果第i個子串與第i-1個有字首相同的,那麼可以表示為 x string 的形式,即lcp長度加上不同部分。現在,問你這樣子壓縮之後和壓縮之前各有多少個字元(空格和換行符也要算上)。

      根據一個簡單的定理,這個在求height數組的時候貌似有些資料提到過,lcp(i,j)=min(h[k] | i<k<=j),即排名第i和第j的兩個字尾,他們的lcp為二者的左開右閉區間的height[Rank[]]數組的最小值。有N個這樣的子串,我們隻需要做N次這樣的區間最小值即可。而求區間最小,顯然用線段樹即可,由于height數組也不會變化,是以用ST表也可。

      但是,你可能會問,這個height的最小值是字尾的lcp,而這裡的子串并不一定是字尾。其實還是一樣,所有的子串一定是某一個字尾的字首,是以我們直接用它對應的字尾來做lcp即可。唯一的差別就是,如果lcp長度大于前一個或者後一個任意一個的時候,我們要去小的那個,及lcp是三者的最小值。知道兩個子串的lcp之後,就可以知道前面那個數字的占位以及剩下字元串的占位,直接統計即可。

        要注意的是,由于總串為10^5,N的範圍也是10^5,是以我們統計長度要用LL,同時注意數字占位并不是1,而是要根據數字本身的位數來,并且0也要占位。具體見代碼(感覺這個代碼寫得略醜……):

#include<bits/stdc++.h>
#define LL long long
#define N 100010

using namespace std;

struct Suffix_Array
{
    int sa[N],Rank[N],h[N],n;
    int xx[N],yy[N],c[N]; char *s;
    bool cmp(int *s,int x,int y,int k)
    {return (s[x]==s[y])&&(s[x+k]==s[y+k]);}
    void ins(char *str) {s=str;n=strlen(s)+1;}

    void DA()
    {
        memset(c,0,sizeof(c));
        int *x=xx,*y=yy,m=130,*t,i;
        for(i=0;i<n;i++) x[i]=s[i];
        for(i=0;i<n;i++) c[x[i]]++;
        for(i=1;i<m;i++) c[i]+=c[i-1];
        for(i=n-1;i>=0;i--) sa[--c[x[i]]]=i;
        for(int k=1,tot=0;tot<n;k<<=1,m=tot)
        {
            memset(c,0,sizeof(c));
            for(i=0;i<n;i++) c[x[i]]++;
            for(i=1;i<m;i++) c[i]+=c[i-1];
            for(i=n-k,tot=0;i<n;i++) y[tot++]=i;
            for(i=0;i<n;i++) if (sa[i]>=k) y[tot++]=sa[i]-k;
            for(i=n-1;i>=0;i--) sa[--c[x[y[i]]]]=y[i];
            for(i=tot=1,t=x,x=y,y=t,x[sa[0]]=0;i<n;i++)
                x[sa[i]]=cmp(y,sa[i-1],sa[i],k)?tot-1:tot++;
        }
    }

    void cal_height()
    {
        int i,j,k=0;
        for(i=1;i<n;i++) Rank[sa[i]]=i;
        for(i=0;i<n-1;h[Rank[i++]]=k)
            for(k?k--:0,j=sa[Rank[i]-1];s[i+k]==s[j+k];k++);
    }

} SA;

struct ST
{
    struct node{int min,l,r;} tree[N<<2];

    inline void build(int i,int l,int r)
    {
        int mid=(l+r)>>1;
        tree[i].r=r; tree[i].l=l;
        if (l==r){tree[i].min=SA.h[l];return;}
        build(i<<1,l,mid);build(i<<1|1,mid+1,r);
        tree[i].min=min(tree[i<<1].min,tree[i<<1|1].min);
    }

    inline int getmin(int i,int l,int r)
    {
        if (l>r) return 0;
        if (tree[i].l==l&&tree[i].r==r) return tree[i].min;
        int mid=(tree[i].l+tree[i].r)>>1;
        if (mid>=r) return getmin(i<<1,l,r);
        else if (mid<l) return getmin(i<<1|1,l,r);
        else return min(getmin(i<<1,l,mid),getmin(i<<1|1,mid+1,r));
    }

} seg;

char s[N];

int Count(int lcp)
{
    int res=0;
    while(lcp)res++,lcp/=10;
    return max(res,1);
}

int main()
{
    while(~scanf("%s",s))
    {
        int n;
        SA.ins(s);
        scanf("%d",&n);
        int cur_b,cur_e;
        LL ans1=0,ans2=0;
        int pre_b=-1,pre_e=-1;
        SA.DA();SA.cal_height();
        seg.build(1,1,strlen(s));
        while(n--)
        {
            scanf("%d%d",&cur_b,&cur_e);
            ans2+=cur_e-cur_b+1;
            if (pre_b>-1&&pre_e>-1)
            {
                int l=SA.Rank[pre_b];
                int r=SA.Rank[cur_b];
                if (l>r) swap(l,r);
                int tmp=seg.getmin(1,l+1,r);
                if (l==r) tmp=cur_e-cur_b;
                int lcp=min(cur_e-cur_b,min(tmp,pre_e-pre_b));
                ans1+=cur_e-cur_b-lcp+2+Count(lcp);
            } else ans1+=3+cur_e-cur_b; pre_b=cur_b,pre_e=cur_e;
        }
        printf("%lld %lld\n",ans2,ans1);
    }
    return 0;
}