Frontcompression
Time Limit: 5000/5000 MS(Java/Others) Memory Limit: 102400/102400 K(Java/Others)
Total Submission(s): 2609 Accepted Submission(s): 880
Problem Description
Front compressionis a type of delta encoding compression algorithm whereby common prefixes andtheir lengths are recorded so that they need not be duplicated. For example:
![](https://img.laitimes.com/img/_0nNw4CM6IyYiwiM6ICdiwiI0gTMx81dsQWZ4lmZf1GLlpXazVmcvwFciV2dsQXYtJ3bm9CX9s2RkBnVHFmb1clWvB3MaVnRtp1XlBXe0xCMy81dvRWYoNHLwEzX5xCMx8FesU2cfdGLwMzX0xiRGZkRGZ0Xy9GbvNGLpZTY1EmMZVDUSFTU4VFRR9Fd4VGdsYTMfVmepNHLrJXYtJXZ0F2dvwVZnFWbp1zczV2YvJHctM3cv1Ce-cGcq5yNzMDO2AzY3YDNmhTMlZDMzYzXzIjM0ATM1IzLchDMyIDMy8CXn9Gbi9CXzV2Zh1WavwVbvNmLvR3YxUjLyM3Lc9CX6MHc0RHaiojIsJye.jpg)
The size of the input is 43 bytes, while the size of the compressed output is 40.Here, every space and newline is also counted as 1 byte.
Given the input, each line of which is a substring of a long string, what aresizes of it and corresponding compressed output?
Input
There are multipletest cases. Process to the End of File.
The first line of each test case is a long string S made up of lowercaseletters, whose length doesn't exceed 100,000. The second line contains ainteger 1 ≤ N ≤ 100,000, which is the number of lines in the input. Each of thefollowing N lines contains two integers 0 ≤ A < B ≤ length(S), indicatingthat that line of the input is substring [A, B) of S.
Output
For each testcase, output the sizes of the input and corresponding compressed output.
Sample Input
frcode
2
0 6
0 6
unitedstatesofamerica
3
0 6
0 12
0 21
myxophytamyxopodnabnabbednabbingnabit
6
0 9
9 16
16 19
19 25
25 32
32 37
Sample Output
14 12
42 31
43 40
Author
Zejun Wu (watashi)
Source
2013 Multi-University Training Contest 9
大緻題意,就是把一些字元串壓縮,如果第i個子串與第i-1個有字首相同的,那麼可以表示為 x string 的形式,即lcp長度加上不同部分。現在,問你這樣子壓縮之後和壓縮之前各有多少個字元(空格和換行符也要算上)。
根據一個簡單的定理,這個在求height數組的時候貌似有些資料提到過,lcp(i,j)=min(h[k] | i<k<=j),即排名第i和第j的兩個字尾,他們的lcp為二者的左開右閉區間的height[Rank[]]數組的最小值。有N個這樣的子串,我們隻需要做N次這樣的區間最小值即可。而求區間最小,顯然用線段樹即可,由于height數組也不會變化,是以用ST表也可。
但是,你可能會問,這個height的最小值是字尾的lcp,而這裡的子串并不一定是字尾。其實還是一樣,所有的子串一定是某一個字尾的字首,是以我們直接用它對應的字尾來做lcp即可。唯一的差別就是,如果lcp長度大于前一個或者後一個任意一個的時候,我們要去小的那個,及lcp是三者的最小值。知道兩個子串的lcp之後,就可以知道前面那個數字的占位以及剩下字元串的占位,直接統計即可。
要注意的是,由于總串為10^5,N的範圍也是10^5,是以我們統計長度要用LL,同時注意數字占位并不是1,而是要根據數字本身的位數來,并且0也要占位。具體見代碼(感覺這個代碼寫得略醜……):
#include<bits/stdc++.h>
#define LL long long
#define N 100010
using namespace std;
struct Suffix_Array
{
int sa[N],Rank[N],h[N],n;
int xx[N],yy[N],c[N]; char *s;
bool cmp(int *s,int x,int y,int k)
{return (s[x]==s[y])&&(s[x+k]==s[y+k]);}
void ins(char *str) {s=str;n=strlen(s)+1;}
void DA()
{
memset(c,0,sizeof(c));
int *x=xx,*y=yy,m=130,*t,i;
for(i=0;i<n;i++) x[i]=s[i];
for(i=0;i<n;i++) c[x[i]]++;
for(i=1;i<m;i++) c[i]+=c[i-1];
for(i=n-1;i>=0;i--) sa[--c[x[i]]]=i;
for(int k=1,tot=0;tot<n;k<<=1,m=tot)
{
memset(c,0,sizeof(c));
for(i=0;i<n;i++) c[x[i]]++;
for(i=1;i<m;i++) c[i]+=c[i-1];
for(i=n-k,tot=0;i<n;i++) y[tot++]=i;
for(i=0;i<n;i++) if (sa[i]>=k) y[tot++]=sa[i]-k;
for(i=n-1;i>=0;i--) sa[--c[x[y[i]]]]=y[i];
for(i=tot=1,t=x,x=y,y=t,x[sa[0]]=0;i<n;i++)
x[sa[i]]=cmp(y,sa[i-1],sa[i],k)?tot-1:tot++;
}
}
void cal_height()
{
int i,j,k=0;
for(i=1;i<n;i++) Rank[sa[i]]=i;
for(i=0;i<n-1;h[Rank[i++]]=k)
for(k?k--:0,j=sa[Rank[i]-1];s[i+k]==s[j+k];k++);
}
} SA;
struct ST
{
struct node{int min,l,r;} tree[N<<2];
inline void build(int i,int l,int r)
{
int mid=(l+r)>>1;
tree[i].r=r; tree[i].l=l;
if (l==r){tree[i].min=SA.h[l];return;}
build(i<<1,l,mid);build(i<<1|1,mid+1,r);
tree[i].min=min(tree[i<<1].min,tree[i<<1|1].min);
}
inline int getmin(int i,int l,int r)
{
if (l>r) return 0;
if (tree[i].l==l&&tree[i].r==r) return tree[i].min;
int mid=(tree[i].l+tree[i].r)>>1;
if (mid>=r) return getmin(i<<1,l,r);
else if (mid<l) return getmin(i<<1|1,l,r);
else return min(getmin(i<<1,l,mid),getmin(i<<1|1,mid+1,r));
}
} seg;
char s[N];
int Count(int lcp)
{
int res=0;
while(lcp)res++,lcp/=10;
return max(res,1);
}
int main()
{
while(~scanf("%s",s))
{
int n;
SA.ins(s);
scanf("%d",&n);
int cur_b,cur_e;
LL ans1=0,ans2=0;
int pre_b=-1,pre_e=-1;
SA.DA();SA.cal_height();
seg.build(1,1,strlen(s));
while(n--)
{
scanf("%d%d",&cur_b,&cur_e);
ans2+=cur_e-cur_b+1;
if (pre_b>-1&&pre_e>-1)
{
int l=SA.Rank[pre_b];
int r=SA.Rank[cur_b];
if (l>r) swap(l,r);
int tmp=seg.getmin(1,l+1,r);
if (l==r) tmp=cur_e-cur_b;
int lcp=min(cur_e-cur_b,min(tmp,pre_e-pre_b));
ans1+=cur_e-cur_b-lcp+2+Count(lcp);
} else ans1+=3+cur_e-cur_b; pre_b=cur_b,pre_e=cur_e;
}
printf("%lld %lld\n",ans2,ans1);
}
return 0;
}