天天看點

淺談求最長公共子串LCP的幾種做法

定義:

淺談求最長公共子串LCP的幾種做法

以這題為例PIPIOJ1476

做法1、動态規劃

淺談求最長公共子串LCP的幾種做法

代碼:

import java.io.OutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintWriter;
import java.util.Arrays;
import java.util.HashSet;
import java.util.StringTokenizer;
import java.io.IOException;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.InputStream;
 
/**
 * Built using CHelper plug-in
 * Actual solution is at the top
 */
public class Main {
    public static void main(String[] args) {
        InputStream inputStream = System.in;
        OutputStream outputStream = System.out;
        InputReader sc = new InputReader(inputStream);
        PrintWriter out = new PrintWriter(outputStream);
        Task solver = new Task();
        solver.solve(1, sc, out);
        out.close();
    }
 
    static class Task {
    	
        public int calc(String s,String t,int n) {
        	int[][] dp=new int[n+1][n+1];
        	int ans=0;
        	for(int i=1;i<=n;i++) {
        		for(int j=1;j<=n;j++) {
        			if(s.charAt(i)==t.charAt(j))
        				dp[i][j]=dp[i-1][j-1]+1;
        			else
        				dp[i][j]=0;
        			ans=Math.max(ans, dp[i][j]);
        		}
        	}
        	return ans;
        }
    	
        public void solve(int testNumber, InputReader sc, PrintWriter out) {
            int n=sc.nextInt();
            String s=" "+sc.next();
            String t=" "+sc.next();
            out.println(calc(s,t,n));
        }
    }
     
    static class InputReader {
        public BufferedReader reader;
        public StringTokenizer tokenizer;
 
        public InputReader(InputStream stream) {
            reader = new BufferedReader(new InputStreamReader(stream), 32768);
            tokenizer = null;
        }
 
        public String next() {
            while (tokenizer == null || !tokenizer.hasMoreTokens()) {
                try {
                    tokenizer = new StringTokenizer(reader.readLine());
                } catch (IOException e) {
                    throw new RuntimeException(e);
                }
            }
            return tokenizer.nextToken();
        }
 
        public int nextInt() {
            return Integer.parseInt(next());
        }
 
        public long nextLong() {
            return Long.parseLong(next());
        }
         
        public boolean hasNext() {
            try {
                String string = reader.readLine();
                if (string == null) {
                    return false;
                }
                tokenizer = new StringTokenizer(string);
                return tokenizer.hasMoreTokens();
            } catch (IOException e) {
                return false;
            }
        }
 
    }
}
 
           

理論上這種做法送出上述的例題傳回的結果應該是TLE,但實際傳回的是MLE(尴尬hhh),因為這種做法的空間複雜度也是n^2級别的。

淺談求最長公共子串LCP的幾種做法

做法2:hash+二分

可以考慮多hash因為本題中串的長度過大,單hash的話很容易産生沖突。在對兩個串分别完成hash之後,我們可以二分LCP的長度len,然後再對答案進行check。簡單提一下check的做法,首先我們記錄下S中長度為len的子串的hash值,然後再枚舉T串中長度為len的子串的hash值,看是否存在hash值相等的情況。

淺談求最長公共子串LCP的幾種做法

代碼:

import java.io.OutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintWriter;
import java.util.Arrays;
import java.util.HashSet;
import java.util.StringTokenizer;
import java.io.IOException;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.InputStream;
 
/**
 * Built using CHelper plug-in
 * Actual solution is at the top
 */
public class Main {
    public static void main(String[] args) {
        InputStream inputStream = System.in;
        OutputStream outputStream = System.out;
        InputReader sc = new InputReader(inputStream);
        PrintWriter out = new PrintWriter(outputStream);
        Task solver = new Task();
        solver.solve(1, sc, out);
        out.close();
    }
 
    static class Task {
        public HashSet<Long> set1=new HashSet<>();
        public HashSet<Long> set2=new HashSet<>();
         
        public boolean check(int len,Hash hash1,Hash hash2,int n) {
            set1.clear();
            set2.clear();
            for(int i=0;i+len-1<n;i++) {
                set1.add(hash1.get(0, i, i+len-1));
                set2.add(hash1.get(1, i, i+len-1));
            }
            for(int i=0;i+len-1<n;i++) {
                if(set1.contains(hash2.get(0, i, i+len-1))&&set2.contains(hash2.get(1, i, i+len-1)))
                    return true;
            }
            return false;
        }
         
        public void solve(int testNumber, InputReader sc, PrintWriter out) {
            int n=sc.nextInt();
            String s=sc.next();
            String t=sc.next();
            Hash hash1=new Hash(s);
            Hash hash2=new Hash(t);
             
            int l=1;
            int r=n;
            int ans=0;
            while(l<=r) {
                int mid=(l+r)>>1;
                if(check(mid,hash1,hash2,n)) {
                    ans=mid;
                    l=mid+1;
                }else
                    r=mid-1;
            }
            out.println(ans);
        }
    }
     
    static class Hash{
        public static final long[] SEED=new long[] {31,37};
        public static final long[] MOD=new long[] {(long) (1e9+7),998244353};
        long[][] pow;
        long[][] v;
        char[] res;
         
        public Hash(String s) {
            this.res=(" "+s).toCharArray();
            this.pow=new long[MOD.length][s.length()+1];
            this.v=new long[MOD.length][s.length()+1];
            this.init();
        }
         
        public void init() {
            pow[1][0]=pow[0][0]=1;
            for(int i=0;i<MOD.length;i++) {
                for(int j=1;j<res.length;j++) {
                    pow[i][j]=pow[i][j-1]*SEED[i]%MOD[i];
                    v[i][j]=v[i][j-1]*SEED[i]+res[j]-'a'+1;  //防止"aaaaaa"這樣的串出現錯誤
                    v[i][j]%=MOD[i];
                }
            }
        }
         
        public long get(int index,int l,int r) {            //此處的l,r下标從0開始
            l++;
            r++;
            long temp=(v[index][r]-v[index][l-1]*pow[index][r-l+1]%MOD[index]+MOD[index])%MOD[index];             //H(T)=(H(S+T)-H(S)*base^(T_length)+MOD)%MOD
            return (temp+MOD[index])%MOD[index];
        }
    }
     
     
    static class InputReader {
        public BufferedReader reader;
        public StringTokenizer tokenizer;
 
        public InputReader(InputStream stream) {
            reader = new BufferedReader(new InputStreamReader(stream), 32768);
            tokenizer = null;
        }
 
        public String next() {
            while (tokenizer == null || !tokenizer.hasMoreTokens()) {
                try {
                    tokenizer = new StringTokenizer(reader.readLine());
                } catch (IOException e) {
                    throw new RuntimeException(e);
                }
            }
            return tokenizer.nextToken();
        }
 
        public int nextInt() {
            return Integer.parseInt(next());
        }
 
        public long nextLong() {
            return Long.parseLong(next());
        }
         
        public boolean hasNext() {
            try {
                String string = reader.readLine();
                if (string == null) {
                    return false;
                }
                tokenizer = new StringTokenizer(string);
                return tokenizer.hasMoreTokens();
            } catch (IOException e) {
                return false;
            }
        }
 
    }
}
 
           

正确通過本題:

淺談求最長公共子串LCP的幾種做法

但由于常數過大的緣故,還是比較慢的。

做法3:字尾數組

淺談求最長公共子串LCP的幾種做法
淺談求最長公共子串LCP的幾種做法

以上截取自IOI2009 國家集訓隊論文《字尾數組——處理字元串的有力工具》

代碼:

import java.io.OutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintWriter;
import java.util.Arrays;
import java.util.StringTokenizer;
import java.io.IOException;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.InputStream;

/**
 * Built using CHelper plug-in
 * Actual solution is at the top
 */
public class Main {
    public static void main(String[] args) {
        InputStream inputStream = System.in;
        OutputStream outputStream = System.out;
        InputReader sc = new InputReader(inputStream);
        PrintWriter out = new PrintWriter(outputStream);
        Task solver = new Task();
        solver.solve(1, sc, out);
        out.close();
    }

    static class Task {
        public void solve(int testNumber, InputReader sc, PrintWriter out) {
           while(sc.hasNext()) {
        	   int n=sc.nextInt();
        	   String s=sc.next();
               String t=sc.next();
               String temp=s+"#"+t;
               SuffixArray SA=new SuffixArray(temp);
               SA.getSA();
               SA.getHeight();
               System.out.println(SA.getAns(s.length()));
           }
        }
    }
    
    static class SuffixArray{
    	public static int Max=(int) (1e5+10);
		public char[] s;
		public int[] x;    //x[i]是第i個元素的第一關鍵字(值)
		public int[] y;    //y[i]表示第二關鍵字排名為i的數,第一關鍵字的位置(下标)
		public int[] c;    //桶
		public int[] sa;   //表示字典序第i個字尾的起始下标
		public int[] height;  //height[i]=suffix(sa[i-1])和 suffix(sa[i])的最長公共字首    排名為i和i-1的字尾的最長公共字首
		public int[] rank;   //起始位置的下标為i的字尾的排名
		public int[][] ST;
		
		public SuffixArray(String res) {
			s=(" "+res).toCharArray();
			x=new int[s.length+1];
			y=new int[s.length+1];
			c=new int[Math.max(Max, s.length+1)];
			sa=new int[s.length+1];
			height=new int[s.length+1];
			rank=new int[s.length+1];
			//ST=new int[s.length+1][30];
		}
		
		public void getSA() {
			int n=s.length-1;
			int m=200;              //字元個數    排序的過程會發生改變
			for(int i=1;i<=n;i++)
				c[x[i]=s[i]]++;
			for(int i=2;i<=m;i++)
				c[i]+=c[i-1];    做c的字首和,我們就可以得出每個關鍵字最多是在第幾名
			for(int i=n;i>=1;i--)
				sa[c[x[i]]--]=i;
			for(int k=1;k<=n;k<<=1) {        
				int num=0;
				for(int i=n-k+1;i<=n;i++)   //y[i]表示第二關鍵字排名為i的數,第一關鍵字的位置	
					y[++num]=i;             //第n-k+1到第n位是沒有第二關鍵字的 是以排名在最前面
				for(int i=1;i<=n&&num<n;i++) {
					if(sa[i]>k)
						y[++num]=sa[i]-k;     //如果滿足(sa[i]>k) 那麼它可以作為别人的第二關鍵字,就把它的第一關鍵字的位置添加進y就行了
				}
				for(int i=0;i<=m;i++)
					c[i]=0;
				for(int i=1;i<=n;i++)
					c[x[i]]++;
				for(int i=2;i<=m;i++)
					c[i]+=c[i-1];    //第一關鍵字排名為1~i的數有多少個
				for(int i=n;i>=1;i--) {    //因為y的順序是按照第二關鍵字的順序來排
					sa[c[x[y[i]]]--]=y[i];  //第二關鍵字靠後的,在同一個第一關鍵字桶中排名越靠後
					y[i]=0;
				}
				
				int[] temp=x;       //此時y中存的是值
				x=y;
				y=temp;
				
				x[sa[1]]=1;    //對關鍵字(值)重新編号
				num=1;
				for(int i=2;i<=n;i++) {
					 x[sa[i]]=(y[sa[i]]==y[sa[i-1]] && y[sa[i]+k]==y[sa[i-1]+k]) ? num : ++num;    
				}
				if(num==n)      //排序完成 ,所有關鍵字已經兩兩不同
					break;
				m=num;    
			}
		}
		
		public void getHeight() {
			int k=0;
			int n=s.length-1;
			for(int i=1;i<=n;i++)
				rank[sa[i]]=i;
			for (int i=1; i<=n; ++i) {                       //h[i]=height[rank[i]],也就是 suffix(i)和在它前一名的字尾的最長公共字首。
		        if (rank[i]==1) continue;//第一名height為0
		        if (k>0) 
		        	--k;//h[i]>=h[i-1]-1;
		        int j=sa[rank[i]-1];
		        while (j+k<=n && i+k<=n && s[i+k]==s[j+k])   //增量法  每次求的都是目前的h[i]
		        	++k;
		        height[rank[i]]=k;//h[i]=height[rank[i]];
		    }
		}
		
		public int getAns(int n) {
			int ans=0;
			
			for(int i=2;i<height.length;i++) {
				if((sa[i]<=n&&sa[i-1]>n+1)||(sa[i-1]<=n&&sa[i]>n+1))
					ans=Math.max(ans, height[i]);
			}
			return ans;
		}
		
	/*	public void initLCP() {
			for(int i=0;i<ST.length;i++)
				Arrays.fill(ST[i], Integer.MAX_VALUE);
			for(int i=2;i<ST.length;i++)
				ST[i][0]=height[i];
			int t=(int) (Math.log((double)height.length-1)/Math.log(2.0)+1);
			for(int j=1;j<t;j++) {
				for(int i=1;i+(1<<j)-1<ST.length;i++)
					ST[i][j]=Math.min(ST[i][j-1], ST[i+(1<<(j-1))][j-1]);
			}
		}
		
		public int getLCP(int l,int r) {
			if(l==r)
				return s.length-l;
			l++;
			int k=(int) (Math.log((double)(r-l+1))/Math.log(2.0));
			return Math.min(ST[l][k], ST[r-(1<<k)+1][k]);
		}*/
	}

    static class InputReader {
        public BufferedReader reader;
        public StringTokenizer tokenizer;

        public InputReader(InputStream stream) {
            reader = new BufferedReader(new InputStreamReader(stream), 32768);
            tokenizer = null;
        }

        public String next() {
            while (tokenizer == null || !tokenizer.hasMoreTokens()) {
                try {
                    tokenizer = new StringTokenizer(reader.readLine());
                } catch (IOException e) {
                    throw new RuntimeException(e);
                }
            }
            return tokenizer.nextToken();
        }

        public int nextInt() {
            return Integer.parseInt(next());
        }

        public long nextLong() {
            return Long.parseLong(next());
        }
        
        public boolean hasNext() {
            try {
                String string = reader.readLine();
                if (string == null) {
                    return false;
                }
                tokenizer = new StringTokenizer(string);
                return tokenizer.hasMoreTokens();
            } catch (IOException e) {
                return false;
            }
        }

    }
}
           
淺談求最長公共子串LCP的幾種做法

可以看到這種做法要前面遠優于兩種做法。

繼續閱讀