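/*
 * Find the strings shared by two input strings in which the number of consecutive
 * identical characters is at least min.
 * (Some of the specifics involve material that cannot be published, so they are left
 * out here; if you know a better algorithm, criticism and discussion are welcome.)
 * The function that matters is Compare2Str(String str1, String str2, int min); the
 * rest can be ignored.
 * NOTE: there still seems to be a problem somewhere -- debugging continues tomorrow.
 */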
package com.ssj.test;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
/**
 * Small command-line experiment that compares character sequences.
 *
 * <p>It offers three static helpers: {@link #CompareTest(String, String)} prints the common
 * prefix of two strings, {@link #Compare2Str(String, String, int)} prints every maximal run
 * of consecutive characters the two strings share that is at least a given length, and
 * {@link #isHaveSubString(String, String)} prints a marker when one string starts with the
 * other. {@link #main(String[])} drives them with hard-coded sample sequences.
 */
public class CompareString {

    /**
     * Runs the comparisons on the hard-coded sample sequences and prints the results.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String contig1 = "AAGAAACGTATCCGTGTTTACATGGATGGATGTTTCGATCTCATGCATTATGGACACGCA"
                + "AATGCTTTAAGACAAGCTAAAGCTTTAGGAGATGAACTAGTAGTTGGAATTGTAAGTGAT"
                + "GAAGAAATCATCAAGAACAAAGGTCCTCCTGTTTTATCAATGGAGGAAAGATTGGCACTT"
                + "GTTAGTGGATTGAAGTGGGTTGATGAAGTTATTGCTAATGCACCATATGCTATTACCGAA"
                + "GACTTCATGAACAGTCTATTTAAAGAACATAAGATTGATTATATCATTCATGGAGATGAT"
                + "CCTTGTTTGCTTCCTGATGGAAGTGATGCATATGCTTTAGCAAAAAAAGTCGGTCGTTAC"
                + "AAGCAAATTAAACGTACAGAAGGCGTATCAAGCACCGACATTGTAGGAAGGATACTTGCA"
                + "TCCATGGAAGATAAAGAAGTATGTGAAGTTAATGGAGAAAGTAATGAAATGAATAAAAAT"
                + "TTGGACAGCCATTTCAAGGCCAAACATGCCTCTAATTTTTTGCCTACATCAAGAAGAATT"
                + "GTTCAGTTT";
        String EL372564 = "AAGAAACGTATCCGTGTGTACATGGATGGATGTTTTGATCTCATGCATTATGGTCACGCAAATGCTTTAAGACAAGCTAAAGCTTTAGGAGACGAATTAGTGGTTGGAATTGTAAGTGATGAAGAAATCATCAAGAACAAAGGTCCTCCTGTTTTATCAATGGAGGAAAGATTGGCACTTGTTAGTGGATTGAAGTGGGTTGATGAAGTTATTGCTAATGCACCTTATGCTATTACTGAAGACTTCATGAACAGTCTATTTAAAGAACATAAGATTGATTATATCATTCATGGAGATGATCCTTGTTTGCTTCCTGATGGAAGTGATGCATATGCTTTAGCAAAAAAA";
        String EL371249 = "AAGAAACGTATCCGTGTGTACATGGATGGATGTTTTGATCTCATGCATTATGGTCACGCAAATGCTTTAAGACAAGCTAAAGCTTTAGGAGACGAATTAGTGGTTGGAATTGTAAGTGATGAAGAAATCGTCAAGAACAAAGGTCCTCCTGTTTTATCAATGGAGGAAAGATTGGCACTTGTTAGTGGATTGAAGTGGGTTGATGAAGTTATTGCTAATGCACCTTATGCTATTACTGAAGACTTCATGAACAGTCTATTTAAAGAACATAAGATTGATTATATCATTCATGGAGATGATCCTTGTTTGCTTCCTGATGGAAGTGATGCATATGCTTTAGCAAAAAAA";
        String EL347301 = "AAGAAACGTATCCGTGTGTACATGGATGGATGTTTTGATCTCATGCATTATGGTCACGCAAATGCTTTAAGACAAGCTAAAGCTTTAGGAGACGAATTAGTGGTTGGAATTGTAAGTGATGAAGAAATCATCAAGAACAAAGGTCCTCCTGTTTTATCAATGGAGGAAAGATTGGCACTTGTTAGTGGATTGAAGTGGGTTGATGAAGTTATTGCTAATGCACCTTATGCTATTACTGAAGACTTCATGAACAGTCTATTTAAAGAACATAAGATTGATTATATCATTCATGGAGATGATCCTTGTTTGCTTCCTGATGGAAGTGATGCATATGCTTTAGCAAAAAAAGTTGGTCGTTACAAACAAATTAAACGTACAGAAGGCGTATCAAGCACCGACATTGTAGGAAGGATACTTGCATCCATGGAAGAT";
        String EH685069 = "AAGAAACGTATCCGTGTTTACATGGATGGATGTTTCGATCTCATGCATTATGGACACGCAAATGCTTTAAGACAAGCTAAAGCTTTAGGAGATGAACTAGTAGTTGGAATTGTAAGTGATGAAGAAATCATCAAGAACAAAGGTCCTCCTGTTTTATCAATGGAGGAAACATTGGCACTTGTTAGTGGATTGAAGTGGGTTGATGAAGTTATTGCTAATGCACCATATGCTATTACCGAAGACTTCATGAACAGTCTATTTAAAGAACATAAGATTGATTATATCATTCATGGAGATGATCCTTGTCTGCTTCCTGATGGAAGTGATGCATATGCTTTAGCAAAAAAAGTCGGTCGTTACAAGCAAATTAAACGTACAGAAAGCGTATCAAGCACCGACATTGTAGGAAGGGTACTTGCATCCATGGAAGATAAAGAACTATGTGAA";

        System.out.println("--------contig1 and EL372564---------");
        CompareTest(contig1, EL372564);
        System.out.println("\n--------EL371249 and EL372564---------");
        CompareTest(EL371249, EL372564);
        System.out.println("\n--------EL372564 and EL347301---------");
        CompareTest(EL372564, EL347301);
        System.out.println("\n--------contig1 and EH685069---------");
        CompareTest(contig1, EH685069);

        String contig = "TNNNNNNNNNNNNNNNNNNNNNACTTATGCACAGACCAACCTGAATGTTGACGTGCTCCA"
                + "CATTTCCTACCTATCTTACACTTACCATAATGGGTGGAATATGTTCTTTCTAAAGTATTT"
                + "TCATTAGGTTTTAAAGCCCACATGCATCTTAAAGAAAATGTAAACCACATGTTTGACATT"
                + "CATTTACACTTAACTACTTTTTTTCCTATAAAAAAGAAAAATGTAAATGTGATGTTCAAA"
                + "TTGCCAATTTGAGAGCCATATTGTGTACATTAAAATTAGTGGCATTTCAAGTCATTTTTC"
                + "TTTCCATGGAATTATTACAAAGGAAAATGCCTTTGAACAATAGAAAGTCATGGATCTATC"
                + "CCAACTCTAAATAAGTGGTATTTTAGTGGGTTTTAAACATGGTTAAAGTTCTTATCCCAG"
                + "AGCTCTAAACCACAAAGCCCAGTTATATTGGTTCTCTTCATCTGTCAGAGGTAAAAACAG"
                + "AGGGTATTGATCTTTTATTTATCTTTGAAGCTTTAAAGTATAATTAGTTAA" + "AGAAAAAGT"
                + "TTCTGTACTTAGAACATCTCGGTTCTAGCCTTGACTTATGCTAGAAACAAGGATTATATC"
                + "TGTATTAGTCTGGTCATACACTGTTATAAAGAACTACCTGAGACTGGATAATTTATAAAG"
                + "AAAAGAGGTTTAATTAGCTTATGGTTCCACAGGCTGTACAGGAAGCATGCCTGGGGAGGC"
                + "CTTAAGAAACTTACAATCATGGCAGAATGTGAAGAGGAAGAAAGCATGTTTTCTCATGGC"
                + "CAGAGCAAGAGGAAGAGAGAGAAGGGGGAGGTGCTACTCCACTTTTAANGCAACCCAGAT"
                + "CTCATGAGAACTCCACTCCATTGTGCACAAAAATGGCCAAGGGCGGAAATTTCCACCCTC"
                + "GTGATCNNCAATCACCTCCCACCAGGCCCCTCCTCTATTATTGGGGATTACAATTTGGCA"
                + "TGAGATCTGGGTGAGGACACAAATCCAAACCATATCAACATCCCTACTCTACTTACTTTA"
                + "TAAACTTGGTGGGAGAATCAAGTGGCATGGGGATGAAAGTCAATGGATACCGGAAAGAGG"
                + "ACTAATCCATGGCTGAAAGGGGGGTATCCAATTACACCAAGGCTTTACAGGGGAAATATA"
                + "CAGAGGTTAAAACAAAGGTTTGGTCTTTCCAAAA";
        String ENV1180 = "TNNNNNNNNNNNNNNNNNNNNNACTTATGCACAGACCAACCTGAATGTTGACGTGCTCCA"
                + "CATTTCCTACCTATCTTACACTTACCATAATGGGTGGAATATGTTCTTTCTAAAGTATTT"
                + "TCATTAGGTTTTAAAGCCCACATGCATCTTAAAGAAAATGTAAACCACATGTTTGACATT"
                + "CATTTACACTTAACTACTTTTTTTCCTATAAAAAAGAAAAATGTAAATGTGATGTTCAAA"
                + "TTGCCAATTTGAGAGCCATATTGTGTACATTAAAATTAGTGGCATTTCAAGTCATTTTTC"
                + "TTTCCATGGAATTATTACAAAGGAAAATGCCTTTGAACAATAGAAAGTCATGGATCTATC"
                + "CCAACTCTAAATAAGTGGTATTTTAGTGGGTTTTAAACATGGTTAAAGTTCTTATCCCAG"
                + "AGCTCTAAACCACAAAGCCCAGTTATATTGGTTCTCTTCATCTGTCAGAGGTAAAAACAG"
                + "AGGGTATTGATCTTTTATTTATCTTTGAAGCTTTAAAGTATAATTAGTTAA" + "GGAAAAGTT"
                + "CTGTACTTAGAACATCTCGGTTCTAGCCTTGACTTATGCTAGAAACAAGGATTATATCTG"
                + "TATTAGTCTGGTCTACACTGTTATAAAGAACTACCTG";
        String ENV446 = "CCXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXTATCCAACTCTAAATAAGTGGTATTT"
                + "AGTGGGTTTTAAACATGGTTAAAGTTCTTATCCCAGAGCTCTAAACCACAAAGCCCAGTT"
                + "ATATTTGTTCTCTTCATCTGTCAGAGGTAAAAACAGAGGGTATTGATCTTTTATTTATCT"
                + "TTGAAGCTTTAAAGTATAATTTGTTCAAGGAAAAGTTTCTGTACTTAGAACATCTCGGTT"
                + "CTAGCCTTGACTTATGCTAGAAACAAGGATTATATCTGTATTAGTCTGTTCATACACTGT"
                + "TATAAAGAACTACCTGAGACTGGATAATTTATAAAGAAAAGAGGTTTAATTAGCTTATGG"
                + "TTCCACAGGCTGTACAGGAAGCATGCCTGGGGAGGCCTTAAGAAACTTACAATCATGGCA"
                + "GAATGTGAAGAGGAAGAAAGCATGTTTTCTCATGGCCAGAGCAAGAGGAAGAGAGAGAAG"
                + "GGGGAGGTGCTACTCCACTTTTAANGCAACCCAGATCTCATGAGAACTCCACTCCATTGT"
                + "GCACAAAAATGGCCAAGGGCGGAAATTTCCACCCTCGTGATCNNCAATCACCTCCCACCA"
                + "GGCCCCTCCTCTATTATTGGGGATTACAATTTGGCATGAGATCTGGGTGAGGACACAAAT"
                + "CCAAACCATATCAACATCCCTACTCTACTTACTTTATAAACTTGGTGGGAGAATCAAGTG"
                + "GCATGGGGATGAAAGTCAATGGATACCGGAAAGAGGACTAATCCATGGCTGAAAGGGGGG"
                + "TATCCAATTACACCAAGGCTTTACAGGGGAAATATACAGAGGTTAAAACAAAGGTTTGGT"
                + "CTTTCCAAAA";

        System.out.println("\n--------contig and ENV1180---------");
        // The common prefix is the same whichever argument comes first.
        CompareTest(ENV1180, contig);

        Compare2Str(ENV446, ENV1180, 20);
    }

    /**
     * Prints the length and the text of the common prefix of two strings.
     *
     * <p>Characters are compared position by position from index 0 and the comparison stops
     * at the first mismatch or at the end of the shorter string.
     *
     * @param str1 first string
     * @param str2 second string
     */
    public static void CompareTest(String str1, String str2) {
        int limit = Math.min(str1.length(), str2.length());
        int prefixLength = 0;
        while (prefixLength < limit && str1.charAt(prefixLength) == str2.charAt(prefixLength)) {
            prefixLength++;
        }
        System.out.println("the num of the same char : " + prefixLength);
        System.out.println("the same String is : " + str1.substring(0, prefixLength));
    }

    /**
     * Prints every maximal run of consecutive characters that occurs in both strings and is
     * at least {@code min} characters long, one per line, each prefixed with
     * {@code "-------"}.
     *
     * <p>A run is maximal when it cannot be extended to the left or to the right without the
     * two strings disagreeing (or one of them ending). The same text is printed once per
     * pair of positions at which it occurs as a maximal run. Results are ordered by the
     * position at which the run ends in the longer string, then by the end position in the
     * shorter one; when both strings have the same length {@code str2} is treated as the
     * longer one.
     *
     * @param str1 first string
     * @param str2 second string
     * @param min minimum run length to report; values below 1 are treated as 1
     */
    public static void Compare2Str(String str1, String str2, int min) {
        String longer;
        String shorter;
        if (str1.length() > str2.length()) {
            longer = str1;
            shorter = str2;
        } else {
            longer = str2;
            shorter = str1;
        }
        for (String match : findCommonRuns(longer, shorter, min)) {
            System.out.println("-------" + match);
        }
    }

    /**
     * Collects every maximal common run of {@code longer} and {@code shorter} whose length is
     * at least {@code min} (clamped to 1).
     */
    private static List<String> findCommonRuns(String longer, String shorter, int min) {
        List<String> runs = new ArrayList<String>();
        int threshold = Math.max(min, 1);
        int rows = longer.length();
        int cols = shorter.length();

        // previous[j] / current[j] hold the length of the common run that ends at
        // longer[i - 2] / longer[i - 1] and shorter[j - 1]; only two rows are ever needed.
        int[] previous = new int[cols + 1];
        int[] current = new int[cols + 1];

        for (int i = 1; i <= rows; i++) {
            char rowChar = longer.charAt(i - 1);
            for (int j = 1; j <= cols; j++) {
                int run = (rowChar == shorter.charAt(j - 1)) ? previous[j - 1] + 1 : 0;
                current[j] = run;
                if (run < threshold) {
                    continue;
                }
                // Report only when the run cannot grow further to the right; it is already
                // left-maximal because the counter restarts at every mismatch.
                boolean atEnd = (i == rows) || (j == cols);
                if (atEnd || longer.charAt(i) != shorter.charAt(j)) {
                    runs.add(longer.substring(i - run, i));
                }
            }
            int[] swap = previous;
            previous = current;
            current = swap;
        }
        return runs;
    }

    /**
     * Prints {@code "---OK--"} when {@code str1} starts with {@code str2}; prints nothing
     * otherwise. Despite the name, this is a prefix test, not a general substring test.
     *
     * @param str1 string to inspect
     * @param str2 candidate prefix
     */
    public static void isHaveSubString(String str1, String str2) {
        if (str1.startsWith(str2)) {
            System.out.println("---OK--");
        }
    }
}