天天看點

字元串操作大全:面試準備和日常編碼所需一文打盡!

字元串操作大全:面試準備和日常編碼所需一文打盡!

全文共14056字,預計學習時長36分鐘

字元串操作大全:面試準備和日常編碼所需一文打盡!

圖源:unsplash

字元串是一系列字元,由常數或變量構成。它是程式設計語言中必不可少的資料類型。本文中将重點關注JavaScript字元串操作,但其原理和算法也可應用于其他語言。

參加技術面試時,面試官常常會關注以下内容:

·        程式設計技術

·        語言能力

·        解題技巧

本文不僅可以讓你成功通過技術面試,對日常編碼也很有用。代碼要點格式中,我們列出了JavaScript字元串的幾點重要特性,這是程式設計技能的基礎。其中包括存在了20年的屬性和方法,也涵蓋ES2021中的特性。如有不清楚之處,可以有針對性地查漏補缺。JavaScript程式設計語言可以解決許多應用問題。這些算法或其變體,經常出現在真實的面試場景中。

字元串操作大全:面試準備和日常編碼所需一文打盡!

字元串屬性和方法

字元串用于表示和操作字元序列。字元串屬性和方法有很多。以下是可供參考的代碼示例,包括ES2020中的“matchAll”和ES2021中的“replaceAll”。

const str = Today is a nice day! ;        console.log(str.length); // 20      console.log(str[2]); // "d"      console.log(typeof str); // "string"      console.log(typeof str[2]); // "string"      console.log(typeofString(5)); //"string"      console.log(typeofnewString(str)); //"object"      console.log(str.indexOf( is )); // 6      console.log(str.indexOf( today )); // -1      console.log(str.includes( is )); // true      console.log(str.includes( IS )); // false      console.log(str.startsWith( Today )); // true      console.log(str.endsWith( day )); // false      console.log(str.split(   )); // ["Today", "is", "a", "nice","day!"]      console.log(str.split(  )); // ["T", "o", "d", "a","y", " ", "i", "s", " ","a", " ", "n", "i", "c","e", " ", "d", "a", "y","!"]      console.log(str.split( a )); // ["Tod", "y is ", " nice d","y!"]      console.log(str +1+2); // "Today is a nice day!12"      console.log(str + str); // "Today is a nice day!Today is a niceday!"      console.log(str.concat(str)); // "Today is a nice day!Today is a niceday!"      console.log(str.repeat(2)); // "Today is a nice day!Today is a nice day!"      console.log( abc < bcd ); // true      console.log( abc .localeCompare( bcd )); // -1      console.log( a .localeCompare( A )); // -1      console.log( a .localeCompare( A , undefined, { numeric: true })); // -1      console.log( a .localeCompare( A , undefined, { sensitivity:  accent  })); // 0      console.log( a .localeCompare( A , undefined, { sensitivity:  base  })); // 0      console.log( a .localeCompare( A! , undefined, { sensitivity:  base , ignorePunctuation: true })); // 0      console.log( abc .toLocaleUpperCase()); // "ABC"      console.log(str.padStart(25,  * )); // "*****Todayis a nice day!"      console.log(str.padEnd(22,  ! )); // "Today is anice day!!!"      console.log(    middle  .trim().length); // 6      console.log(    middle  .trimStart().length); // 8      console.log(    middle  .trimEnd().length); // 9      console.log(str.slice(6, 8)); // "is"      console.log(str.slice(-4)); // "day!"      console.log(str.substring(6, 8)); // "is"      console.log(str.substring(-4)); // "Today is a nice day!"      console.log( a .charCodeAt()); // 97      console.log(String.fromCharCode(97)); // "a"      console.log(str.search(/[a-c]/)); // 3      console.log(str.match(/[a-c]/g)); // ["a", "a", "c", "a"]      console.log([...str.matchAll(/[a-c]/g)]);      // [Array(1), Array(1), Array(1), Array(1)]      // 0: ["a", index: 3, input: "Today is a nice day!",groups: undefined]      // 1: ["a", index: 9, input: "Today is a nice day!",groups: undefined]      // 2: ["c", index: 13, input: "Today is a niceday!", groups: undefined]      // 3: ["a", index: 17, input: "Today is a niceday!", groups: undefined]      console.log([... test1test2 .matchAll(/t(e)(st(d?))/g)]);      // [Array(4), Array(4)]      // 0: (4) ["test1", "e", "st1","1", index: 0, input: "test1test2", groups: undefined]      // 1: (4) ["test2", "e", "st2","2", index: 5, input: "test1test2", groups: undefined]      console.log(str.replace( a ,  z )); // Todzy is anice day!      console.log(str.replace(/[a-c]/,  z )); // Todzy is anice day!      console.log(str.replace(/[a-c]/g,  z )); // Todzy is znize dzy!      console.log(str.replaceAll( a ,  z )); // Todzy is znice dzy!      console.log(str.replaceAll(/[a-c]/g,  z )); // Todzy is znize dzy!      console.log(str.replaceAll(/[a-c]/,  z )); // TypeError:String.prototype.replaceAll called with a non-global RegExp argument
           
字元串操作大全:面試準備和日常編碼所需一文打盡!

映射和集合

對于字元串操作,我們需要在某處存儲中間值。數組、映射和集合都是需要掌握的常用資料結構,本文主要讨論集合和映射。

集合

Set是存儲所有類型的唯一值的對象。以下是供參考的代碼示例,一目了然。

const set =newSet( aabbccdefghi );                        console.log(set.size); // 9                       console.log(set.has( d )); // true                       console.log(set.has( k )); // false                       console.log(set.add( k )); // {"a", "b", "c", "d","e" "f", "g", "h", "i","k"}                       console.log(set.has( k )); // true                       console.log(set.delete( d )); // true                       console.log(set.has( d )); // false                       console.log(set.keys()); // {"a", "b", "c","e" "f", "g", "h", "i","k"}                       console.log(set.values()); // {"a", "b", "c","e" "f", "g", "h", "i","k"}                       console.log(set.entries()); // {"a" => "a","b" => "b", "c" => "c","e" => "e",                       // "f"=> "f", "g" => "g", "h" =>"h"}, "i" => "i", "k" =>"k"}                       const set2 =newSet();                       set.forEach(item => set2.add(item.toLocaleUpperCase()));                       set.clear();                       console.log(set); // {}                       console.log(set2); //{"A", "B", "C", "E", "F","G", "H", "I", "K"}                       console.log(newSet([{ a: 1, b: 2, c: 3 }, { d: 4, e: 5 }, { d: 4, e: 5 }]));                       // {{a: 1, b: 2,c: 3}, {d: 4, e: 5}, {d: 4, e: 5}}                       const item = { f: 6, g: 7 };                       console.log(newSet([{ a: 1, b: 2, c: 3 }, item, item]));                       // {{a: 1, b: 2,c: 3}, {f: 6, g: 7}}
           

映射

映射是儲存鍵值對的對象。任何值都可以用作鍵或值。映射會記住鍵的原始插入順序。以下是供參考的代碼示例:

const map =newMap();         console.log(map.set(1,  first )); // {1 =>"first"}      console.log(map.set( a ,  second )); // {1 =>"first", "a" => "second"}      console.log(map.set({ obj:  123  }, [1, 2, 3]));      // {1 => "first", "a" =>"second", {obj: "123"} => [1, 2, 3]}      console.log(map.set([2, 2, 2], newSet( abc )));      // {1 => "first", "a" => "second",{obj: "123"} => [1, 2, 3], [2, 2, 2] => {"a","b", "c"}}      console.log(map.size); // 4      console.log(map.has(1)); // true      console.log(map.get(1)); // "first"      console.log(map.get( a )); // "second"      console.log(map.get({ obj:  123  })); // undefined      console.log(map.get([2, 2, 2])); // undefined      console.log(map.delete(1)); // true      console.log(map.has(1)); // false      const arr = [3, 3];      map.set(arr, newSet( xyz ));      console.log(map.get(arr)); // {"x", "y", "z"}      console.log(map.keys()); // {"a", {obj: "123"}, [2, 2,2], [3, 3]}      console.log(map.values()); // {"second", [1, 2, 3], {"a","b", "c"}, {"x", "y", "z"}}      console.log(map.entries());      // {"a" => "second", {obj: "123"}=> [1, 2, 3], [2, 2, 2] => {"a", "b", "c"},[3, 3] => {"x", "y", "z"}}      const map2 =newMap([[ a , 1], [ b , 2], [ c , 3]]);      map2.forEach((value, key, map) => console.log(`value = ${value}, key = ${key}, map = ${map.size}`));      // value = 1, key = a, map = 3      // value = 2, key = b, map = 3      // value = 3, key = c, map = 3      map2.clear();      console.log(map2.entries()); // {}
           
字元串操作大全:面試準備和日常編碼所需一文打盡!

應用題

面試中有英語應用題,我們探索了一些經常用于測試的算法。

等值線

等值線圖是指所含字母均隻出現一次的單詞。

·        dermatoglyphics (15個字母)

·        hydropneumatics (15個字母)

·        misconjugatedly (15個字母)

·        uncopyrightable (15個字母)

·        uncopyrightables (16個字母)

·        subdermatoglyphic (17個字母)

如何寫一個算法來檢測字元串是否是等值線圖?有很多方法可以實作。可以把字元串放在集合中,然後自動拆分成字元。由于集合是存儲唯一值的對象,如果它是一個等值線圖,它的大小應該與字元串長度相同。

/**    * An algorithm to verify whethera given string is an isogram    * @param {string} str The string to be verified    * @return {boolean} Returns whether it is an isogram    */   functionisIsogram(str) {    if (!str) {       returnfalse;    }           const set =newSet(str);    return set.size=== str.length;   }
           

以下是驗證測試:

console.log(isIsogram(  )); // false                             console.log(isIsogram( a )); // true                            console.log(isIsogram( misconjugatedly )); // true                            console.log(isIsogram( misconjugatledly )); // false
           

全字母短句

全字母短句是包含字母表中所有26個字母的句子,不分大小寫。理想情況下,句子越短越好。以下為全字母短句:

·        Waltz, bad nymph, for quick jigs vex. (28個字母)

·        Jived fox nymph grabs quick waltz. (28個字母)

·        Glib jocks quiz nymph to vex dwarf. (28個字母)

·        Sphinx of black quartz, judge my vow. (29個字母)

·        How vexingly quick daft zebras jump! (30個字母)

·        The five boxing wizards jump quickly. (31個字母)

·        Jackdaws love my big sphinx of quartz. (31個字母)

·        Pack my box with five dozen liquor jugs. (32個字母)

·        The quick brown fox jumps over a lazy dog. (33個字母)

還有很多方法可以驗證給定的字元串是否是全字母短句。這一次,我們将每個字母(轉換為小寫)放入映射中。如果映射大小為26,那麼它就是全字母短句。

/**       * An algorithm to verify whethera given string is a pangram    * @param {string} str The string to be verified    * @return {boolean} Returns whether it is a pangram    */   functionisPangram(str) {    const len = str.length;    if (len <26) {       returnfalse;    }               const map =newMap();    for (let i =0; i < len; i++) {       if (str[i].match(/[a-z]/i)) { // if it is letter a to z, ignoring the case         map.set(str[i].toLocaleLowerCase(), true); // use lower case letter as a key       }    }    return map.size===26;   }
           

以下是驗證測試:

console.log(isPangram(  )); // false                             console.log(isPangram( Bawds jog, flick quartz, vex nymphs. )); // true                            console.log(isPangram( The quick brown fox jumped over the lazy sleepingdog. )); // true                            console.log(isPangram( Roses are red, violets are blue, sugar is sweet,and so are you. )); // false
           

同構字元串

給定兩個字元串s和t,如果可以替換掉s中的字元得到t,那麼這兩個字元串是同構的。s中的所有字元轉換都必須應用到s中相同的字元上,例如,murmur與tartar為同構字元串,如果m被t替換,u被a替換,r被自身替換。以下算法使用數組來存儲轉換字元,也适用于映射。

/**    * An algorithm to verify whethertwo given strings are isomorphic    * @param {string} s The first string    * @param {string} t The second string    * @return {boolean} Returns whether these two strings are isomorphic    */   functionareIsomorphic(s, t) {    // strings with different lengths are notisomorphic    if (s.length !== t.length) {         returnfalse;    }               // the conversion array    const convert = [];    for (let i =0; i < s.length; i++) {         // if the conversioncharacter exists         if (convert[s[i]]) {             // apply the conversion and compare             if (t[i] === convert[s[i]]) { // so far so good                 continue;             }             returnfalse; // not isomorphic         }                   // set the conversion character for future use         convert[s[i]] = t[i];    }               // these two strings are isomorphic since there are no violations    returntrue;   };
           

以下是驗證測試:

console.log(areIsomorphic( atlatl ,  tartar )); // true                                    console.log(areIsomorphic( atlatlp ,  tartarq )); // true                                    console.log(areIsomorphic( atlatlpb ,  tartarqc )); // true                                    console.log(areIsomorphic( atlatlpa ,  tartarqb )); // false
           

相同字母異構詞

相同字母異構詞是通過重新排列不同單詞的字母而形成的單詞,通常使用所有原始字母一次。從一個池中重新排列單詞有很多種可能性。例如,cat的相同字母異構詞有cat、act、atc、tca、atc和tac。我們可以添加額外的要求,即新單詞必須出現在源字元串中。如果源實際上是actually,則結果數組是[“act”]。

/**    * Given a pool to compose ananagram, show all anagrams contained (continuously) in the source    * @param {string} source A source string to draw an anagram from    * @param {string} pool A pool to compose an anagram    * @return {array} Returns an array of anagrams that are contained by the source string    */   functionshowAnagrams(source, pool) {    // if source is not long enough to hold theanagram    if (source.length< pool.length) {       return [];    }               const sourceCounts = []; // an array tohold the letter counts in source    const poolCounts = []; // an array tohold the letter counts in pool               // initialize counts for 26 letters to be 0    for (let i =0; i <26; i++) {       sourceCounts[i] =0;       poolCounts[i] =0;    }               // convert both strings to lower cases    pool = pool.toLocaleLowerCase();    const lowerSource = source.toLocaleLowerCase();               for (let i =0; i < pool.length; i++) {        // calculatepoolCounts for each letter in pool, mapping a - z to 0 - 25       poolCounts[pool[i].charCodeAt() -97]++;    }               const result = [];    for (let i =0; i < lowerSource.length; i++) {       // calculatesourceCounts for each letter for source, mapping a - z to 0 - 25       sourceCounts[lowerSource[i].charCodeAt() -97]++;       if (i >= pool.length-1) { // if source islong enough         // if sourceCountsis the same as poolCounts         if (JSON.stringify(sourceCounts) ===JSON.stringify(poolCounts)) {           // save the found anagram, using the original source to make stringcase-preserved           result.push(source.slice(i - pool.length+1, i +1));         }         // shift thestarting window by 1 index (drop the current first letter)         sourceCounts[lowerSource[i - pool.length+1].charCodeAt() -97]--;       }    }          // removeduplicates by a Set    return [...newSet(result)];   }
           

以下是驗證測試:

console.log(showAnagrams( AaaAAaaAAaa ,  aa )); // ["Aa", "aa", "aA", "AA"]                                        console.log(showAnagrams( CbatobaTbacBoat ,  Boat ));  //["bato", "atob", "toba", "obaT","Boat"]                                        console.log(showAnagrams( AyaKkayakkAabkk ,  Kayak ));                                        // ["AyaKk", "yaKka", "aKkay", "Kkaya","kayak", "ayakk", "yakkA"]
           

回文

回文是從前往後讀和從後往前讀讀法相同的單詞或句子。有很多回文,比如A,Bob,還有 “A man, a plan, a canal — Panama”。檢查回文的算法分為兩種。使用循環或使用遞歸從兩端檢查是否相同。下列代碼使用遞歸方法:

/**    * An algorithm to verify whethera given string is a palindrome    * @param {string} str The string to be verified    * @return {boolean} Returns whether it is a palindrome    */   functionisPalindrome(str) {    functioncheckIsPalindrome(s) {       // empty stringor one letter is a defecto palindrome       if (s.length<2) {         returntrue;       }                 if ( // if two ends notequal, ignoring the case         s[0].localeCompare(s[s.length-1], undefined, {           sensitivity:  base ,         }) !== 0       ) {         returnfalse;       }                 // since two ends equal, checking the inside       returncheckIsPalindrome(s.slice(1, -1));    }        // check whether it is a palindrome, removing noneletters and digits    returncheckIsPalindrome(str.replace(/[^A-Za-z0-9]/g,   ));   }
           

以下是驗證測試:

console.log(isPalindrome(  )); // true                               console.log(isPalindrome( a )); // true                               console.log(isPalindrome( Aa )); // true                               console.log(isPalindrome( Bob )); // true                               console.log(isPalindrome( Odd or even )); // false                               console.log(isPalindrome( Never odd or even )); // true                               console.log(isPalindrome( 02/02/2020 )); // true                               console.log(isPalindrome( 2/20/2020 )); // false                               console.log(isPalindrome( A man, a plan, a canal – Panama )); // true
           

回文面試題有很多不同的變形題,下面是一個在給定字元串中尋找最長回文的算法。

/**                                           * An algorithm to find thelongest palindrome in a given string    * @param {string} source The source to find the longest palindrome from    * @return {string} Returns the longest palindrome    */   functionfindLongestPalindrome(source) {    // convert to lower cases and only keep lettersand digits    constlettersAndDigits = source.replace(/[^A-Za-z0-9]/g,   );    const str = lettersAndDigits.toLocaleLowerCase();               const len = str.length;              // empty string or one letter is a defecto palindrome    if (len <2) {       return str;    }               // the first letter is the current longest palindrome    let maxPalindrome = lettersAndDigits[0];               // assume that the index is the middle of a palindrome    for (let i =0; i < len; i++) {                 // try the case that the palindrome has one middle       for (         let j =1; // start with onestep away (inclusive)         j < len &&// end with the len end (exclusive)         i - j >= 0&&// cannot pass the start index (inclusive)         i + j < len &&// cannot exceed end index (exclusive)         Math.min(2 * i +1, 2 * (len - i) -1) > maxPalindrome.length; // potential max length should be longer than thecurrent length         j++       ) {         if (str[i - j] !== str[i + j]) { // if j stepsbefore the middle is different from j steps after the middle           break;         }         if (2 * j +1> maxPalindrome.length) { // if it is longerthan the current length           maxPalindrome = lettersAndDigits.slice(i - j, i + j +1); // j steps before, middle, and j steps after         }       }                 // try the case that the palindrome has two middles       if (i < len -1&& str[i] === str[i +1]) { // if two middles are the same         if (maxPalindrome.length<2) { // the string withtwo middles could be the current longest palindrome           maxPalindrome = lettersAndDigits.slice(i, i +2);         }         for (           let j =1; // start with one step away (inclusive)           j < len -1&&// end with the len - 1 end (exclusive)           i - j >= 0&&// cannot pass the start index (inclusive)           i + j +1< len &&// cannot exceed end index (exclusive)           Math.min(2 * i +2, 2 * (len - i)) > maxPalindrome.length; // potential max length should be longer than thecurrent length           j++         ) {           if (str[i - j] !== str[i + j +1]) { // if j stepsbefore the left middle is different from j steps after the right middle             break;           }           if (2 * j +2> maxPalindrome.length) { // if it is longer than the current length             maxPalindrome = lettersAndDigits.slice(i - j, i + j +2); // j steps before, middles, and j steps after           }         }       }    }    return maxPalindrome;   }            
           

以下是驗證測試:

console.log(findLongestPalindrome(  )); // ""                                         console.log(findLongestPalindrome( abc )); // "a"                                        console.log(findLongestPalindrome( Aabcd )); // "Aa"                                        console.log(findLongestPalindrome( I am Bob. )); // "Bob"                                        console.log(findLongestPalindrome( Odd or even )); // "Oddo"                                        console.log(findLongestPalindrome( Never odd or even )); // "Neveroddoreven"                                        console.log(findLongestPalindrome( Today is 02/02/2020. )); // "02022020"                                        console.log(findLongestPalindrome( It is 2/20/2020. )); // "20202"                                        console.log(findLongestPalindrome( A man, a plan, a canal – Panama )); // "AmanaplanacanalPanama"
           

熟能生巧。享受編碼!

字元串操作大全:面試準備和日常編碼所需一文打盡!

推薦閱讀專題

字元串操作大全:面試準備和日常編碼所需一文打盡!
字元串操作大全:面試準備和日常編碼所需一文打盡!
字元串操作大全:面試準備和日常編碼所需一文打盡!
字元串操作大全:面試準備和日常編碼所需一文打盡!
字元串操作大全:面試準備和日常編碼所需一文打盡!

留言點贊發個朋友圈

我們一起分享AI學習與發展的幹貨

編譯組:張月星、何婧璇

相關連結:

https://medium.com/better-programming/the-technical-interview-guide-to-string-manipulation-92f4c4649cd

如轉載,請背景留言,遵守轉載規範

推薦文章閱讀

EMNLP2017論文集28篇論文解讀

2018年AI三大頂會中國學術成果全連結

ACL2017論文集:34篇解讀幹貨全在這裡