全文共14056字,預計學習時長36分鐘
圖源:unsplash
字元串是一系列字元,由常數或變量構成。它是程式設計語言中必不可少的資料類型。本文中将重點關注JavaScript字元串操作,但其原理和算法也可應用于其他語言。
參加技術面試時,面試官常常會關注以下内容:
· 程式設計技術
· 語言能力
· 解題技巧
本文不僅可以讓你成功通過技術面試,對日常編碼也很有用。代碼要點格式中,我們列出了JavaScript字元串的幾點重要特性,這是程式設計技能的基礎。其中包括存在了20年的屬性和方法,也涵蓋ES2021中的特性。如有不清楚之處,可以有針對性地查漏補缺。JavaScript程式設計語言可以解決許多應用問題。這些算法或其變體,經常出現在真實的面試場景中。
字元串屬性和方法
字元串用于表示和操作字元序列。字元串屬性和方法有很多。以下是可供參考的代碼示例,包括ES2020中的“matchAll”和ES2021中的“replaceAll”。
const str = Today is a nice day! ; console.log(str.length); // 20 console.log(str[2]); // "d" console.log(typeof str); // "string" console.log(typeof str[2]); // "string" console.log(typeofString(5)); //"string" console.log(typeofnewString(str)); //"object" console.log(str.indexOf( is )); // 6 console.log(str.indexOf( today )); // -1 console.log(str.includes( is )); // true console.log(str.includes( IS )); // false console.log(str.startsWith( Today )); // true console.log(str.endsWith( day )); // false console.log(str.split( )); // ["Today", "is", "a", "nice","day!"] console.log(str.split( )); // ["T", "o", "d", "a","y", " ", "i", "s", " ","a", " ", "n", "i", "c","e", " ", "d", "a", "y","!"] console.log(str.split( a )); // ["Tod", "y is ", " nice d","y!"] console.log(str +1+2); // "Today is a nice day!12" console.log(str + str); // "Today is a nice day!Today is a niceday!" console.log(str.concat(str)); // "Today is a nice day!Today is a niceday!" console.log(str.repeat(2)); // "Today is a nice day!Today is a nice day!" console.log( abc < bcd ); // true console.log( abc .localeCompare( bcd )); // -1 console.log( a .localeCompare( A )); // -1 console.log( a .localeCompare( A , undefined, { numeric: true })); // -1 console.log( a .localeCompare( A , undefined, { sensitivity: accent })); // 0 console.log( a .localeCompare( A , undefined, { sensitivity: base })); // 0 console.log( a .localeCompare( A! , undefined, { sensitivity: base , ignorePunctuation: true })); // 0 console.log( abc .toLocaleUpperCase()); // "ABC" console.log(str.padStart(25, * )); // "*****Todayis a nice day!" console.log(str.padEnd(22, ! )); // "Today is anice day!!!" console.log( middle .trim().length); // 6 console.log( middle .trimStart().length); // 8 console.log( middle .trimEnd().length); // 9 console.log(str.slice(6, 8)); // "is" console.log(str.slice(-4)); // "day!" console.log(str.substring(6, 8)); // "is" console.log(str.substring(-4)); // "Today is a nice day!" console.log( a .charCodeAt()); // 97 console.log(String.fromCharCode(97)); // "a" console.log(str.search(/[a-c]/)); // 3 console.log(str.match(/[a-c]/g)); // ["a", "a", "c", "a"] console.log([...str.matchAll(/[a-c]/g)]); // [Array(1), Array(1), Array(1), Array(1)] // 0: ["a", index: 3, input: "Today is a nice day!",groups: undefined] // 1: ["a", index: 9, input: "Today is a nice day!",groups: undefined] // 2: ["c", index: 13, input: "Today is a niceday!", groups: undefined] // 3: ["a", index: 17, input: "Today is a niceday!", groups: undefined] console.log([... test1test2 .matchAll(/t(e)(st(d?))/g)]); // [Array(4), Array(4)] // 0: (4) ["test1", "e", "st1","1", index: 0, input: "test1test2", groups: undefined] // 1: (4) ["test2", "e", "st2","2", index: 5, input: "test1test2", groups: undefined] console.log(str.replace( a , z )); // Todzy is anice day! console.log(str.replace(/[a-c]/, z )); // Todzy is anice day! console.log(str.replace(/[a-c]/g, z )); // Todzy is znize dzy! console.log(str.replaceAll( a , z )); // Todzy is znice dzy! console.log(str.replaceAll(/[a-c]/g, z )); // Todzy is znize dzy! console.log(str.replaceAll(/[a-c]/, z )); // TypeError:String.prototype.replaceAll called with a non-global RegExp argument
映射和集合
對于字元串操作,我們需要在某處存儲中間值。數組、映射和集合都是需要掌握的常用資料結構,本文主要讨論集合和映射。
集合
Set是存儲所有類型的唯一值的對象。以下是供參考的代碼示例,一目了然。
const set =newSet( aabbccdefghi ); console.log(set.size); // 9 console.log(set.has( d )); // true console.log(set.has( k )); // false console.log(set.add( k )); // {"a", "b", "c", "d","e" "f", "g", "h", "i","k"} console.log(set.has( k )); // true console.log(set.delete( d )); // true console.log(set.has( d )); // false console.log(set.keys()); // {"a", "b", "c","e" "f", "g", "h", "i","k"} console.log(set.values()); // {"a", "b", "c","e" "f", "g", "h", "i","k"} console.log(set.entries()); // {"a" => "a","b" => "b", "c" => "c","e" => "e", // "f"=> "f", "g" => "g", "h" =>"h"}, "i" => "i", "k" =>"k"} const set2 =newSet(); set.forEach(item => set2.add(item.toLocaleUpperCase())); set.clear(); console.log(set); // {} console.log(set2); //{"A", "B", "C", "E", "F","G", "H", "I", "K"} console.log(newSet([{ a: 1, b: 2, c: 3 }, { d: 4, e: 5 }, { d: 4, e: 5 }])); // {{a: 1, b: 2,c: 3}, {d: 4, e: 5}, {d: 4, e: 5}} const item = { f: 6, g: 7 }; console.log(newSet([{ a: 1, b: 2, c: 3 }, item, item])); // {{a: 1, b: 2,c: 3}, {f: 6, g: 7}}
映射
映射是儲存鍵值對的對象。任何值都可以用作鍵或值。映射會記住鍵的原始插入順序。以下是供參考的代碼示例:
const map =newMap(); console.log(map.set(1, first )); // {1 =>"first"} console.log(map.set( a , second )); // {1 =>"first", "a" => "second"} console.log(map.set({ obj: 123 }, [1, 2, 3])); // {1 => "first", "a" =>"second", {obj: "123"} => [1, 2, 3]} console.log(map.set([2, 2, 2], newSet( abc ))); // {1 => "first", "a" => "second",{obj: "123"} => [1, 2, 3], [2, 2, 2] => {"a","b", "c"}} console.log(map.size); // 4 console.log(map.has(1)); // true console.log(map.get(1)); // "first" console.log(map.get( a )); // "second" console.log(map.get({ obj: 123 })); // undefined console.log(map.get([2, 2, 2])); // undefined console.log(map.delete(1)); // true console.log(map.has(1)); // false const arr = [3, 3]; map.set(arr, newSet( xyz )); console.log(map.get(arr)); // {"x", "y", "z"} console.log(map.keys()); // {"a", {obj: "123"}, [2, 2,2], [3, 3]} console.log(map.values()); // {"second", [1, 2, 3], {"a","b", "c"}, {"x", "y", "z"}} console.log(map.entries()); // {"a" => "second", {obj: "123"}=> [1, 2, 3], [2, 2, 2] => {"a", "b", "c"},[3, 3] => {"x", "y", "z"}} const map2 =newMap([[ a , 1], [ b , 2], [ c , 3]]); map2.forEach((value, key, map) => console.log(`value = ${value}, key = ${key}, map = ${map.size}`)); // value = 1, key = a, map = 3 // value = 2, key = b, map = 3 // value = 3, key = c, map = 3 map2.clear(); console.log(map2.entries()); // {}
應用題
面試中有英語應用題,我們探索了一些經常用于測試的算法。
等值線
等值線圖是指所含字母均隻出現一次的單詞。
· dermatoglyphics (15個字母)
· hydropneumatics (15個字母)
· misconjugatedly (15個字母)
· uncopyrightable (15個字母)
· uncopyrightables (16個字母)
· subdermatoglyphic (17個字母)
如何寫一個算法來檢測字元串是否是等值線圖?有很多方法可以實作。可以把字元串放在集合中,然後自動拆分成字元。由于集合是存儲唯一值的對象,如果它是一個等值線圖,它的大小應該與字元串長度相同。
/** * An algorithm to verify whethera given string is an isogram * @param {string} str The string to be verified * @return {boolean} Returns whether it is an isogram */ functionisIsogram(str) { if (!str) { returnfalse; } const set =newSet(str); return set.size=== str.length; }
以下是驗證測試:
console.log(isIsogram( )); // false console.log(isIsogram( a )); // true console.log(isIsogram( misconjugatedly )); // true console.log(isIsogram( misconjugatledly )); // false
全字母短句
全字母短句是包含字母表中所有26個字母的句子,不分大小寫。理想情況下,句子越短越好。以下為全字母短句:
· Waltz, bad nymph, for quick jigs vex. (28個字母)
· Jived fox nymph grabs quick waltz. (28個字母)
· Glib jocks quiz nymph to vex dwarf. (28個字母)
· Sphinx of black quartz, judge my vow. (29個字母)
· How vexingly quick daft zebras jump! (30個字母)
· The five boxing wizards jump quickly. (31個字母)
· Jackdaws love my big sphinx of quartz. (31個字母)
· Pack my box with five dozen liquor jugs. (32個字母)
· The quick brown fox jumps over a lazy dog. (33個字母)
還有很多方法可以驗證給定的字元串是否是全字母短句。這一次,我們将每個字母(轉換為小寫)放入映射中。如果映射大小為26,那麼它就是全字母短句。
/** * An algorithm to verify whethera given string is a pangram * @param {string} str The string to be verified * @return {boolean} Returns whether it is a pangram */ functionisPangram(str) { const len = str.length; if (len <26) { returnfalse; } const map =newMap(); for (let i =0; i < len; i++) { if (str[i].match(/[a-z]/i)) { // if it is letter a to z, ignoring the case map.set(str[i].toLocaleLowerCase(), true); // use lower case letter as a key } } return map.size===26; }
以下是驗證測試:
console.log(isPangram( )); // false console.log(isPangram( Bawds jog, flick quartz, vex nymphs. )); // true console.log(isPangram( The quick brown fox jumped over the lazy sleepingdog. )); // true console.log(isPangram( Roses are red, violets are blue, sugar is sweet,and so are you. )); // false
同構字元串
給定兩個字元串s和t,如果可以替換掉s中的字元得到t,那麼這兩個字元串是同構的。s中的所有字元轉換都必須應用到s中相同的字元上,例如,murmur與tartar為同構字元串,如果m被t替換,u被a替換,r被自身替換。以下算法使用數組來存儲轉換字元,也适用于映射。
/** * An algorithm to verify whethertwo given strings are isomorphic * @param {string} s The first string * @param {string} t The second string * @return {boolean} Returns whether these two strings are isomorphic */ functionareIsomorphic(s, t) { // strings with different lengths are notisomorphic if (s.length !== t.length) { returnfalse; } // the conversion array const convert = []; for (let i =0; i < s.length; i++) { // if the conversioncharacter exists if (convert[s[i]]) { // apply the conversion and compare if (t[i] === convert[s[i]]) { // so far so good continue; } returnfalse; // not isomorphic } // set the conversion character for future use convert[s[i]] = t[i]; } // these two strings are isomorphic since there are no violations returntrue; };
以下是驗證測試:
console.log(areIsomorphic( atlatl , tartar )); // true console.log(areIsomorphic( atlatlp , tartarq )); // true console.log(areIsomorphic( atlatlpb , tartarqc )); // true console.log(areIsomorphic( atlatlpa , tartarqb )); // false
相同字母異構詞
相同字母異構詞是通過重新排列不同單詞的字母而形成的單詞,通常使用所有原始字母一次。從一個池中重新排列單詞有很多種可能性。例如,cat的相同字母異構詞有cat、act、atc、tca、atc和tac。我們可以添加額外的要求,即新單詞必須出現在源字元串中。如果源實際上是actually,則結果數組是[“act”]。
/** * Given a pool to compose ananagram, show all anagrams contained (continuously) in the source * @param {string} source A source string to draw an anagram from * @param {string} pool A pool to compose an anagram * @return {array} Returns an array of anagrams that are contained by the source string */ functionshowAnagrams(source, pool) { // if source is not long enough to hold theanagram if (source.length< pool.length) { return []; } const sourceCounts = []; // an array tohold the letter counts in source const poolCounts = []; // an array tohold the letter counts in pool // initialize counts for 26 letters to be 0 for (let i =0; i <26; i++) { sourceCounts[i] =0; poolCounts[i] =0; } // convert both strings to lower cases pool = pool.toLocaleLowerCase(); const lowerSource = source.toLocaleLowerCase(); for (let i =0; i < pool.length; i++) { // calculatepoolCounts for each letter in pool, mapping a - z to 0 - 25 poolCounts[pool[i].charCodeAt() -97]++; } const result = []; for (let i =0; i < lowerSource.length; i++) { // calculatesourceCounts for each letter for source, mapping a - z to 0 - 25 sourceCounts[lowerSource[i].charCodeAt() -97]++; if (i >= pool.length-1) { // if source islong enough // if sourceCountsis the same as poolCounts if (JSON.stringify(sourceCounts) ===JSON.stringify(poolCounts)) { // save the found anagram, using the original source to make stringcase-preserved result.push(source.slice(i - pool.length+1, i +1)); } // shift thestarting window by 1 index (drop the current first letter) sourceCounts[lowerSource[i - pool.length+1].charCodeAt() -97]--; } } // removeduplicates by a Set return [...newSet(result)]; }
以下是驗證測試:
console.log(showAnagrams( AaaAAaaAAaa , aa )); // ["Aa", "aa", "aA", "AA"] console.log(showAnagrams( CbatobaTbacBoat , Boat )); //["bato", "atob", "toba", "obaT","Boat"] console.log(showAnagrams( AyaKkayakkAabkk , Kayak )); // ["AyaKk", "yaKka", "aKkay", "Kkaya","kayak", "ayakk", "yakkA"]
回文
回文是從前往後讀和從後往前讀讀法相同的單詞或句子。有很多回文,比如A,Bob,還有 “A man, a plan, a canal — Panama”。檢查回文的算法分為兩種。使用循環或使用遞歸從兩端檢查是否相同。下列代碼使用遞歸方法:
/** * An algorithm to verify whethera given string is a palindrome * @param {string} str The string to be verified * @return {boolean} Returns whether it is a palindrome */ functionisPalindrome(str) { functioncheckIsPalindrome(s) { // empty stringor one letter is a defecto palindrome if (s.length<2) { returntrue; } if ( // if two ends notequal, ignoring the case s[0].localeCompare(s[s.length-1], undefined, { sensitivity: base , }) !== 0 ) { returnfalse; } // since two ends equal, checking the inside returncheckIsPalindrome(s.slice(1, -1)); } // check whether it is a palindrome, removing noneletters and digits returncheckIsPalindrome(str.replace(/[^A-Za-z0-9]/g, )); }
以下是驗證測試:
console.log(isPalindrome( )); // true console.log(isPalindrome( a )); // true console.log(isPalindrome( Aa )); // true console.log(isPalindrome( Bob )); // true console.log(isPalindrome( Odd or even )); // false console.log(isPalindrome( Never odd or even )); // true console.log(isPalindrome( 02/02/2020 )); // true console.log(isPalindrome( 2/20/2020 )); // false console.log(isPalindrome( A man, a plan, a canal – Panama )); // true
回文面試題有很多不同的變形題,下面是一個在給定字元串中尋找最長回文的算法。
/** * An algorithm to find thelongest palindrome in a given string * @param {string} source The source to find the longest palindrome from * @return {string} Returns the longest palindrome */ functionfindLongestPalindrome(source) { // convert to lower cases and only keep lettersand digits constlettersAndDigits = source.replace(/[^A-Za-z0-9]/g, ); const str = lettersAndDigits.toLocaleLowerCase(); const len = str.length; // empty string or one letter is a defecto palindrome if (len <2) { return str; } // the first letter is the current longest palindrome let maxPalindrome = lettersAndDigits[0]; // assume that the index is the middle of a palindrome for (let i =0; i < len; i++) { // try the case that the palindrome has one middle for ( let j =1; // start with onestep away (inclusive) j < len &&// end with the len end (exclusive) i - j >= 0&&// cannot pass the start index (inclusive) i + j < len &&// cannot exceed end index (exclusive) Math.min(2 * i +1, 2 * (len - i) -1) > maxPalindrome.length; // potential max length should be longer than thecurrent length j++ ) { if (str[i - j] !== str[i + j]) { // if j stepsbefore the middle is different from j steps after the middle break; } if (2 * j +1> maxPalindrome.length) { // if it is longerthan the current length maxPalindrome = lettersAndDigits.slice(i - j, i + j +1); // j steps before, middle, and j steps after } } // try the case that the palindrome has two middles if (i < len -1&& str[i] === str[i +1]) { // if two middles are the same if (maxPalindrome.length<2) { // the string withtwo middles could be the current longest palindrome maxPalindrome = lettersAndDigits.slice(i, i +2); } for ( let j =1; // start with one step away (inclusive) j < len -1&&// end with the len - 1 end (exclusive) i - j >= 0&&// cannot pass the start index (inclusive) i + j +1< len &&// cannot exceed end index (exclusive) Math.min(2 * i +2, 2 * (len - i)) > maxPalindrome.length; // potential max length should be longer than thecurrent length j++ ) { if (str[i - j] !== str[i + j +1]) { // if j stepsbefore the left middle is different from j steps after the right middle break; } if (2 * j +2> maxPalindrome.length) { // if it is longer than the current length maxPalindrome = lettersAndDigits.slice(i - j, i + j +2); // j steps before, middles, and j steps after } } } } return maxPalindrome; }
以下是驗證測試:
console.log(findLongestPalindrome( )); // "" console.log(findLongestPalindrome( abc )); // "a" console.log(findLongestPalindrome( Aabcd )); // "Aa" console.log(findLongestPalindrome( I am Bob. )); // "Bob" console.log(findLongestPalindrome( Odd or even )); // "Oddo" console.log(findLongestPalindrome( Never odd or even )); // "Neveroddoreven" console.log(findLongestPalindrome( Today is 02/02/2020. )); // "02022020" console.log(findLongestPalindrome( It is 2/20/2020. )); // "20202" console.log(findLongestPalindrome( A man, a plan, a canal – Panama )); // "AmanaplanacanalPanama"
熟能生巧。享受編碼!
推薦閱讀專題
留言點贊發個朋友圈
我們一起分享AI學習與發展的幹貨
編譯組:張月星、何婧璇
相關連結:
https://medium.com/better-programming/the-technical-interview-guide-to-string-manipulation-92f4c4649cd
如轉載,請背景留言,遵守轉載規範
推薦文章閱讀
EMNLP2017論文集28篇論文解讀
2018年AI三大頂會中國學術成果全連結
ACL2017論文集:34篇解讀幹貨全在這裡