Revisions to Algorithm – Number of strings containing every string of a given set a strings

fixed the indentation

Source Link

edited Dec 30, 2020 at 9:01

141
5

use std::io;
use std::collections::HashSet;

const ALPHABET: [char; 4] = ['A', 'C', 'G', 'T'];

type Word = Vec<char>;
type Buffer = Vec<Option<char>>;

fn hash(word: Word) -> u32 {
    word
    .iter()
    .enumerate()
    .map(|(i, x)| match x {
        'A' => 0,
        'C' => 1,
        'G' => 2,
        'T' => 3,
        _ => 4
    }*4_u32 * 4_u32.pow(i as u32))
    .sum()
}

fn can_insert(word: &Word, i: usize, buffer: &Buffer) -> (bool, Vec<usize>) {
    if word.len() + i > buffer.len() {
        return (false, Vec::new());
    }
    let mut changes = Vec::new();
    for j in 0..word.len() {
        if let Some(chr) = buffer[i+j] {
            if chr != word[j] {
                return (false, Vec::new());
            }
        } else {
            changes.push(j);
        }
    }
    (true, changes)
}

fn fill(buffer: &mut Buffer, index: usize, done: &mut HashSet<u32>) {
    if index == buffer.len() {
        done.insert(
            hash(buffer
             .iter()
             .map(|x| x.unwrap())
             .collect()
            )
        );
    } else {
        if buffer[index].is_none() {
            for &chr in ALPHABET.iter() {
                buffer[index] = Some(chr);
                fill(buffer, index+1, done);
            }
            buffer[index] = None;
        } else {
            fill(buffer, index+1, done);
        }
    }
}

fn count(buffer: &mut Buffer, words: &[Word], index: usize, done: &mut HashSet<u32>) {
    if index == words.len() {
        fill(buffer, 0, done);
    } else {
        let word = &words[index];
        let rev = word
            .iter()
            .rev()
            .copied()
            .collect();
        
        for i in 0..buffer.len() {
            if let (true, changes) = can_insert(word, i, buffer) {
                for &j in &changes {
                    buffer[i+j] = Some(word[j]);
                }
                count(buffer, words, index+1, done);
                for &j in &changes {
                    buffer[i+j] = None;
                }
            }
            if let (true, changes) = can_insert(&rev, i, buffer) {
                for &j in &changes {
                    buffer[i+j] = Some(rev[j]);
                }
                count(buffer, words, index+1, done);
                for &j in &changes {
                    buffer[i+j] = None;
                }
            }
        }
    
    }
}

fn solve_problem(n: usize, _m: usize, words: Vec<Word>) {
    let mut buffer = vec![None; n];
    let mut done = HashSet::new();
    count(&mut buffer, &words, 0, &mut done);
    println!("{}",
         done.len()
    );
}

fn main() {
    let n = read_line().parse().unwrap();
    let m = read_line().parse().unwrap();
    let words = read_words(m);
    solve_problem(n, m, words);
}

// What is left are just io functions to read the problem

fn read_line() -> String {
    let mut line = String::new();
    io::stdin()
        .read_line(&mut line)
        .expect("Failed to read line");
    line.trim().to_string()
}

fn read_words(m: usize) -> Vec<Word> {
    let mut words = Vec::new();
    for _ in 0..m {
        let word = read_line();
        words.push(word);
    }
    words.iter().map(|x| x.chars().collect()).collect()
}

use std::io;
use std::collections::HashSet;

const ALPHABET: [char; 4] = ['A', 'C', 'G', 'T'];

type Word = Vec<char>;
type Buffer = Vec<Option<char>>;

fn hash(word: Word) -> u32 {
    word
    .iter()
    .enumerate()
    .map(|(i, x)| match x {
        'A' => 0,
        'C' => 1,
        'G' => 2,
        'T' => 3,
        _ => 4
    }*4_u32.pow(i as u32))
    .sum()
}

fn can_insert(word: &Word, i: usize, buffer: &Buffer) -> (bool, Vec<usize>) {
    if word.len() + i > buffer.len() {
        return (false, Vec::new());
    }
    let mut changes = Vec::new();
    for j in 0..word.len() {
        if let Some(chr) = buffer[i+j] {
            if chr != word[j] {
        return (false, Vec::new());
        }
        } else {
        changes.push(j);
    }
    }
    (true, changes)
}

fn fill(buffer: &mut Buffer, index: usize, done: &mut HashSet<u32>) {
    if index == buffer.len() {
    done.insert(
        hash(buffer
         .iter()
         .map(|x| x.unwrap())
         .collect()
        )
    );
    } else {
    if buffer[index].is_none() {
        for &chr in ALPHABET.iter() {
        buffer[index] = Some(chr);
        fill(buffer, index+1, done);
        }
        buffer[index] = None;
    } else {
        fill(buffer, index+1, done);
    }
    }
}

fn count(buffer: &mut Buffer, words: &[Word], index: usize, done: &mut HashSet<u32>) {
    if index == words.len() {
    fill(buffer, 0, done);
    } else {
    let word = &words[index];
    let rev = word
        .iter()
        .rev()
        .copied()
        .collect();
    
    for i in 0..buffer.len() {
        if let (true, changes) = can_insert(word, i, buffer) {
        for &j in &changes {
            buffer[i+j] = Some(word[j]);
        }
        count(buffer, words, index+1, done);
        for &j in &changes {
            buffer[i+j] = None;
        }
        }
        if let (true, changes) = can_insert(&rev, i, buffer) {
        for &j in &changes {
            buffer[i+j] = Some(rev[j]);
        }
        count(buffer, words, index+1, done);
        for &j in &changes {
            buffer[i+j] = None;
        }
        }
    }
    
    }
}

fn solve_problem(n: usize, _m: usize, words: Vec<Word>) {
    let mut buffer = vec![None; n];
    let mut done = HashSet::new();
    count(&mut buffer, &words, 0, &mut done);
    println!("{}",
         done.len()
    );
}

fn main() {
    let n = read_line().parse().unwrap();
    let m = read_line().parse().unwrap();
    let words = read_words(m);
    solve_problem(n, m, words);
}

// What is left are just io functions to read the problem

fn read_line() -> String {
    let mut line = String::new();
    io::stdin()
        .read_line(&mut line)
        .expect("Failed to read line");
    line.trim().to_string()
}

fn read_words(m: usize) -> Vec<Word> {
    let mut words = Vec::new();
    for _ in 0..m {
        let word = read_line();
        words.push(word);
    }
    words.iter().map(|x| x.chars().collect()).collect()
}

use std::io;
use std::collections::HashSet;

const ALPHABET: [char; 4] = ['A', 'C', 'G', 'T'];

type Word = Vec<char>;
type Buffer = Vec<Option<char>>;

fn hash(word: Word) -> u32 {
    word
    .iter()
    .enumerate()
    .map(|(i, x)| match x {
        'A' => 0,
        'C' => 1,
        'G' => 2,
        'T' => 3,
        _ => 4
    } * 4_u32.pow(i as u32))
    .sum()
}

fn can_insert(word: &Word, i: usize, buffer: &Buffer) -> (bool, Vec<usize>) {
    if word.len() + i > buffer.len() {
        return (false, Vec::new());
    }
    let mut changes = Vec::new();
    for j in 0..word.len() {
        if let Some(chr) = buffer[i+j] {
            if chr != word[j] {
                return (false, Vec::new());
            }
        } else {
            changes.push(j);
        }
    }
    (true, changes)
}

fn fill(buffer: &mut Buffer, index: usize, done: &mut HashSet<u32>) {
    if index == buffer.len() {
        done.insert(
            hash(buffer
             .iter()
             .map(|x| x.unwrap())
             .collect()
            )
        );
    } else {
        if buffer[index].is_none() {
            for &chr in ALPHABET.iter() {
                buffer[index] = Some(chr);
                fill(buffer, index+1, done);
            }
            buffer[index] = None;
        } else {
            fill(buffer, index+1, done);
        }
    }
}

fn count(buffer: &mut Buffer, words: &[Word], index: usize, done: &mut HashSet<u32>) {
    if index == words.len() {
        fill(buffer, 0, done);
    } else {
        let word = &words[index];
        let rev = word
            .iter()
            .rev()
            .copied()
            .collect();
        
        for i in 0..buffer.len() {
            if let (true, changes) = can_insert(word, i, buffer) {
                for &j in &changes {
                    buffer[i+j] = Some(word[j]);
                }
                count(buffer, words, index+1, done);
                for &j in &changes {
                    buffer[i+j] = None;
                }
            }
            if let (true, changes) = can_insert(&rev, i, buffer) {
                for &j in &changes {
                    buffer[i+j] = Some(rev[j]);
                }
                count(buffer, words, index+1, done);
                for &j in &changes {
                    buffer[i+j] = None;
                }
            }
        }
    
    }
}

fn solve_problem(n: usize, _m: usize, words: Vec<Word>) {
    let mut buffer = vec![None; n];
    let mut done = HashSet::new();
    count(&mut buffer, &words, 0, &mut done);
    println!("{}",
         done.len()
    );
}

fn main() {
    let n = read_line().parse().unwrap();
    let m = read_line().parse().unwrap();
    let words = read_words(m);
    solve_problem(n, m, words);
}

// What is left are just io functions to read the problem

fn read_line() -> String {
    let mut line = String::new();
    io::stdin()
        .read_line(&mut line)
        .expect("Failed to read line");
    line.trim().to_string()
}

fn read_words(m: usize) -> Vec<Word> {
    let mut words = Vec::new();
    for _ in 0..m {
        let word = read_line();
        words.push(word);
    }
    words.iter().map(|x| x.chars().collect()).collect()
}

added 3996 characters in body

Source Link

edited Dec 29, 2020 at 12:18

jthulhu

141
5

EDITEDIT: an example

Suppose I am given the strings "ab" and "ca", and the length 4, (and my alphabet is {a,b,c}) I can make the following strings: abca, caab, caba, cabb, cabc, acab, bcab, ccab, for a total of 8 possible strings.

EDIT: code from answers

From @KilianFoth's and @Jasmijn's answer, I came up with the following code, with a few modifications:

@KilianFoth I think you might actually count twice (or many more times) a string if you don't cache, in some way, the one you've already done, but...

@Jasmijn storing the actual found strings, even in a set (which in Rust is a HashSet) takes too much memory (in the sense that I tried it and it failed)

Therefore I created a "hash" function that is more like a compression algorithm: since all my strings are made of 4 letters, they are uniquely defined by a number in base 4. Thereby, I store that number instead of the string, which takes less memory but serves the purpose.

Moreover, I don't recreate tons of templates like in @Jasmijn's answer, I recycle a single buffer for every string (notice that whenever I fill a value, few lines later I empty it)

However, the main algorithm is the same, and that's the problem: it is too slow (I submitted it and it didn't pass the fith test out of twelve). Thus there must be a better algorithm.

use std::io;
use std::collections::HashSet;

const ALPHABET: [char; 4] = ['A', 'C', 'G', 'T'];

type Word = Vec<char>;
type Buffer = Vec<Option<char>>;

fn hash(word: Word) -> u32 {
    word
    .iter()
    .enumerate()
    .map(|(i, x)| match x {
        'A' => 0,
        'C' => 1,
        'G' => 2,
        'T' => 3,
        _ => 4
    }*4_u32.pow(i as u32))
    .sum()
}

fn can_insert(word: &Word, i: usize, buffer: &Buffer) -> (bool, Vec<usize>) {
    if word.len() + i > buffer.len() {
        return (false, Vec::new());
    }
    let mut changes = Vec::new();
    for j in 0..word.len() {
        if let Some(chr) = buffer[i+j] {
            if chr != word[j] {
        return (false, Vec::new());
        }
        } else {
        changes.push(j);
    }
    }
    (true, changes)
}

fn fill(buffer: &mut Buffer, index: usize, done: &mut HashSet<u32>) {
    if index == buffer.len() {
    done.insert(
        hash(buffer
         .iter()
         .map(|x| x.unwrap())
         .collect()
        )
    );
    } else {
    if buffer[index].is_none() {
        for &chr in ALPHABET.iter() {
        buffer[index] = Some(chr);
        fill(buffer, index+1, done);
        }
        buffer[index] = None;
    } else {
        fill(buffer, index+1, done);
    }
    }
}

fn count(buffer: &mut Buffer, words: &[Word], index: usize, done: &mut HashSet<u32>) {
    if index == words.len() {
    fill(buffer, 0, done);
    } else {
    let word = &words[index];
    let rev = word
        .iter()
        .rev()
        .copied()
        .collect();
    
    for i in 0..buffer.len() {
        if let (true, changes) = can_insert(word, i, buffer) {
        for &j in &changes {
            buffer[i+j] = Some(word[j]);
        }
        count(buffer, words, index+1, done);
        for &j in &changes {
            buffer[i+j] = None;
        }
        }
        if let (true, changes) = can_insert(&rev, i, buffer) {
        for &j in &changes {
            buffer[i+j] = Some(rev[j]);
        }
        count(buffer, words, index+1, done);
        for &j in &changes {
            buffer[i+j] = None;
        }
        }
    }
    
    }
}

fn solve_problem(n: usize, _m: usize, words: Vec<Word>) {
    let mut buffer = vec![None; n];
    let mut done = HashSet::new();
    count(&mut buffer, &words, 0, &mut done);
    println!("{}",
         done.len()
    );
}

fn main() {
    let n = read_line().parse().unwrap();
    let m = read_line().parse().unwrap();
    let words = read_words(m);
    solve_problem(n, m, words);
}

// What is left are just io functions to read the problem

fn read_line() -> String {
    let mut line = String::new();
    io::stdin()
        .read_line(&mut line)
        .expect("Failed to read line");
    line.trim().to_string()
}

fn read_words(m: usize) -> Vec<Word> {
    let mut words = Vec::new();
    for _ in 0..m {
        let word = read_line();
        words.push(word);
    }
    words.iter().map(|x| x.chars().collect()).collect()
}

EDIT: an example

Suppose I am given the strings "ab" and "ca", and the length 4, (and my alphabet is {a,b,c}) I can make the following strings: abca, caab, caba, cabb, cabc, acab, bcab, ccab, for a total of 8 possible strings.

EDIT: an example

Suppose I am given the strings "ab" and "ca", and the length 4, (and my alphabet is {a,b,c}) I can make the following strings: abca, caab, caba, cabb, cabc, acab, bcab, ccab, for a total of 8 possible strings.

EDIT: code from answers

From @KilianFoth's and @Jasmijn's answer, I came up with the following code, with a few modifications:

@KilianFoth I think you might actually count twice (or many more times) a string if you don't cache, in some way, the one you've already done, but...

@Jasmijn storing the actual found strings, even in a set (which in Rust is a HashSet) takes too much memory (in the sense that I tried it and it failed)

Therefore I created a "hash" function that is more like a compression algorithm: since all my strings are made of 4 letters, they are uniquely defined by a number in base 4. Thereby, I store that number instead of the string, which takes less memory but serves the purpose.

Moreover, I don't recreate tons of templates like in @Jasmijn's answer, I recycle a single buffer for every string (notice that whenever I fill a value, few lines later I empty it)

However, the main algorithm is the same, and that's the problem: it is too slow (I submitted it and it didn't pass the fith test out of twelve). Thus there must be a better algorithm.

use std::io;
use std::collections::HashSet;

const ALPHABET: [char; 4] = ['A', 'C', 'G', 'T'];

type Word = Vec<char>;
type Buffer = Vec<Option<char>>;

fn hash(word: Word) -> u32 {
    word
    .iter()
    .enumerate()
    .map(|(i, x)| match x {
        'A' => 0,
        'C' => 1,
        'G' => 2,
        'T' => 3,
        _ => 4
    }*4_u32.pow(i as u32))
    .sum()
}

fn can_insert(word: &Word, i: usize, buffer: &Buffer) -> (bool, Vec<usize>) {
    if word.len() + i > buffer.len() {
        return (false, Vec::new());
    }
    let mut changes = Vec::new();
    for j in 0..word.len() {
        if let Some(chr) = buffer[i+j] {
            if chr != word[j] {
        return (false, Vec::new());
        }
        } else {
        changes.push(j);
    }
    }
    (true, changes)
}

fn fill(buffer: &mut Buffer, index: usize, done: &mut HashSet<u32>) {
    if index == buffer.len() {
    done.insert(
        hash(buffer
         .iter()
         .map(|x| x.unwrap())
         .collect()
        )
    );
    } else {
    if buffer[index].is_none() {
        for &chr in ALPHABET.iter() {
        buffer[index] = Some(chr);
        fill(buffer, index+1, done);
        }
        buffer[index] = None;
    } else {
        fill(buffer, index+1, done);
    }
    }
}

fn count(buffer: &mut Buffer, words: &[Word], index: usize, done: &mut HashSet<u32>) {
    if index == words.len() {
    fill(buffer, 0, done);
    } else {
    let word = &words[index];
    let rev = word
        .iter()
        .rev()
        .copied()
        .collect();
    
    for i in 0..buffer.len() {
        if let (true, changes) = can_insert(word, i, buffer) {
        for &j in &changes {
            buffer[i+j] = Some(word[j]);
        }
        count(buffer, words, index+1, done);
        for &j in &changes {
            buffer[i+j] = None;
        }
        }
        if let (true, changes) = can_insert(&rev, i, buffer) {
        for &j in &changes {
            buffer[i+j] = Some(rev[j]);
        }
        count(buffer, words, index+1, done);
        for &j in &changes {
            buffer[i+j] = None;
        }
        }
    }
    
    }
}

fn solve_problem(n: usize, _m: usize, words: Vec<Word>) {
    let mut buffer = vec![None; n];
    let mut done = HashSet::new();
    count(&mut buffer, &words, 0, &mut done);
    println!("{}",
         done.len()
    );
}

fn main() {
    let n = read_line().parse().unwrap();
    let m = read_line().parse().unwrap();
    let words = read_words(m);
    solve_problem(n, m, words);
}

// What is left are just io functions to read the problem

fn read_line() -> String {
    let mut line = String::new();
    io::stdin()
        .read_line(&mut line)
        .expect("Failed to read line");
    line.trim().to_string()
}

fn read_words(m: usize) -> Vec<Word> {
    let mut words = Vec::new();
    for _ in 0..m {
        let word = read_line();
        words.push(word);
    }
    words.iter().map(|x| x.chars().collect()).collect()
}

fixed grammar.

Source Link

edit approved Dec 25, 2020 at 8:35

cngzz1

45
9

I have a given set S of strings, and a length l, and I am looking for the number of strings of length l that containcontains every string of S. AnA naive approach would be to generate every string of length l (I have a given alphabet), and for each check whether it contains every string of S. However, this is clearly very inefficient, so I was wondering if there was a better way (maybe by usingthat uses dynamic programming).

I couldn't find any thread about such a problem. Some wherewere quite similar (such as, findfinding the number of strings of given length that contain one given string), but none gave me the answer.

EDIT: an example

Suppose I am given the strings "ab" and "ca", and the length 4, (and my alphabet is {a,b,c}) I can make the following strings: abca, caab, caba, cabb, cabc, acab, bcab, ccab, for a total of 8 possible strings.

I have a given set S of strings, and a length l, and I am looking for the number of strings of length l that contain every string of S. An naive approach would be to generate every string of length l (I have a given alphabet), and for each check whether it contains every string of S. However, this is clearly very inefficient, so I was wondering if there was a better way (maybe by using dynamic programming).

I couldn't find any thread about such a problem. Some where quite similar (such as, find the number of strings of given length that contain one given string), but none gave me the answer.

EDIT: an example

Suppose I am given the strings "ab" and "ca", and the length 4, (and my alphabet is {a,b,c}) I can make the following strings: abca, caab, caba, cabb, cabc, acab, bcab, ccab, for a total of 8 possible strings.

I have a given set S of strings, and a length l, and I am looking for the number of strings of length l that contains every string of S. A naive approach would be to generate every string of length l (I have a given alphabet), and for each check whether it contains every string of S. However, this is clearly very inefficient, so I was wondering if there was a better way that uses dynamic programming.

I couldn't find any thread about such a problem. Some were quite similar (such as, finding the number of strings of given length that contain one given string), but none gave me the answer.

EDIT: an example

Suppose I am given the strings "ab" and "ca", and the length 4, (and my alphabet is {a,b,c}) I can make the following strings: abca, caab, caba, cabb, cabc, acab, bcab, ccab, for a total of 8 possible strings.

added 252 characters in body

Source Link

edited Dec 22, 2020 at 14:24

jthulhu

141
5

Loading

Source Link

asked Dec 22, 2020 at 9:50

jthulhu

141
5

Loading

Stack Exchange Network

Return to Question