2つのDNA配列があります。
ATGCATGC
TACGTTGC
同じ位置の塩基が対(A と T、G と C)になっていれば「+」を、それ以外の場合は「-」を出力するプログラムを作成したいと思います。
例:
ATGCATGC
TACGTTGC
+++++---
どなたか助けていただけないでしょうか?
上記が期待している出力です。
私が試したことは次のとおりです。
#!/bin/bash
#
# Compare two DNA sequences position by position and print a marker line:
# "+" where the two bases form a complementary pair (A-T or G-C) and "-"
# for every other combination.
#
# Usage:
#   script SEQ1 SEQ2     compare the two sequences given as arguments
#   script               prompt for both sequences on stdin
#
# A sequence may be typed as one contiguous string (ATGCATGC) or character
# by character separated by spaces (A T G C A T G C); whitespace is ignored.

#######################################
# Print the +/- marker line for two sequences.
# Arguments: $1 - first sequence
#            $2 - second sequence (must have the same length as $1)
# Outputs:   the marker string and a newline on stdout;
#            a diagnostic on stderr when the lengths differ
# Returns:   0 on success, 1 when the lengths differ
#######################################
compare_sequences() {
  local first=$1 second=$2
  local markers='' i

  if (( ${#first} != ${#second} )); then
    printf 'error: sequences differ in length (%d vs %d)\n' \
      "${#first}" "${#second}" >&2
    return 1
  fi

  # Walk both sequences with ONE index so that base i of the first sequence
  # is only ever compared with base i of the second one.
  for (( i = 0; i < ${#first}; i++ )); do
    case "${first:i:1}${second:i:1}" in
      AT | TA | GC | CG) markers+='+' ;;
      *) markers+='-' ;;
    esac
  done

  printf '%s\n' "$markers"
}

#######################################
# Prompt for one sequence and print it with all whitespace removed.
# Arguments: $1 - prompt text (only shown when stdin is a terminal)
# Outputs:   the normalized sequence and a newline on stdout
# Returns:   1 when nothing could be read or the input was blank
#######################################
read_sequence() {
  local line=''
  # Accept a final line that lacks a trailing newline as well.
  IFS= read -r -p "$1" line || [[ -n "$line" ]] || return 1
  line=${line//[[:space:]]/}
  [[ -n "$line" ]] || return 1
  printf '%s\n' "$line"
}

#######################################
# Entry point: obtain both sequences, echo them and print the marker line.
# Arguments: optional SEQ1 SEQ2; when absent both are read from stdin
# Returns:   0 on success, 1 on missing input or length mismatch
#######################################
main() {
  local seq1 seq2

  if (( $# >= 2 )); then
    seq1=${1//[[:space:]]/}
    seq2=${2//[[:space:]]/}
  else
    seq1=$(read_sequence 'Enter the first nucleotide sequence (e.g. ATGC or A T G C): ') || {
      printf 'error: no input for the first sequence\n' >&2
      return 1
    }
    seq2=$(read_sequence 'Enter the second nucleotide sequence (e.g. TACG or T A C G): ') || {
      printf 'error: no input for the second sequence\n' >&2
      return 1
    }
  fi

  printf '%s\n%s\n' "$seq1" "$seq2"
  compare_sequences "$seq1" "$seq2"
}

main "$@"
ベストアンサー1
awk を使用します:
# Compare two DNA sequences position by position with awk and print both
# sequences followed by a marker line: "+" for a complementary pair
# (A-T or G-C), "-" for anything else.
awk -v seq1='CATGCATGCTCAT' -v seq2='ATACGTTGCGTTA' '
BEGIN {
  # Use a numeric index instead of "for (i in array)": the in-loop visits
  # keys in an unspecified order, which could scramble the marker line.
  # substr() also avoids split() with an empty separator, which POSIX
  # leaves unspecified.
  n = length(seq1)
  for (i = 1; i <= n; i++) {
    # A position past the end of seq2 gives a one-character pair and is
    # therefore reported as "-".
    pair = substr(seq1, i, 1) substr(seq2, i, 1)
    if (pair == "AT" || pair == "TA" || pair == "GC" || pair == "CG")
      mark = "+"
    else
      mark = "-"
    cmp = cmp mark
  }
  print seq1 ORS seq2 ORS cmp
}'
出力:
CATGCATGCTCAT
ATACGTTGCGTTA
-+++++-----++
コメントを含む同じコード:
awk -v seq1='CATGCATGCTCAT' -v seq2='ATACGTTGCGTTA' '
# The two sequences are passed in as the awk variables seq1 and seq2.
# NOTE: this program is wrapped in shell single quotes, so the comments
# in here must not contain an apostrophe.
BEGIN {
  # partner[x] is the base that x has to face for the position to get "+".
  partner["A"] = "T"; partner["T"] = "A"
  partner["G"] = "C"; partner["C"] = "G"

  # Visit positions 1..length(seq1) strictly in order. A "for (i in array)"
  # loop has no guaranteed order and could shuffle the result.
  len = length(seq1)
  for (i = 1; i <= len; i++) {
    a = substr(seq1, i, 1)
    # b is empty when seq2 is shorter than seq1
    b = substr(seq2, i, 1)

    # The "in" test keeps characters other than A, T, G, C from creating
    # an empty partner entry that would wrongly match an empty b.
    if ((a in partner) && partner[a] == b)
      # complementary pair: A-T, T-A, G-C or C-G
      cmp = cmp "+"
    else
      # identical bases (AA, TT, ...) and every other combination
      cmp = cmp "-"
  }

  # Print the first sequence, the second sequence and then the marker line
  # collected in cmp, one per line.
  print seq1 ORS seq2 ORS cmp
}'