"Useful Microsoft Word Macros for Molecular Biologists and Protein Chemists", as published in Biotechniques June 2000, volume 28:1198-1201, written by Gerry Shaw (email shaw@ufbi.ufl.edu)
The point of this publication, which you can read in the journal or possibly download from http://www.biotechniques.com, is to make use of the Visual Basic programming language built into recent versions of Microsoft Word to perform useful manipulations on protein and nucleic acid sequences. The source code for these programs is listed below, and can simply be copied from this page and pasted into the "macros" menu of Microsoft Word. The first macro, called IEP, calculates the isoelectric point of a mouse-selected protein sequence. The sequence must be in upper case letters. Note that one common problem when cutting these macros from the browser page and pasting them into the Visual Basic editor is that the browser will probably insert line breaks into the longer lines of code, which will stop the macros from compiling; if that happens, rejoin each broken statement onto a single line. An alternative approach is to get the whole collection of macros by ftp/fetch from here. Initial responses to this approach have been very positive, so please email if you have any comments, problems, suggestions for improvements or ideas for other macros that would be useful.
Sub IEP()
'
' IEP Macro
' Macro created 07/01/98 by Gerry Shaw
'
' Estimates the isoelectric point (pI) of the protein sequence in the current
' selection and reports it in a message box.
'
' Only the upper-case one-letter codes C, D, E, H, K, R and Y are counted;
' every other character in the selection is ignored. One N-terminus and one
' C-terminus are always included in the charge balance.
'
    Dim seqText As String
    Dim seqLen As Long, pos As Long
    Dim nCys As Long, nAsp As Long, nGlu As Long, nHis As Long
    Dim nLys As Long, nArg As Long, nTyr As Long
    Dim pK(0 To 8) As Double
    Dim pH As Double
    Dim HH As Double
    Dim pcharge As Double, ncharge As Double

    seqText = Selection.Text
    seqLen = Len(seqText)
    If seqLen = 0 Then
        MsgBox ("No selection made")
        ' Stop here: without residues the scan below would still report a value.
        Exit Sub
    End If

    ' Count the residues that carry an ionisable side chain.
    For pos = 1 To seqLen
        Select Case Mid$(seqText, pos, 1)
            Case "C"
                nCys = nCys + 1
            Case "D"
                nAsp = nAsp + 1
            Case "E"
                nGlu = nGlu + 1
            Case "H"
                nHis = nHis + 1
            Case "K"
                nLys = nLys + 1
            Case "R"
                nArg = nArg + 1
            Case "Y"
                nTyr = nTyr + 1
        End Select
    Next pos

    ' Acid dissociation constants (Ka, i.e. 10^-pKa) of the ionisable groups.
    ' The approximate pKa equivalent of each constant is given for reference.
    pK(0) = 0.000446        ' C-term (pKa ~3.35)
    pK(1) = 0.0000851       ' Glu    (pKa ~4.07)
    pK(2) = 0.000126        ' Asp    (pKa ~3.90)
    pK(3) = 0.000000000661  ' Lys    (pKa ~9.18)
    pK(4) = 0.00000000102   ' Arg    (pKa ~8.99) NOTE(review): lower than commonly tabulated Arg values - confirm
    pK(5) = 0.00000000447   ' Cys    (pKa ~8.35)
    pK(6) = 0.000000912     ' His    (pKa ~6.04)
    pK(7) = 0.000000000776  ' Tyr    (pKa ~9.11)
    pK(8) = 0.000000000166  ' N-term (pKa ~9.78)

    ' Scan upward from pH 2 in steps of 0.1 until the positive charge no
    ' longer exceeds the negative charge.
    For pH = 2 To 12 Step 0.1
        ' HH is the proton concentration, 10^-pH
        HH = Exp(-pH * Log(10))

        ' positive charge: protonated Lys, Arg, His and the N-terminus
        pcharge = HH * nLys / (HH + pK(3)) _
                + HH * nArg / (HH + pK(4)) _
                + HH * nHis / (HH + pK(6)) _
                + HH / (HH + pK(8))

        ' negative charge: deprotonated Tyr, Cys, Glu, Asp and the C-terminus.
        ' The C-terminus term uses pK(0); the original used the Glu constant here.
        ncharge = nTyr * (1 - (HH / (HH + pK(7)))) _
                + nCys * (1 - (HH / (HH + pK(5)))) _
                + 1 - (HH / (HH + pK(0))) _
                + nGlu * (1 - (HH / (HH + pK(1)))) _
                + nAsp * (1 - (HH / (HH + pK(2))))

        ' leave the loop at the first pH where pcharge is not greater than ncharge
        If (pcharge <= ncharge) Then Exit For
    Next pH

    ' The value found is the isoelectric point, so it is labelled pI.
    MsgBox ("pI = " & Format(pH, "fixed"))
End Sub
The second macro, Nucweight, determines the molecular weight of a mouse selected DNA sequence. Sequence for this version must be uppercase text. To make it read lowercase, which is what you get with most sequence databases nowadays, change the Case commands; for example change Case "A" to Case "a" and make corresponding changes to the other Case commands.
Sub Nucweight()
'
' Nucweight Macro
' Macro recorded 07/02/98 by Gerry Shaw
'
' Reports the number of bases and the molecular weight of the DNA sequence
' in the current selection. Only upper-case A, T, G and C are counted; any
' other character is excluded from the base count and adds no weight.
'
' This version for DNA sequences: for RNA MW for A = 329.2, U = 306.1,
' G = 345.2, C = 305.2
'
    Dim seqText As String
    Dim seqLen As Long, pos As Long
    Dim skipped As Long     ' characters that are not A, T, G or C
    Dim MW As Double

    seqText = Selection.Text
    seqLen = Len(seqText)

    For pos = 1 To seqLen
        Select Case Mid$(seqText, pos, 1)
            Case "A"
                MW = MW + 313.2
            Case "T"
                MW = MW + 304.2
            Case "G"
                MW = MW + 329.2
            Case "C"
                MW = MW + 289.2
            Case Else
                skipped = skipped + 1
        End Select
    Next pos

    ' A constant 18 is added to the summed base weights.
    MW = MW + 18

    ' MW is still exactly 18 when no base was recognised.
    If MW > 18 Then
        ' Format the weight as in Protweight so that floating-point summation
        ' noise is not shown.
        MsgBox ("Selection includes " & seqLen - skipped & " bases, " & _
                "Molecular Weight= " & Format(MW, "fixed") & " Daltons")
    Else
        MsgBox ("No sequence selected")
    End If
End Sub
This macro counts the number of amino acids and determines the molecular weight of a mouse-selected protein sequence.
Sub Protweight()
'
' Protweight Macro
' Macro recorded 07/02/98 by Gerry Shaw
'
' Reports the number of amino acids and the molecular weight of the protein
' sequence in the current selection. Only the twenty upper-case one-letter
' residue codes are counted; any other character is excluded from the
' residue count and adds no weight.
'
    Dim seqText As String
    Dim seqLen As Long, pos As Long
    Dim skipped As Long     ' characters that are not a recognised residue code
    Dim MW As Double

    seqText = Selection.Text
    seqLen = Len(seqText)

    ' Sum the per-residue weights.
    For pos = 1 To seqLen
        Select Case Mid$(seqText, pos, 1)
            Case "A"
                MW = MW + 71.09
            Case "C"
                MW = MW + 103.15
            Case "D"
                MW = MW + 115.1
            Case "E"
                MW = MW + 129.13
            Case "F"
                MW = MW + 147.19
            Case "G"
                MW = MW + 57.07
            Case "H"
                MW = MW + 137.16
            Case "I"
                MW = MW + 113.17
            Case "K"
                MW = MW + 128.19
            Case "L"
                MW = MW + 113.17
            Case "M"
                ' NOTE(review): 131.31 differs from the commonly tabulated Met
                ' residue mass (about 131.2) - possibly a typo; confirm.
                MW = MW + 131.31
            Case "N"
                MW = MW + 114.12
            Case "P"
                MW = MW + 97.13
            Case "Q"
                MW = MW + 128.15
            Case "R"
                MW = MW + 156.2
            Case "S"
                MW = MW + 87.09
            Case "T"
                MW = MW + 101.12
            Case "V"
                MW = MW + 99.15
            Case "W"
                MW = MW + 186.23
            Case "Y"
                MW = MW + 163.19
            Case Else
                skipped = skipped + 1
        End Select
    Next pos

    ' A constant 18 is added to the summed residue weights.
    MW = MW + 18

    ' MW is still exactly 18 when no residue was recognised.
    If MW > 18 Then
        MsgBox ("Selection includes " & seqLen - skipped & _
                " amino acids, Molecular Weight= " & _
                Format(MW, "fixed") & " Daltons")
    Else
        MsgBox ("No Sequence Selected")
    End If
End Sub
This macro decodes mouse selected DNA sequence in all three forward reading frames.
Sub Decode()
'
' Decode Macro
' Macro created 07/19/98 by Gerry Shaw
'
' Translates the DNA sequence in the current selection in all three forward
' reading frames and shows the result in a message box.
'
' Only the first MAX_BASES characters of the selection are decoded; to decode
' a longer sequence, increase MAX_BASES.
' Bases must be upper-case T, C, A or G. A codon that contains any other
' character is reported as "X".
'
    Const MAX_BASES As Long = 24
    ' Base order used to index the code table: T = 0, C = 1, A = 2, G = 3.
    Const BASES As String = "TCAG"
    ' Amino acid for codon index 16 * first + 4 * second + third (0..63);
    ' "*" marks a stop codon.
    Const CODE_TABLE As String = _
        "FFLLSSSSYY**CC*W" & _
        "LLLLPPPPHHQQRRRR" & _
        "IIIMTTTTNNKKSSRR" & _
        "VVVVAAAADDEEGGGG"

    Dim seqText As String
    Dim X As Long, i As Long
    Dim b1 As Long, b2 As Long, b3 As Long
    Dim aa As String
    Dim readingframe1 As String
    Dim readingframe2 As String
    Dim readingframe3 As String

    seqText = Selection.Text
    X = Len(seqText)
    If X > MAX_BASES Then X = MAX_BASES

    ' i is the position of the LAST base of the codon being decoded, so the
    ' codon ending at i belongs to frame 1 when i is a multiple of 3, to
    ' frame 2 when i Mod 3 = 1 and to frame 3 when i Mod 3 = 2.
    For i = 3 To X
        ' InStr returns 0 when the character is not one of T, C, A, G.
        b1 = InStr(1, BASES, Mid$(seqText, i - 2, 1), vbBinaryCompare)
        b2 = InStr(1, BASES, Mid$(seqText, i - 1, 1), vbBinaryCompare)
        b3 = InStr(1, BASES, Mid$(seqText, i, 1), vbBinaryCompare)

        If b1 = 0 Or b2 = 0 Or b3 = 0 Then
            aa = "X"
        Else
            aa = Mid$(CODE_TABLE, (b1 - 1) * 16 + (b2 - 1) * 4 + (b3 - 1) + 1, 1)
        End If

        Select Case i Mod 3
            Case 0
                readingframe1 = readingframe1 & aa
            Case 1
                readingframe2 = readingframe2 & aa
            Case 2
                readingframe3 = readingframe3 & aa
        End Select
    Next i

    If X > 0 Then
        MsgBox ("Reading Frame 1: " & readingframe1 & Chr$(13) & _
                "Reading Frame 2: " & readingframe2 & Chr$(13) & _
                "Reading frame 3: " & readingframe3)
    Else
        MsgBox ("No sequence selected")
    End If
End Sub
Sub Tm()
'
' Tm Macro
' Macro created 07/30/99 by Gerry Shaw
'
' Reports the base count, GC content and melting temperature of the DNA
' sequence in the current selection. Only upper-case A, T, G and C are
' counted; all other characters are ignored.
'
' Sequences of 18 bases or fewer use Tm = 2 * (A + T) + 4 * (G + C);
' longer sequences use 81.5 + 16.6 * log10(0.15) + 0.41 * (%GC) - 600 / N.
'
    Dim seqText As String
    Dim seqLen As Long, pos As Long
    Dim baseCount As Long   ' recognised bases (N in the formulas above)
    Dim atCount As Long, gcCount As Long
    Dim melt As Double

    seqText = Selection.Text
    seqLen = Len(seqText)

    For pos = 1 To seqLen
        Select Case Mid$(seqText, pos, 1)
            Case "A", "T"
                baseCount = baseCount + 1
                atCount = atCount + 1
            Case "G", "C"
                baseCount = baseCount + 1
                gcCount = gcCount + 1
        End Select
    Next pos

    If baseCount = 0 Then
        MsgBox ("No sequence selected")
        ' Stop here: the GC percentage below divides by the base count.
        Exit Sub
    End If

    If baseCount <= 18 Then
        melt = atCount * 2 + gcCount * 4
        MsgBox ("There are " & baseCount & " Bases selected, GC content is " & _
                Format(100 * gcCount / baseCount, "fixed") & "%" & Chr(13) & _
                "Tm = " & Format(melt, "fixed") & _
                " Degrees Centigrade, using the method of Itakura et al., good for oligos of 18 bases or less")
    Else
        ' Log is the natural logarithm, so Log(0.15) / Log(10) is log10(0.15).
        melt = 81.5 + 16.6 * Log(0.15) / Log(10) _
             + 0.41 * ((100 * gcCount) / baseCount) - (600 / baseCount)
        MsgBox ("There are " & baseCount & " Bases selected, GC content is " & _
                Format(100 * gcCount / baseCount, "fixed") & "%" & Chr(13) & _
                "Tm = " & Format(melt, "fixed") & _
                " Degrees Centigrade, using method of Bolton and McCarthy, good for oligos larger than 18 base ")
    End If
End Sub
Sub Reformat()
'
' Reformat Macro
' Macro created 1/5/00 by Gerry Shaw
'
' Strips sequence-listing clutter by running a replace-all with an empty
' replacement, through Selection.Find, for each of: the digits 0-9, spaces,
' paragraph marks (^p), hyphens and commas.
'
    Dim unwanted As Variant
    Dim item As Variant

    ' Same search strings, in the same order, as the original one-call-per-string version.
    unwanted = Array("0", "1", "2", "3", "4", "5", "6", "7", "8", "9", _
                     " ", "^p", "-", ",")

    For Each item In unwanted
        Selection.Find.Execute FindText:=CStr(item), ReplaceWith:="", _
            Replace:=wdReplaceAll
    Next item
End Sub
Sub Reverse()
'
' Reverse Macro
' Macro created 1/19/00 by Gerry Shaw
'
' Shows the reverse complement of the DNA sequence in the current selection.
' When the selection is longer than 30 characters only its last 30 are used.
' Upper- and lower-case A, C, G and T are recognised and the output is
' upper case; any other character leaves a gap at its position in the output.
' The output is grouped in blocks of ten.
'
    Const SHOWN As Long = 30

    ' Long rather than Integer: an Integer loop counter overflows when the
    ' selection is longer than 32767 characters.
    Dim i As Long, limit As Long
    Dim X As Long
    Dim Z As Long           ' number of recognised bases
    Dim seqText As String
    Dim outputarray(0 To 36) As String
    Dim outputstring As String

    seqText = Selection.Text
    X = Len(seqText)

    ' Start at the first of the last SHOWN characters. The original started
    ' one character earlier and filled an array slot that was never displayed.
    If X > SHOWN Then limit = X - SHOWN + 1 Else limit = 1

    ' Character i of the input lands at position X + 1 - i of the output,
    ' which reverses the order while complementing each base.
    For i = limit To X
        Select Case Mid$(seqText, i, 1)
            Case "T", "t"
                outputarray(X + 1 - i) = "A"
                Z = Z + 1
            Case "C", "c"
                outputarray(X + 1 - i) = "G"
                Z = Z + 1
            Case "A", "a"
                outputarray(X + 1 - i) = "T"
                Z = Z + 1
            Case "G", "g"
                outputarray(X + 1 - i) = "C"
                Z = Z + 1
        End Select
    Next i

    If Z > 0 Then
        ' Join positions 1..30 with a space after the 10th and 20th, as the
        ' original hand-written concatenation did.
        For i = 1 To SHOWN
            outputstring = outputstring & outputarray(i)
            If i = 10 Or i = 20 Then outputstring = outputstring & " "
        Next i

        If X > SHOWN Then
            outputstring = "3' 30 bases are: " & outputstring
        Else
            outputstring = "3' Sequence is " & outputstring
        End If
        MsgBox (outputstring)
    Else
        MsgBox ("No sequence selected")
    End If
End Sub
Sub AAcomp()
'
' AAcomp Macro
' Macro created 07/09/99 by Gerry Shaw
'
' Shows the amino acid composition of the protein sequence in the current
' selection as a percentage for each of the twenty residues. Only upper-case
' one-letter codes are counted; all other characters are ignored and do not
' contribute to the total.
'
    ' One-letter codes, in the order in which the results are listed.
    Const RESIDUES As String = "ACDEFGHIKLMNPQRSTVWY"

    Dim labels() As String
    Dim counts(1 To 20) As Long
    Dim seqText As String
    Dim seqLen As Long, pos As Long
    Dim idx As Long
    Dim total As Long
    Dim report As String

    ' Display labels, position-for-position with RESIDUES. Split always
    ' returns a zero-based array.
    labels = Split("Ala,Cys,Asp,Glu,Phe,Gly,His,Iso,Lys,Leu," & _
                   "Met,Asn,Pro,Gln,Arg,Ser,Thr,Val,Trp,Tyr", ",")

    seqText = Selection.Text
    seqLen = Len(seqText)

    ' Count each recognised residue; InStr returns 0 for any other character.
    For pos = 1 To seqLen
        idx = InStr(1, RESIDUES, Mid$(seqText, pos, 1), vbBinaryCompare)
        If idx > 0 Then
            counts(idx) = counts(idx) + 1
            total = total + 1
        End If
    Next pos

    ' Guard on the number of recognised residues. The original tested the
    ' selection length instead, which divided by zero when the selection held
    ' no residue letters and showed raw counts for a one-residue selection.
    If total > 0 Then
        For idx = 1 To 20
            report = report & " " & labels(idx - 1) & " = " & _
                     Format(counts(idx) * 100 / total, "fixed") & "%" & Chr(13)
        Next idx
        MsgBox (report)
    Else
        MsgBox ("No sequence selected")
    End If
End Sub