"Useful Microsoft Word Macros for Molecular Biologists and Protein Chemists", as published in Biotechniques June 2000, volume 28:1198-1201, written by Gerry Shaw (email shaw@ufbi.ufl.edu)

The point of this publication, which you can read in the journal or possibly download from http://www.biotechniques.com, is to make use of the Visual Basic programming language built into recent versions of Microsoft Word to perform useful manipulations on protein and nucleic acid sequences. The source code for these programs is listed below, and can simply be copied from this page and pasted into the "macros" menu of Microsoft Word. The first macro, called IEP, calculates the isoelectric point of a mouse-selected protein sequence. The sequence must be in uppercase letters. Note that one common problem when cutting these macros from the browser page and pasting them into the Visual Basic editor is that the browser will probably insert line breaks into the longer lines of code, which will break the macro. An alternative approach is to get the whole collection of macros by ftp/fetch from here. Initial responses to this approach have been very positive, so please email if you have any comments, problems, suggestions for improvements or ideas for other macros that would be useful.

Sub IEP()
'
' IEP Macro
' Macro created 07/01/98 by Gerry Shaw
'
' Estimates the isoelectric point of the selected protein sequence and shows
' it in a message box. Only the uppercase one-letter codes C, D, E, H, K, R
' and Y are counted; every other character in the selection is ignored.
'
Dim seq As String
Dim seqLen As Long
Dim pos As Long
Dim nCys As Long, nAsp As Long, nGlu As Long, nHis As Long
Dim nLys As Long, nArg As Long, nTyr As Long
Dim pH As Double, HH As Double
Dim pcharge As Double, ncharge As Double

' Read the selection once rather than on every loop iteration
seq = Selection.Text
seqLen = Len(seq)
If seqLen = 0 Then
    MsgBox ("No selection made")
    ' Nothing to analyse, so stop instead of reporting a meaningless value
    Exit Sub
End If

' Count the ionisable side chains
For pos = 1 To seqLen
    Select Case Mid$(seq, pos, 1)
    Case "C"
        nCys = nCys + 1
    Case "D"
        nAsp = nAsp + 1
    Case "E"
        nGlu = nGlu + 1
    Case "H"
        nHis = nHis + 1
    Case "K"
        nLys = nLys + 1
    Case "R"
        nArg = nArg + 1
    Case "Y"
        nTyr = nTyr + 1
    End Select
Next pos

' Array of nine acid dissociation constants (Ka, not pKa) for the ionisable
' side chains and the N- and C-termini

Dim pK(8) As Double

pK(0) = 0.000446 ' C-term
pK(1) = 0.0000851 ' Glu
pK(2) = 0.000126 ' Asp
pK(3) = 0.000000000661 ' Lys
pK(4) = 0.00000000102 ' Arg
pK(5) = 0.00000000447 ' Cys
pK(6) = 0.000000912 ' His
pK(7) = 0.000000000776 ' Tyr
pK(8) = 0.000000000166 ' N-term

' Scan upwards from pH 2 in steps of 0.1 until the net charge is no longer positive
For pH = 2 To 12 Step 0.1

    ' HH is the proton concentration, 10 ^ -pH
    HH = Exp(-pH * Log(10))

    ' positive charge is function of proton conc, number of K, R and H and N-terminus
    pcharge = HH * nLys / (HH + pK(3)) + HH * nArg / (HH + pK(4)) + HH * nHis / (HH + pK(6)) + HH / (HH + pK(8))

    ' negative charge is function of proton conc, number of Tyr, Cys, Glu, Asp and C-terminus
    ' (the C-terminus term uses its own constant pK(0), not the Glu constant)
    ncharge = nTyr * (1 - (HH / (HH + pK(7)))) + nCys * (1 - (HH / (HH + pK(5)))) + 1 - (HH / (HH + pK(0))) + nGlu * (1 - (HH / (HH + pK(1)))) + nAsp * (1 - (HH / (HH + pK(2))))

    ' exits for loop when pcharge is no longer greater than ncharge
    If (pcharge <= ncharge) Then Exit For
Next pH

' If no crossover is found in the scanned range, pH holds the value one step past 12
MsgBox ("pI = " & Format(pH, "fixed"))
End Sub

The second macro, Nucweight, determines the molecular weight of a mouse selected DNA sequence. Sequence for this version must be uppercase text. To make it read lowercase, which is what you get with most sequence databases nowadays, change the Case commands; for example change Case "A" to Case "a" and make corresponding changes to the other Case commands.


Sub Nucweight()
'
' Nucweight Macro
' Macro recorded 07/02/98 by Gerry Shaw
' This version for DNA sequences: for RNA MW for A = 329.2, U = 306.1, G = 345.2, C= 305.2
'
' Adds up a per-base mass for every uppercase A, T, G or C in the selection,
' adds 18, and reports the base count and total in a message box.
' Any other character is counted separately and excluded from the base count.
'
Dim selectedText As String
Dim charCount As Long
Dim skipped As Long
Dim idx As Long
Dim totalMass As Double

selectedText = Selection.Text
charCount = Len(selectedText)

For idx = 1 To charCount
    Select Case Mid$(selectedText, idx, 1)
    Case "A": totalMass = totalMass + 313.2
    Case "T": totalMass = totalMass + 304.2
    Case "G": totalMass = totalMass + 329.2
    Case "C": totalMass = totalMass + 289.2
    Case Else: skipped = skipped + 1
    End Select
Next idx

totalMass = totalMass + 18

' The total is exactly 18 only when no base was recognised
If totalMass > 18 Then
    MsgBox ("Selection includes " & charCount - skipped & " bases, " & "Molecular Weight= " & totalMass & " Daltons")
Else
    MsgBox ("No sequence selected")
End If
End Sub

This macro counts the number of amino acids and determines the molecular weight of a mouse-selected protein sequence.

Sub Protweight()
'
' Protweight Macro
' Macro recorded 07/02/98 by Gerry Shaw
'
' Counts the uppercase one-letter amino acid codes in the selection and sums
' their residue masses, then adds 18 once and reports the count and total in
' a message box. Characters that are not one of the 20 codes are counted
' separately and excluded from the amino acid count.
'
Dim seq As String
Dim X As Long
Dim YY As Long
Dim Z As Long
Dim MW As Double

' Read the selection once rather than on every loop iteration
seq = Selection.Text
X = Len(seq)
For YY = 1 To X
    Select Case Mid$(seq, YY, 1)
    Case "A"
        MW = MW + 71.09
    Case "C"
        MW = MW + 103.15
    Case "D"
        MW = MW + 115.1
    Case "E"
        MW = MW + 129.13
    Case "F"
        MW = MW + 147.19
    Case "G"
        MW = MW + 57.07
    Case "H"
        MW = MW + 137.16
    Case "I"
        MW = MW + 113.17
    Case "K"
        MW = MW + 128.19
    Case "L"
        MW = MW + 113.17
    Case "M"
        ' Free methionine (149.21) minus 18, matching how the other entries
        ' in this table are derived; the earlier value 131.31 was a typo
        MW = MW + 131.21
    Case "N"
        MW = MW + 114.12
    Case "P"
        MW = MW + 97.13
    Case "Q"
        MW = MW + 128.15
    Case "R"
        MW = MW + 156.2
    Case "S"
        MW = MW + 87.09
    Case "T"
        MW = MW + 101.12
    Case "V"
        MW = MW + 99.15
    Case "W"
        MW = MW + 186.23
    Case "Y"
        MW = MW + 163.19
    Case Else
        ' Not an amino acid code: leave it out of the reported count
        Z = Z + 1
    End Select
Next YY

MW = MW + 18

' MW is exactly 18 only when no amino acid code was recognised
If MW > 18 Then
    MsgBox ("Selection includes " & X - Z & " amino acids, Molecular Weight= " & Format(MW, "fixed") & " Daltons")
Else
    MsgBox ("No Sequence Selected")
End If
End Sub

This macro decodes mouse selected DNA sequence in all three forward reading frames.


Sub Decode()
'
' Decode Macro
' Macro created 07/19/98 by Gerry Shaw
'
' Translates the start of the selected DNA sequence in all three forward
' reading frames and shows the result in a message box.
'
' To decode longer sequence increase MAX_BASES; the reading-frame strings
' grow automatically.
'
' Upper- and lowercase A, C, G and T are accepted. A codon containing any
' other character is reported as "X" rather than being silently translated.

' Number of leading characters of the selection that are decoded
Const MAX_BASES As Long = 30
' Position of a base in this string, minus one, is its base-4 digit
Const BASE_ORDER As String = "TCAG"
' Standard genetic code, indexed by 16 * first + 4 * second + third (+ 1)
Const CODON_TABLE As String = "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG"

Dim seq As String
Dim seqLen As Long
Dim i As Long
Dim k As Long
Dim digit As Long
Dim codon As Long
Dim residue As String
Dim frames(2) As String

seq = UCase$(Selection.Text)
seqLen = Len(seq)
If seqLen > MAX_BASES Then seqLen = MAX_BASES

If seqLen = 0 Then
    MsgBox ("No sequence selected")
    Exit Sub
End If

' i is the position of the third base of the codon being translated
For i = 3 To seqLen
    codon = 0
    For k = 2 To 0 Step -1
        digit = InStr(1, BASE_ORDER, Mid$(seq, i - k, 1), vbBinaryCompare) - 1
        If digit < 0 Then
            ' Not A, C, G or T: the codon cannot be translated
            codon = -1
            Exit For
        End If
        codon = codon * 4 + digit
    Next k

    If codon < 0 Then
        residue = "X"
    Else
        residue = Mid$(CODON_TABLE, codon + 1, 1)
    End If

    ' Codons ending at 3, 6, 9... belong to frame 1; 4, 7, 10... to frame 2;
    ' 5, 8, 11... to frame 3
    frames(i Mod 3) = frames(i Mod 3) & residue
Next i

MsgBox ("Reading Frame 1: " & frames(0) & Chr$(13) & "Reading Frame 2: " & frames(1) & Chr$(13) & "Reading frame 3: " & frames(2))

End Sub

Sub Tm()
'
' Tm Macro
' Macro created 07/30/99 by Gerry Shaw
'
' Counts the uppercase A, T, G and C characters in the selection and reports
' the base count, GC content and a melting temperature in a message box.
' Up to 18 bases: Tm = 2 * (A + T) + 4 * (G + C).
' More than 18 bases: Tm = 81.5 + 16.6 * log10(0.15) + 0.41 * (%GC) - 600 / N.
' Any other character in the selection is ignored.
'
Dim seq As String
Dim pos As Long
Dim N As Long
Dim atCount As Long
Dim gcCount As Long
Dim gcPercent As Double
Dim melt As Double

' Read the selection once rather than on every loop iteration
seq = Selection.Text
For pos = 1 To Len(seq)
    Select Case Mid$(seq, pos, 1)
    Case "A", "T"
        N = N + 1
        atCount = atCount + 1
    Case "G", "C"
        N = N + 1
        gcCount = gcCount + 1
    End Select
Next pos

If N = 0 Then
    MsgBox ("No sequence selected")
    ' Stop here: the GC percentage below would otherwise divide by zero
    Exit Sub
End If

gcPercent = 100 * gcCount / N

If N <= 18 Then
    melt = atCount * 2 + gcCount * 4
    MsgBox ("There are " & N & " Bases selected, GC content is " & Format(gcPercent, "fixed") & "%" & Chr(13) & "Tm = " & Format(melt, "fixed") & " Degrees Centigrade, using the method of Itakura et al., good for oligos of 18 bases or less")
Else
    melt = 81.5 + 16.6 * Log(0.15) / Log(10) + 0.41 * gcPercent - (600 / N)
    MsgBox ("There are " & N & " Bases selected, GC content is " & Format(gcPercent, "fixed") & "%" & Chr(13) & "Tm = " & Format(melt, "fixed") & " Degrees Centigrade, using method of Bolton and McCarthy, good for oligos larger than 18 base ")
End If
End Sub

Sub Reformat()
'
' Reformat Macro
' Macro created 1/5/00 by Gerry Shaw
'
' Runs a replace-all through Selection.Find for each item below, replacing
' it with nothing: the digits 0-9, the space, the paragraph mark (^p),
' the hyphen and the comma.
'
Dim unwanted As Variant
Dim token As Variant

' Same search strings, in the same order, as one Execute call per item
unwanted = Array("0", "1", "2", "3", "4", "5", "6", "7", "8", "9", " ", "^p", "-", ",")

For Each token In unwanted
    Selection.Find.Execute FindText:=token, ReplaceWith:="", Replace:=wdReplaceAll
Next token

End Sub
Sub Reverse()
'
' Reverse Macro
' Macro created 1/19/00 by Gerry Shaw
'
' Shows the reverse complement of the last MAX_BASES characters of the
' selection (or of the whole selection if it is shorter) in a message box,
' in groups of ten. Upper- and lowercase A, C, G and T are complemented;
' any other character contributes nothing to the output.
'
Const MAX_BASES As Long = 30

Dim seq As String
Dim seqLen As Long
Dim firstPos As Long
Dim i As Long
Dim outPos As Long
Dim basesFound As Long
Dim comp As String
Dim outputstring As String

seq = Selection.Text
seqLen = Len(seq)

' Window covers exactly the last MAX_BASES characters
If seqLen > MAX_BASES Then firstPos = seqLen - MAX_BASES + 1 Else firstPos = 1

' Walk the window backwards so the output is built in reading order
For i = seqLen To firstPos Step -1
    Select Case UCase$(Mid$(seq, i, 1))
    Case "T"
        comp = "A"
    Case "C"
        comp = "G"
    Case "A"
        comp = "T"
    Case "G"
        comp = "C"
    Case Else
        comp = ""
    End Select

    If comp <> "" Then basesFound = basesFound + 1

    ' Output position of this character; a space separates each group of ten
    outPos = seqLen + 1 - i
    If outPos = 11 Or outPos = 21 Then outputstring = outputstring & " "
    outputstring = outputstring & comp
Next i

If basesFound > 0 Then
    If seqLen > MAX_BASES Then
        outputstring = ("3' 30 bases are: " & outputstring)
    Else
        outputstring = ("3' Sequence is " & outputstring)
    End If
    MsgBox (outputstring)
Else
    MsgBox ("No sequence selected")
End If

End Sub

Sub AAcomp()
'
' AAcomp Macro
' Macro created 07/09/99 by Gerry Shaw
'
' Counts each of the 20 uppercase one-letter amino acid codes in the
' selection and shows every residue's share of the recognised total, as a
' percentage, in a message box. Characters that are not one of the 20 codes
' are ignored.
'
' One-letter codes, in the order they are reported
Const RESIDUES As String = "ACDEFGHIKLMNPQRSTVWY"

Dim labels As Variant
Dim counts(1 To 20) As Long
Dim seq As String
Dim pos As Long
Dim idx As Long
Dim total As Long
Dim report As String

' Three-letter names in the same order as RESIDUES
labels = Array("Ala", "Cys", "Asp", "Glu", "Phe", "Gly", "His", "Ile", "Lys", "Leu", "Met", "Asn", "Pro", "Gln", "Arg", "Ser", "Thr", "Val", "Trp", "Tyr")

' Read the selection once rather than on every loop iteration
seq = Selection.Text
For pos = 1 To Len(seq)
    idx = InStr(1, RESIDUES, Mid$(seq, pos, 1), vbBinaryCompare)
    If idx > 0 Then
        counts(idx) = counts(idx) + 1
        total = total + 1
    End If
Next pos

' Guard on the recognised total, not the raw selection length, so the
' percentages below can never divide by zero
If total = 0 Then
    MsgBox ("No sequence selected")
    Exit Sub
End If

For idx = 1 To 20
    ' LBound keeps the lookup correct whatever Option Base is in effect
    report = report & " " & labels(LBound(labels) + idx - 1) & " = " & Format(counts(idx) * 100 / total, "fixed") & "%" & Chr(13)
Next idx

MsgBox (report)

End Sub