As I switched from IBM Connections to WordPress for my blog, I started thinking about my existing content. Was there a way to move it all over without having to manually copy, paste and recreate all 268 entries?
Well, there is, and this is how I did it, using just a few tools. First I used Wget to retrieve my old blog. This put all the posts in one folder (entry), and all images in another (resource). It was then a simple task to write a LotusScript agent that processed each file in that folder and read the content, parsing out the title, the date originally posted and the HTML for the blog post itself. I put that data into separate Notes documents, after performing some cleanup and string replacement.
I had already moved all images to a folder on my primary web server, so I performed a replace of the image URLs in the HTML, to have all images point to their new location. I also had to fix some special characters and replace them with the corresponding HTML entities.
Now that I had all the data, I just wrote another agent to export the data out again, to create a CSV file. I then installed a CSV importer in my WordPress blog and used it to import the file I had just created.
After a few tweaks I performed a successful import. Later I realized I had missed a few special characters, so I had to fix those entries, but we are talking about 4 or 5, out of 268 entries.
If there is interest, I might clean up the code a little and create a nicer UI (right now many of the values, like path and URL, are hard-coded) and then release the code in case anyone else is planning to go through the same exercise. Below is the existing code to read the blog entries into a simple Notes database.
' Import agent: reads the downloaded blog entry files and stores each one as a Notes document.
Option Public
Option Declare
' Folder containing the downloaded blog entry files; assigned in Initialize.
Dim entrydir As String
' Folder containing the downloaded resource files; assigned in Initialize
' but not read anywhere else in the code shown here.
Dim resourcedir As String
Sub Initialize
    ' Agent entry point for the import.
    ' Pass 1 collects every file in the entry folder; pass 2 hands each file
    ' to ProcessBlogEntry while printing a running progress counter.
    Dim entryName As String
    Dim pending List As String      ' file name -> full local path
    Dim totalCount As Integer
    Dim doneCount As Integer

    entrydir = "D:\BleedYellowBlog\www.bleedyellow.com\blogs\texasswede\entry\"
    resourcedir = "D:\BleedYellowBlog\www.bleedyellow.com\blogs\texasswede\resource\"

    ' Pass 1: build the complete work list before any file is processed.
    totalCount = 0
    entryName = Dir$(entrydir + "*.*")
    While entryName <> ""
        pending(entryName) = entrydir + entryName
        totalCount = totalCount + 1
        entryName = Dir$()
    Wend

    ' Pass 2: import the entries one at a time.
    doneCount = 0
    ForAll fullPath In pending
        doneCount = doneCount + 1
        Print "Processing " & doneCount & " of " & totalCount
        Call ProcessBlogEntry(ListTag(fullPath), fullPath)
    End ForAll
End Sub
Function FixHTML(html As String) As String
    ' Rewrites one line of exported blog HTML for its new home.
    '
    ' html:    a fragment of HTML taken from an old blog entry.
    ' Returns: the fragment with resource URLs pointing at the new server and
    '          typographic characters replaced by HTML entities.
    '
    ' The special characters are matched by Unicode code point (UChr$) rather
    ' than typed as literals, so the source survives copy/paste and re-encoding.
    ' Replacements are plain entities; nothing is wrapped in extra quote marks.
    Const NEW_RESOURCE_URL = "http://www.texasswede.com/blogfiles/resource/"
    Dim tmp As String

    ' Resource links exist with both schemes; point both at the new location.
    tmp = Replace(html, "https://www.bleedyellow.com/blogs/texasswede/resource/", NEW_RESOURCE_URL)
    tmp = Replace(tmp, "http://www.bleedyellow.com/blogs/texasswede/resource/", NEW_RESOURCE_URL)
    tmp = Replace(tmp, "/BLOGS_UPLOADED_IMAGES/", "/uploaded_images/")

    ' U+00B4 acute accent
    tmp = Replace(tmp, UChr$(180), "&acute;")
    ' U+2019 right single quotation mark (mapped to the acute accent, as before)
    tmp = Replace(tmp, UChr$(8217), "&acute;")
    ' U+201C / U+201D curly double quotes -> straight double quote entity
    tmp = Replace(tmp, UChr$(8220), "&quot;")
    tmp = Replace(tmp, UChr$(8221), "&quot;")
    ' U+2026 horizontal ellipsis
    tmp = Replace(tmp, UChr$(8230), "&hellip;")

    ' Drop word-break hints.
    tmp = Replace(tmp, "<wbr>", "")
    ' Keep a lone space between a closing ">" and a following "< " visible.
    tmp = Replace(tmp, "> < ", ">&nbsp;< ")

    FixHTML = tmp
End Function
Function ProcessBlogEntry(filename As String, localpath As String) As Boolean
    ' Imports one downloaded blog entry file into a new "Blog Entry" document
    ' in the current database.
    '
    ' filename:  file name of the entry; appended to the old site URL to form OriginalURL.
    ' localpath: full local path of the file to read (opened as UTF-8).
    ' Returns:   True when the document was saved, False when anything failed
    '            (the failure is reported with Print and the file is closed).
    Dim session As New NotesSession
    Dim db As NotesDatabase
    Dim blogentry As NotesDocument
    Dim rtitem As NotesRichTextItem
    Dim siteurl As String
    Dim html List As String
    Dim tmp As String
    Dim lowered As String
    Dim inContent As Boolean
    Dim titlesection As Boolean
    Dim row As Integer
    Dim currow As Integer
    Dim titletext As String
    Dim title As String
    Dim posteddate As String
    Dim gmtValue As Double
    Dim fileNum As Integer
    Dim fileOpen As Boolean

    ProcessBlogEntry = False
    On Error GoTo failed

    inContent = False
    titlesection = False
    fileOpen = False
    row = 0

    ' Use a free file number instead of a hard-coded #1 so this cannot
    ' collide with a file opened elsewhere.
    fileNum = FreeFile()
    Open localpath For Input As fileNum Charset="UTF-8"
    fileOpen = True

    Do Until EOF(fileNum)
        Line Input #fileNum, tmp
        lowered = LCase(tmp)

        ' The post body runs from the content container up to the rating marker.
        If InStr(tmp, |class="entryContentContainer"|) > 0 Then
            inContent = True
        End If
        If inContent Then
            If InStr(lowered, |<!-- rating -->|) > 0 Then
                inContent = False
            End If
        End If

        ' The title markup sits between these two HTML comments.
        If InStr(lowered, |<!-- entry title and info -->|) > 0 Then
            titlesection = True
        End If
        If titlesection Then
            If InStr(lowered, |<!-- user name, date, meta info -->|) > 0 Then
                titlesection = False
            End If
        End If
        If titlesection Then
            titletext = titletext + tmp
        End If

        ' The text between "localize (" and "));" is later converted as a
        ' JavaScript millisecond timestamp.
        If InStr(lowered, |blogsdate.date.localize|) > 0 Then
            posteddate = StrLeft(StrRight(tmp, "localize ("), "));")
        End If

        If inContent Then
            row = row + 1
            html(CStr(row)) = tmp
        End If
    Loop
    Close fileNum
    fileOpen = False

    Set db = session.CurrentDatabase
    Set blogentry = New NotesDocument(db)
    blogentry.Form = "Blog Entry"

    ' Title is the text of the first <h4> element; turn encoded quotes back
    ' into plain quotes (handles both double- and single-encoded forms).
    title = FullTrim(StrLeft(StrRight(titletext, "<h4>"), "</h4>"))
    title = Replace(title, "&amp;quot;", |"|)
    title = Replace(title, "&quot;", |"|)

    Set rtitem = New NotesRichTextItem(blogentry, "Content")

    ' JavaScript timestamps count milliseconds from 1970-01-01 00:00 GMT.
    ' Convert straight to a GMT date value so the " GMT" label is accurate;
    ' no local time-zone shift is applied here.
    If Len(Trim$(posteddate)) > 0 And IsNumeric(posteddate) Then
        gmtValue = CDbl(DateNumber(1970, 1, 1)) + (CDbl(posteddate) / 86400000)
        posteddate = Format$(CDat(gmtValue), "mm/dd/yyyy hh:nn") + " GMT"
    Else
        Print "No usable posted date found in " & filename
        posteddate = ""
    End If

    siteurl = "http://www.bleedyellow.com/blogs/texasswede/"
    Call blogentry.ReplaceItemValue("Title", title)
    Call blogentry.ReplaceItemValue("PostedDate", posteddate)
    Call blogentry.ReplaceItemValue("OriginalURL", siteurl + filename)

    currow = 0
    ForAll t In html
        currow = currow + 1
        If InStr(t, |class="entryContentContainer"|) > 0 Then
            ' The container line itself is not part of the post.
        Else
            ' Skips the last three captured lines.
            ' NOTE(review): presumably trailing wrapper markup - confirm against a sample page.
            If currow < row - 2 Then
                Call rtitem.AppendText(FixHTML(t))
                Call rtitem.AddNewLine(1, True)
            End If
        End If
    End ForAll

    Call blogentry.ComputeWithForm(True, False)
    ProcessBlogEntry = blogentry.Save(True, True)
    Exit Function

failed:
    Print "ProcessBlogEntry failed for " & filename & ": " & Error$ & " (error " & Err & ", line " & Erl & ")"
    If fileOpen Then
        fileOpen = False
        Close fileNum
    End If
    ProcessBlogEntry = False
    Exit Function
End Function
Function JSMillisecondsToLSDate(millis As Double) As Variant
    ' Converts a JavaScript timestamp (milliseconds since the start of
    ' January 1, 1970 GMT) into a LotusScript date/time value in local time.
    '
    ' millis:  the JavaScript millisecond value.
    ' Returns: a date/time Variant shifted by the TimeZone value of a
    '          NotesDateTime created from the current time.
    '
    ' NOTE(review): the shift uses only the TimeZone property of "now";
    ' whether daylight saving time on the target date is accounted for
    ' is not visible here - confirm if exact local times matter.
    Const MILLIS_PER_DAY = 86400000
    Dim nowStamp As NotesDateTime
    Dim hoursFromGMT As Integer
    Dim localEpoch As Double

    ' A NotesDateTime with both date and time parts supplies the zone offset.
    Set nowStamp = New NotesDateTime(Now)
    hoursFromGMT = nowStamp.TimeZone

    ' Move the JavaScript epoch from GMT to local time (offset is in hours).
    localEpoch = CDbl(DateNumber(1970,1,1)) - (hoursFromGMT/24)

    ' Date values count days, so turn the milliseconds into a day fraction.
    JSMillisecondsToLSDate = CDat(localEpoch + (millis / MILLIS_PER_DAY))
End Function
And here is the agent to export the documents to a CSV file that can be imported into a WordPress blog using the CSV import plugin.
Option Public
Option Declare
Sub Initialize
    ' Agent entry point for the export.
    ' Writes a header row followed by one CSV row (built by GetCSV) for every
    ' document in the "By Title" view of the current database.
    Dim session As New NotesSession
    Dim db As NotesDatabase
    Dim view As NotesView
    Dim doc As NotesDocument
    Dim filename As String
    Dim fileNum As Integer
    Dim fileOpen As Boolean

    fileOpen = False
    On Error GoTo failed

    ' Look the view up first so a missing view does not leave a half-written file behind.
    Set db = session.CurrentDatabase
    Set view = db.GetView("By Title")
    If view Is Nothing Then
        Print |View "By Title" was not found; nothing was exported.|
        Exit Sub
    End If

    filename = "d:\bleedyellow.csv"
    ' Use a free file number instead of a hard-coded #1.
    fileNum = FreeFile()
    Open filename For Output As fileNum
    fileOpen = True

    Print #fileNum, |"csv_post_title","csv_post_post",| + _
    |"csv_post_type","csv_post_excerpt",| + _
    |"csv_post_categories","csv_post_tags",| + _
    |"csv_post_date","custom_field_1","custom_field_2"|

    Set doc = view.GetFirstDocument
    Do Until doc Is Nothing
        Print #fileNum, GetCSV(doc)
        Set doc = view.GetNextDocument(doc)
    Loop

    Close fileNum
    fileOpen = False
    Exit Sub

failed:
    Print "CSV export failed: " & Error$ & " (error " & Err & ", line " & Erl & ")"
    ' Make sure the output file is not left open after a failure.
    If fileOpen Then
        fileOpen = False
        Close fileNum
    End If
    Exit Sub
End Sub
Function GetCSV(doc As NotesDocument) As String
    ' Builds one CSV row for a blog entry document.
    '
    ' doc:     document holding the Title, Content, Tags and PostedDate items.
    ' Returns: a row with nine fields, in this order:
    '          title, post, type (empty), excerpt (empty), categories,
    '          tags, date, custom_field_1 (empty), custom_field_2 (empty).
    '
    ' Every text value is wrapped in double quotes, with embedded double
    ' quotes doubled.
    Dim contentItem As Variant
    Dim title As String
    Dim content As String
    Dim tags As String
    Dim posted As String
    Dim tmp As String

    ' Content is normally rich text, but tolerate a missing or plain-text item.
    content = ""
    Set contentItem = doc.GetFirstItem("Content")
    If Not (contentItem Is Nothing) Then
        If contentItem.Type = RICHTEXT Then
            content = FullTrim(contentItem.GetUnformattedText())
        Else
            content = FullTrim(contentItem.Text)
        End If
    End If

    title = Replace(CStr(doc.GetItemValue("Title")(0)), |"|, |""|)
    content = Replace(content, |"|, |""|)
    tags = Replace(CStr(doc.GetItemValue("Tags")(0)), |"|, |""|)
    posted = Replace(CStr(doc.GetItemValue("PostedDate")(0)), |"|, |""|)

    tmp = |"| + title + |",|
    tmp = tmp + |"| + content + |",|
    ' Post type and excerpt are left empty.
    tmp = tmp + ",,"
    tmp = tmp + |"| + "Old Blog Post" + |",|
    tmp = tmp + |"| + tags + |",|
    ' Two empty custom fields follow the date: two commas, so the row has
    ' exactly as many fields as the header has columns.
    tmp = tmp + |"| + posted + |",,|

    GetCSV = tmp
End Function