78 lines
2.3 KiB
Python
78 lines
2.3 KiB
Python
#!/usr/bin/python
|
|
|
|
from codecs import open
|
|
import sys
|
|
import os
|
|
from lxml import etree
|
|
|
|
# Directory (relative to the current working directory) that wp2comments()
# writes the per-comment files into; it is created there if it does not exist.
COMMENT_DIR='comments'
|
|
|
|
def _comment_field(comment, tag, nsmap):
    """Return the text of child ``tag`` of ``comment``.

    An absent child and a present-but-empty child (whose ``.text`` is
    ``None``) both yield ``''`` so callers never format the string "None".
    """
    node = comment.find(tag, namespaces=nsmap)
    if node is None or node.text is None:
        return ''
    return node.text


def wp2comments(xml):
    """Write one file per approved comment found in a WordPress XML export.

    Only ``item`` elements that have a ``title`` and whose ``wp:status`` is
    ``publish`` are considered.  For each of their ``wp:comment`` children
    with ``wp:comment_approved`` equal to ``'1'`` a UTF-8 file named
    ``<post_name>-<n>.md`` is written into ``COMMENT_DIR``.  The file holds
    ``post_id``, ``Author``, ``Date``, ``Author_Email`` and ``Author_IP``
    header lines, a blank line, and then the comment content.

    ``n`` is a single counter that runs across the whole export, not per
    post, so it is unique among all files written by one call.

    Args:
        xml: The export to read; passed straight to ``etree.parse``.

    Side effects:
        Creates ``COMMENT_DIR`` if it does not exist.  Prints a message and
        exits the process with status 1 if ``COMMENT_DIR`` exists but is not
        a directory.  Prints a warning for every published post that is
        skipped because it has no usable ``wp:post_name``.
    """
    tree = etree.parse(xml)
    root = tree.getroot()
    # Look the namespace map up once instead of on every find()/findall().
    nsmap = root.nsmap
    items = root.findall('.//item')

    n = 0

    if not os.path.exists(COMMENT_DIR):
        os.makedirs(COMMENT_DIR)
    elif not os.path.isdir(COMMENT_DIR):
        print('"%s" exists but is not a directory!' % COMMENT_DIR)
        sys.exit(1)

    for item in items:
        title = item.find('title')
        if title is None:
            continue

        # Only fetch comments from published posts.
        post_status = item.find('wp:status', namespaces=nsmap)
        if post_status is None or post_status.text != 'publish':
            continue

        slug_node = item.find('wp:post_name', namespaces=nsmap)
        # An empty <wp:post_name/> has .text None; treat it like a missing
        # one rather than producing files named "None-<n>.md".
        if slug_node is None or not slug_node.text:
            print('WARNING: skipping "%s" with no post_name' % (title.text, ))
            continue
        slug = slug_node.text

        # A post without comments simply yields an empty list here.
        for comment in item.findall('wp:comment', namespaces=nsmap):
            approved = _comment_field(comment, 'wp:comment_approved', nsmap)
            if approved != '1':
                continue

            author = _comment_field(comment, 'wp:comment_author', nsmap)
            ip = _comment_field(comment, 'wp:comment_author_IP', nsmap)
            date = _comment_field(comment, 'wp:comment_date_gmt', nsmap)
            email = _comment_field(comment, 'wp:comment_author_email', nsmap)
            content = _comment_field(comment, 'wp:comment_content', nsmap)

            n += 1
            path = os.path.join(COMMENT_DIR, '%s-%d.md' % (slug, n))
            # "with" guarantees the file is closed even if a write fails.
            with open(path, encoding='utf-8', mode='w') as f:
                f.write(u'post_id: %s\n' % (slug, ))
                f.write(u'Author: %s\n' % (author, ))
                f.write(u'Date: %s\n' % (date, ))
                f.write(u'Author_Email: %s\n' % (email, ))
                f.write(u'Author_IP: %s\n' % (ip, ))
                f.write(u'\n%s\n' % (content, ))
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: the first command-line argument is the WordPress
    # export to convert.  Without it, show a usage line instead of letting
    # sys.argv[1] fail with an IndexError traceback.
    if len(sys.argv) < 2:
        sys.stderr.write('usage: %s WORDPRESS_EXPORT_XML\n' % sys.argv[0])
        sys.exit(2)
    wp2comments(sys.argv[1])
|