I have done the same thing before. The main idea is to convert the HTML to Markdown and then keep it in WorkFlowy.
I wrote a small script to do the conversion.
python
import sys
import os
from bs4 import BeautifulSoup
def remove_linefeed(text):
    """Return *text* with every ``'\\n'`` character removed.

    Only line feeds are dropped; nothing is inserted in their place, so the
    text on either side of a removed line feed is joined directly.
    """
    return text.replace('\n', '')
def convert_to_markdown(text, class_):
    """Format *text* as a Markdown fragment chosen by the div class *class_*.

    Mapping (anything not listed is returned unchanged):

    * ``bodyContainer``, ``citation`` -> empty string
    * ``notebookFor``                 -> ``# text`` followed by a space
    * ``bookTitle``, ``noteText``     -> text followed by a blank line
    * ``authors``                     -> bold text followed by a blank line
    * ``sectionHeading``              -> level-2 heading and a blank line
    * ``noteHeading``                 -> italic text and a blank line; unless
      the part before the first ``-`` is ``Note`` or ``Bookmark``, a ``> ``
      block-quote marker is appended so the next fragment is quoted.
    """
    if class_ in ('bodyContainer', 'citation'):
        return ''
    if class_ == 'notebookFor':
        return '# ' + text + ' '
    if class_ in ('bookTitle', 'noteText'):
        return text + '\n\n'
    if class_ == 'authors':
        return '**' + text + '**\n\n'
    if class_ == 'sectionHeading':
        return '## ' + text + '\n\n'
    if class_ == 'noteHeading':
        label = text.split('-', 1)[0].strip()
        heading = '*' + text + '*\n\n'
        if label not in ('Note', 'Bookmark'):
            # Open a block quote for the text that follows this heading.
            heading += '> '
        return heading
    # Unknown class: pass the text through untouched.
    return text
def main():
    """Convert the HTML file named on the command line to Markdown.

    Reads ``sys.argv[1]`` as UTF-8 HTML, runs the text of every ``<div>``
    that has a class through ``remove_linefeed`` and ``convert_to_markdown``
    (keyed on the div's first class), and writes the concatenated result to
    a file with the same path but a ``.md`` extension.

    Exits with a usage message when no input file is given.
    """
    if len(sys.argv) < 2:
        sys.exit('usage: ' + os.path.basename(sys.argv[0]) + ' <file.html>')

    file_name = sys.argv[1]
    prefix, _ = os.path.splitext(file_name)

    # Parse inside a context manager so the input handle is always closed.
    with open(file_name, encoding='utf-8') as html_file:
        soup = BeautifulSoup(html_file, 'lxml')

    fragments = []
    for div in soup.select('div'):
        classes = div.get('class')
        if not classes:
            # A div without a class has no conversion rule; indexing
            # div['class'] would raise KeyError, so skip it instead.
            continue
        tmp_text = remove_linefeed(div.get_text().strip())
        fragments.append(convert_to_markdown(tmp_text, classes[0]))

    with open(prefix + '.md', 'w', encoding='utf-8') as f:
        f.write(''.join(fragments))


if __name__ == '__main__':
    main()