# [document-viewer residue, not part of the script] You are on page: 1 of 4

#!/usr/bin/env python
# -*- coding: utf8 -*-
#
###################################################
# pdf2mp3.py - little script/program to convert a
# pdf-file or ascii-file (.dat, .txt) into an mp3 audio or wav file
#
# Copyright (C) 2010 Hannes Rennau
# hannes@bolding-burchard.com
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the
# Free Software Foundation, Inc.,
# 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#
###################################################
# LIST OF PACKAGES NEEDED:
# you need to install the following packages:
#   sudo apt-get install python poppler-utils festival festvox-rablpc16k
#   lame espeak
#
# HOW TO USE:
# 1. create a file with the name pdf2mp3 and copy the content of
#    the whole text in there
# 2. make the file an executable via:
#    >>> chmod +x pdf2mp3
# 3. copy the file to /usr/bin to make usage of the program possible from
#    everywhere on your computer:
#    >>> sudo cp pdf2mp3 /usr/bin/
# 4. after that get help by calling:
#    pdf2mp3 -h
#
# 5. EXAMPLE:
#    you want to convert yourfilename.pdf into a mp3 file, then just type:
#    pdf2mp3 -v en -f yourfilename.pdf -o yourfilename.mp3
#    (for the english voice 'en', for german voice 'de',
#    type: espeak --voices to get list of voices available on your system)
#
#
# edited by busfahrer, November 2010

import io
import os
import re
import string
import sys
import tempfile
from subprocess import call
from optparse import OptionParser as op


def main():
    """Parse the command line and run the conversion.

    Returns:
        0 when the conversion succeeded, 2 when the voice option is
        missing or ``convert`` reported an error.
    """
    parser = op(
        usage=('%prog -s integer[optional] -g integer[optional] '
               '-v [de,en,...] -f filename[.pdf|.txt|.dat] '
               '-o filename[.wav|.mp3]'),
        description=('This script convertes ASCII files (basically those '
                     'files with extension .txt or .dat) or pdf files into '
                     'an mp3 (or wav) audio file.'),
        version=r'$v0.5$')
    parser.add_option('-s', '--speed', type='string', metavar='INTEGER',
                      help='Speed in words per minute, 80 to 390, '
                           'default is 170.')
    parser.add_option('-g', '--gap', type='string', metavar='INTEGER',
                      help='Word gap.Pause between words,units of 10mS at '
                           'the default speed.')
    parser.add_option('-v', '--voice', type='string', metavar='VOICENAME',
                      help='name of the voice to be used. '
                           'type: ***espeak --voices*** to get list of '
                           'available voices on your system.')
    parser.add_option('-f', '--file', type='string',
                      metavar='SOURCEFILENAME',
                      help='input path of file to read (and late on convert '
                           'to audio file).This can be a pdf or '
                           'ascii (.txt or .dat) file. extension must be '
                           'given!')
    parser.add_option('-o', '--output', type='string',
                      metavar='OUTPUTFILENAME',
                      help='Output filename (with extension .wav or .mp3 '
                           'that the program knows which audio format you '
                           'want.)')
    options, args = parser.parse_args()

    if options.voice is None:
        print('no voice name given, use -v voicename '
              '[type ***espeak --voices*** for list of available voices]')
        return 2

    # Options are passed through unconverted: a missing option must stay
    # None (not the string 'None') so that convert() can detect it.
    result = convert(options.file, options.output, options.voice,
                     options.speed, options.gap)
    if result is not None:
        print(result)
        return 2
    return 0


def convert(filename_inp, filename_out, language, speed, gapless):
    """Convert a .pdf/.txt/.dat file into a .wav or .mp3 audio file.

    Args:
        filename_inp: path of the input file (.pdf, .txt or .dat).
        filename_out: path of the output file; its extension (.wav or
            .mp3) selects the audio format.
        language: espeak voice name, passed to ``espeak -v``.
        speed: value for ``espeak -s``, or None to omit the option.
        gapless: value for ``espeak -g``, or None to omit the option.

    Returns:
        None on success, otherwise a human-readable error message.
    """
    wrong_output = ('please decide whether you want wav or mp3 format by '
                    'typing -o filename.wav or -o filename.mp3')

    if not filename_inp:
        return '*** no input file given, use -f filename ***'
    if not os.path.isfile(filename_inp):
        return '*** input file %s does not exist ***' % filename_inp
    if not filename_out:
        return wrong_output
    extension_out = os.path.splitext(filename_out)[1]
    if extension_out not in ('.wav', '.mp3'):
        return wrong_output
    extension = os.path.splitext(filename_inp)[1]
    if extension not in ('.dat', '.txt', '.pdf'):
        return '*** input file does not have extension (.txt, .dat, .pdf) ***'

    temp_text = None
    try:
        text_path = filename_inp
        if extension == '.pdf':
            print('converting pdf file: %s to ASCII\n' % filename_inp)
            # Extract into a private temp file so an existing <name>.txt
            # next to the PDF is never overwritten or deleted.
            handle, temp_text = tempfile.mkstemp(suffix='.txt')
            os.close(handle)
            if pdf_convert_to_ascii(filename_inp, temp_text) != 0:
                return '*** pdftotext failed on %s ***' % filename_inp
            text_path = temp_text

        if convert_to_wav(text_path, filename_out, language,
                          speed, gapless) != 0:
            return '*** espeak failed to create the wav file ***'

        if extension_out == '.mp3' and convert_wav_2_mp3(filename_out) != 0:
            return '*** lame failed to create %s ***' % filename_out
    except (IOError, OSError) as err:
        # Raised when an external program is not installed or a file
        # cannot be read/written.
        return '*** conversion failed: %s ***' % err
    finally:
        if temp_text is not None and os.path.exists(temp_text):
            os.remove(temp_text)
    return None


def pdf_convert_to_ascii(input_pdf_file, output_txt_file=None):
    """Run ``pdftotext`` on *input_pdf_file*.

    Args:
        input_pdf_file: path of the PDF to extract text from.
        output_txt_file: where to write the text; defaults to the PDF
            path with its extension replaced by ``.txt``.

    Returns:
        The exit status of ``pdftotext``.
    """
    if output_txt_file is None:
        output_txt_file = os.path.splitext(input_pdf_file)[0] + '.txt'
    return call(['pdftotext', input_pdf_file, output_txt_file])


def _espeak_command(language, text_path, wav_path, speed=None, gapless=None):
    """Build the espeak argument list; -g and -s are added only when set."""
    cmd = ['espeak']
    if gapless is not None:
        cmd += ['-g', str(gapless)]
    if speed is not None:
        cmd += ['-s', str(speed)]
    cmd += ['-v', language, '-f', text_path, '-w', wav_path]
    return cmd


def convert_to_wav(filename_inp, filename_out, language, speed, gapless):
    """Speak the text in *filename_inp* into a wav file using espeak.

    The wav file is written next to *filename_out*, with the extension
    replaced by ``.wav``. The input file is left untouched.

    Returns:
        The exit status of ``espeak``.
    """
    wav_path = os.path.splitext(filename_out)[0] + '.wav'
    print('converting %s to %s\n' % (filename_inp, wav_path))

    # Undecodable bytes are replaced rather than raising; the filter below
    # removes the replacement characters again.
    with io.open(filename_inp, 'r', encoding='utf-8',
                 errors='replace') as infile:
        text = infile.read()

    # NOTE(review): this filter also drops digits and every non-ASCII
    # letter (e.g. German umlauts) - confirm that this is intended.
    edited_text = re.sub('[^a-zA-Z .,!?\n]', '', text)

    # A private temp file avoids clobbering a user file named
    # edited_text.txt and collisions between concurrent runs.
    handle, edited_path = tempfile.mkstemp(suffix='.txt')
    try:
        with os.fdopen(handle, 'w') as outfile:
            outfile.write(edited_text)
        return call(_espeak_command(language, edited_path, wav_path,
                                    speed, gapless))
    finally:
        os.remove(edited_path)


def convert_wav_2_mp3(filename_out):
    """Encode the wav file belonging to *filename_out* as mp3 using lame.

    The wav file is deleted only after lame succeeded, so a failed
    encoding does not lose the audio.

    Returns:
        The exit status of ``lame``.
    """
    base = os.path.splitext(filename_out)[0]
    wav_path = base + '.wav'
    mp3_path = base + '.mp3'
    print('converting %s to %s\n' % (wav_path, mp3_path))
    retcode = call(['lame', '-f', wav_path, mp3_path])
    if retcode == 0:
        print('Datei %s wurde erfolgreich erstellt' % filename_out)
        os.remove(wav_path)
    return retcode


if __name__ == '__main__':
    sys.exit(main())