ocrscrape.py --- fiddling with long lines

2011-12-06 16:34:58 -06:00 · 2011-12-06 16:34:58 -06:00 · 943279ef0b
commit 943279ef0b
parent 0559a51b52
1 changed file with 12 additions and 6 deletions
--- a/ocrscrape.py
+++ b/ocrscrape.py
@@ -78,7 +78,8 @@ class OCRListScraper:
            len_all = len(self.ocrs)
            ocrs_set = set(self.ocrs)
            self.ocrs = list(ocrs_set)
-            print(' done! ({0:d} found ({1:d} duplicates))\n'.format(len(self.ocrs), len_all-len(self.ocrs)))
+            print(' done! ({0:d} found ({1:d} duplicates))\n'.format(len(self.ocrs),
+                  len_all-len(self.ocrs)))

 class OCRScraperWorker(threading.Thread):

@@ -112,22 +113,27 @@ class OCRScraperWorker(threading.Thread):
            if os.path.isfile(filename):
                if self._file_checksum_matches(filename, ocr):
                    os.utime(filename, None)
-                    sys.stdout.write('[{0:s}] \033[92m*\033[0m {1:s} already downloaded\n'.format(self.thread_id, ocr.title.encode('utf-8')))
+                    sys.stdout.write('[{0:s}] \033[92m*\033[0m {1:s} already downloaded\n'
+                                     .format(self.thread_id, ocr.title.encode('utf-8')))
                else:
-                    sys.stdout.write('[{0:s}] \033[93m!\033[0m downloading {1:s} (checksum failure)\n'.format(self.thread_id, ocr.title.encode('utf-8')))
+                    sys.stdout.write('[{0:s}] \033[93m!\033[0m downloading {1:s} (checksum failure)\n'
+                                     .format(self.thread_id, ocr.title.encode('utf-8')))
                    do_download = True
            else:
                # download
-                sys.stdout.write('[{0:s}] \033[91mx\033[0m downloading {1:s}\n'.format(self.thread_id, ocr.title.encode('utf-8')))
+                sys.stdout.write('[{0:s}] \033[91mx\033[0m downloading {1:s}\n'.format(self.thread_id,
+                                 ocr.title.encode('utf-8')))
                do_download = True

            if do_download:
                urllib.urlretrieve(ocr.urls[random.randint(0, len(ocr.urls)-1)], filename)
                if not self._file_checksum_matches(filename, ocr):
-                    sys.stdout.write('[{0:s}] \033[93m!\033[0m {1:s} still mismatches, probably OCR listing problem\n'.format(self.thread_id, ocr.title.encode('utf-8')))
+                    sys.stdout.write(
+                        '[{0:s}] \033[93m!\033[0m {1:s} still mismatches, probably OCR listing problem\n'
+                        .format(self.thread_id, ocr.title.encode('utf-8')))

    def _identify_ocr_data_from_url(self, url):
-        """Given the input URL, turn the relevant data into an OCR object for later checking and downloading."""
+        """Given the input URL, turn the relevant data into an OCR object for later use. """

        ocr = self.OCR()
        ocr.num = '0'