Example source code for Python's URLopener() class
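
urllib.URLopener is the old-style opener class in Python 2's urllib module; its retrieve(url, filename) method downloads a resource to a local file, which is what every snippet below uses it for. The class is deprecated and is not available as urllib.URLopener in Python 3, where urllib.request.urlretrieve is the closest equivalent. A minimal sketch of the pattern shared by all of the examples on this page (the URL is a placeholder):

import urllib

opener = urllib.URLopener()                     # Python 2 only
opener.retrieve('http://example.com/data.csv',  # remote resource (placeholder URL)
                'data.csv')                     # local filename to write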

fetchcrucom.py (project: Random, author: rkramesh)
def csvComment():
    'Fetch each review history and parse it into CSV'
    print 'started'
    with open(sortdata, 'r') as f:
        for line in f:
            line = line.strip('\n')
            durl = 'http://fisheye.cuc.com/cru/' + line + '/reviewHistory.csv'
            print durl
            testfile = urllib.URLopener()
            testfile.retrieve(durl, line + '.csv')
            with open(line + '.csv') as csvfile:  # renamed so the outer file handle f is not shadowed
                columns = defaultdict(list)  # each value in each column is appended to a list
                reader = csv.DictReader(csvfile)  # read rows into a dictionary format
                for row in reader:  # read a row as {column1: value1, column2: value2, ...}
                    for (k, v) in row.items():  # go over each column name and value
                        columns[k].append(v)  # append the value into the list for column k

                d = dict(zip(zip(columns['Date'], columns['User'], columns['New value']), columns['Action']))
                print d
##                for key, value in d.iteritems():
##                    if value == 'COMMENT_CHANGED' or value == 'COMMENT_ADDED':
##                        writer = csv.writer(open('final.csv', 'ab'))
##                        for (key, value) in zip(d.items()):
##                            writer.writerow([line, key, value])
##                    else:
##                        print 'No Comments found for '+line
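
The commented-out block above is broken as written: zip(d.items()) zips a single argument and does not yield (key, value) pairs. A minimal sketch of what it appears to intend, keeping the column names and the 'final.csv' output file from the snippet (Python 2 idioms, matching the rest of this page):

import csv

def write_comment_rows(review_id, d):
    # d maps (Date, User, New value) -> Action, as built above
    with open('final.csv', 'ab') as out:
        writer = csv.writer(out)
        for (date, user, new_value), action in d.iteritems():
            if action in ('COMMENT_ADDED', 'COMMENT_CHANGED'):
                writer.writerow([review_id, date, user, new_value, action])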
fetchcrucom.py (project: Random, author: rkramesh)
def dictcsvFinalReview():
    print 'started'
    with open(sortdata, 'r') as f:
        for line in f:
            line = line.strip('\n')
            durl = 'http://fisheye.cuc.com/cru/' + line + '/reviewHistory.csv'
            print durl
            testfile = urllib.URLopener()
            os.chdir(r'C:\Users\radhakrishnanr\Desktop\filescsv')
            testfile.retrieve(durl, line + '.csv')
            columns = defaultdict(list)  # each value in each column is appended to a list
            with open(line + '.csv') as csvfile:  # renamed so the outer file handle f is not shadowed
                reader = csv.DictReader(csvfile)  # read rows into a dictionary format
                for row in reader:  # read a row as {column1: value1, column2: value2, ...}
                    for (k, v) in row.items():  # go over each column name and value
                        columns[k].append(v)  # append the value into the list for column k

            d = dict(zip(zip(columns['Date'], columns['User'], columns['New value']), columns['Action']))
            print d

##            for key, value in d.iteritems():
##                if value == 'COMMENT_CHANGED' or value == 'COMMENT_ADDED':
##                    writer = csv.writer(open('final.csv', 'ab'))
##                    for (key, value) in zip(d, line):
##                        writer.writerow([line, key])
##                else:
##                    print 'No Comments found for '+line
sv.py (project: Random, author: rkramesh)
def csvComment():
    'Fetch each review history and save it as CSV'
    print 'started'
    with open('sorted.txt', 'r') as f:
        for line in f:
            line = line.strip('\n')
            durl = 'http://fisheye.com/cru/' + line + '/reviewHistory.csv'
            print durl
            testfile = urllib.URLopener()
            testfile.retrieve(durl, line + '.csv')
sv.py (project: Random, author: rkramesh)
def csvFinalReview():
    print 'started'
    with open('sorted.txt', 'r') as f:
        for line in f:
            line = line.strip('\n')
            durl = 'http://fisheye.com/cru/' + line + '/reviewHistory.csv'
            print durl
            testfile = urllib.URLopener()
            testfile.retrieve(durl, line + '.csv')
            columns = defaultdict(list)  # each value in each column is appended to a list
            with open(line + '.csv') as csvfile:  # renamed so the outer file handle f is not shadowed
                reader = csv.DictReader(csvfile)  # read rows into a dictionary format
                for row in reader:  # read a row as {column1: value1, column2: value2, ...}
                    for (k, v) in row.items():  # go over each column name and value
                        columns[k].append(v)  # append the value into the list for column k

            d = dict(zip(zip(columns['Date'], columns['User'], columns['New value']), columns['Action']))
            for key, value in d.iteritems():
                if value == 'COMMENT_CHANGED' or value == 'COMMENT_ADDED':
                    print line, key  # the original printed the builtin 'file'; 'line' is the review id
                    try:
                        os.remove(line + '.csv')
                    except OSError:  # os.remove raises OSError (not IOError) when the file is missing
                        pass
#csvComment()
#csvReview()
Getdata.py (project: gps2tec, author: weihan1107)
def get_gimdata(self):
        import urllib, os
        if self.file_exist():
            print "No need to download GIM data..."
            return
        print "Start to download GIM data..."
        weblink = "ftp://ftp.unibe.ch/aiub/CODE/{0}/".format(self.year)
        if not os.path.isfile(self.sourcefn[:-2]): 
            if not os.path.isfile(self.sourcefn):
                download = urllib.URLopener()
                download.retrieve(weblink+self.sourcefn, self.sourcefn)
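            # the gzip CLI invoked below also understands .Z (Unix compress) archives, the format CODE publishes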
            os.system("gzip -fd {0}".format(self.sourcefn))
Getdata.py (project: gps2tec, author: weihan1107)
def get_navidata(self):
        import urllib, os
        if self.file_exist():
            print "No need to download Navigation data..."
            return

        print "Start to download Navigation data..."
        if self.types in 'igslocal':
            weblink = "ftp://igscb.jpl.nasa.gov/pub/product/"
            if not (os.path.isfile(self.sourcefn_igs1) or os.path.isfile(self.sourcefn_igr1)):
                try:
                    download = urllib.URLopener()
                    download.retrieve("{0}{1:04}/{2}".format(weblink, self.dweeks1, self.sourcefn_igs1), self.sourcefn_igs1)
                    self.sourcefn1 = self.sourcefn_igs1[:-2]
                except IOError:
                    download = urllib.URLopener()
                    download.retrieve("{0}{1:04}/{2}".format(weblink, self.dweeks1, self.sourcefn_igr1), self.sourcefn_igr1)
                    self.sourcefn1 = self.sourcefn_igr1[:-2]
            if not (os.path.isfile(self.sourcefn_igs2) or os.path.isfile(self.sourcefn_igr2)):
                try:
                    download = urllib.URLopener()
                    download.retrieve("{0}{1:04}/{2}".format(weblink, self.dweeks2, self.sourcefn_igs2), self.sourcefn_igs2)
                    self.sourcefn2 = self.sourcefn_igs2[:-2]
                except IOError:
                    download = urllib.URLopener()
                    download.retrieve("{0}{1:04}/{2}".format(weblink, self.dweeks2, self.sourcefn_igr2), self.sourcefn_igr2)
                    self.sourcefn2 = self.sourcefn_igr2[:-2]
        elif self.types=='igsrt':
            weblink = "ftp://cddis.gsfc.nasa.gov/pub/gps/products/{0}/".format(self.sourcefn_igu[3:7])
            download = urllib.URLopener()
            download.retrieve(weblink+self.sourcefn_igu, self.sourcefn_igu)
            self.sourcefn = self.sourcefn_igu[:-2]
        os.system("gzip -fd *sp3.Z")
utils.py (project: muzi-scanner, author: sdslabs)
def save_image(self, url, path):
        """
        :param url: remote URL of the image to fetch
        :param path: local file path to save the image to
        :return: nothing
        """
        image = urllib.URLopener()
        image.retrieve(url, path)
misc.py (project: VanillaML, author: vinhkhuc)
def download_file(url, local_path):
    dir_path = path.dirname(local_path)
    if not path.exists(dir_path):
        print("Creating the directory '%s' ..." % dir_path)
        os.makedirs(dir_path)

    urllib.URLopener().retrieve(url, local_path)
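
A hypothetical call to the helper above (URL and path are placeholders); on Python 3, urllib.request.urlretrieve(url, local_path) would replace the final line:

download_file('http://example.com/datasets/mnist.pkl.gz', './data/mnist.pkl.gz')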
Windows-client.py (project: T2B-framework, author: pielco11)
def DownHTTP(url, fileName):
    fileHTTP = urllib.URLopener()
    if fileName == "":
        # default to the last path segment of the URL as the local filename
        fileHTTP.retrieve(url, url.split("/")[-1])
    else:
        fileHTTP.retrieve(url, fileName)

###### setup  EDIT
Linux-client.py (project: T2B-framework, author: pielco11)
def DownHTTP(url, fileName):
    fileHTTP = urllib.URLopener()
    if fileName == "":
        baseName = url.split("/")[-1]  # last path segment of the URL
        if os.path.isfile(baseName):
            # a file with that name already exists: insert "_" before the extension
            stem, ext = baseName.split(".")[0], baseName.split(".")[1]
            fileHTTP.retrieve(url, stem + "_." + ext)
            return " saved the file with the original name + \"_\""
        else:
            fileHTTP.retrieve(url, baseName)
            return " saved the file with the original name"
    else:
        fileHTTP.retrieve(url, fileName)
        return " saved the file with the given name"
Mac-client.py (project: T2B-framework, author: pielco11)
def DownHTTP(url, fileName):
    fileHTTP = urllib.URLopener()
    if fileName == "":
        # default to the last path segment of the URL as the local filename
        fileHTTP.retrieve(url, url.split("/")[-1])
    else:
        fileHTTP.retrieve(url, fileName)
Source.py (project: libSigNetSim, author: vincent-noel)
def downloadSource(self):
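        # URLopener is presumably imported at module level ("from urllib import URLopener"), since it is used unqualified below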

        download_file = URLopener()
        download_file.retrieve(self.__url, self.__filename)
        self.__sourceAvailable = True
get_datasets.py (project: metaqnn, author: bowenbaker)
def get_caltech101(save_dir=None, root_path=None):
    assert((save_dir is not None and root_path is None) or (save_dir is None and root_path is not None))

    if root_path is None:
        ctx = ssl.create_default_context()
        ctx.check_hostname = False
        ctx.verify_mode = ssl.CERT_NONE

        print 'Downloading Caltech101 dataset...'
        tar_path = os.path.join(save_dir, "101_ObjectCategories.tar.gz")
        url = urllib.URLopener(context=ctx)
        url.retrieve("https://www.vision.caltech.edu/Image_Datasets/Caltech101/101_ObjectCategories.tar.gz", tar_path)
        print 'Download Done, Extracting...'
        tar = tarfile.open(tar_path)
        tar.extractall(save_dir)
        tar.close()

    root = os.path.join(save_dir, "101_ObjectCategories") if not root_path else root_path

    train_x = []
    train_y = []
    val_x = []
    val_y = []

    label = 0
    for cls_folder in os.listdir(root):
        cls_root = os.path.join(root, cls_folder)
        if not os.path.isdir(cls_root):
            continue

        cls_images = [misc.imread(os.path.join(cls_root, img_name)) for img_name in os.listdir(cls_root)]
        cls_images = [np.repeat(np.expand_dims(img, 2), 3, axis=2) if len(img.shape) == 2 else img for img in cls_images]
        cls_images = np.array([np.transpose(misc.imresize(img, (224, 224, 3)), (2, 0, 1)) for img in cls_images])  # HWC -> CHW; np.reshape here would scramble the pixel layout
        new_index = np.random.permutation(np.arange(cls_images.shape[0]))
        cls_images = cls_images[new_index, :, :, :]

        train_x.append(cls_images[:30])
        train_y.append(np.array([label]*30))
        if len(cls_images) <= 80:
            val_x.append(cls_images[30:])
            val_y.append(np.array([label]*(len(cls_images)-30)))
        else:
            val_x.append(cls_images[30:80])
            val_y.append(np.array([label]*50))
        label += 1

    Xtr = np.concatenate(train_x)
    Ytr = np.concatenate(train_y)
    Xval= np.concatenate(val_x)
    Yval= np.concatenate(val_y)

    print 'Xtr shape ', Xtr.shape
    print 'Ytr shape ', Ytr.shape
    print 'Xval shape ', Xval.shape
    print 'Yval shape ', Yval.shape

    return Xtr, Ytr, Xval, Yval
get_datasets.py (project: metaqnn, author: bowenbaker)
def get_cifar10(save_dir=None, root_path=None):
    ''' If root_path is None, we download the data set from internet.

        Either save path or root path must not be None and not both.

        Returns Xtr, Ytr, Xte, Yte as numpy arrays
    '''

    assert((save_dir is not None and root_path is None) or (save_dir is None and root_path is not None))

    if root_path is None:
        print 'Downloading CIFAR10 dataset...'
        tar_path = os.path.join(save_dir, "cifar-10-python.tar.gz")
        url = urllib.URLopener()
        url.retrieve("https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz", tar_path)
        print 'Download Done, Extracting...'
        tar = tarfile.open(tar_path)
        tar.extractall(save_dir)
        tar.close()

    root = os.path.join(save_dir, "cifar-10-batches-py") if not root_path else root_path


    # Training Data
    xs = []
    ys = []
    for b in range(1,6):
        f = os.path.join(root, 'data_batch_%d' % (b, ))
        X, Y = load_CIFAR_batch(f)
        xs.append(X)
        ys.append(Y)
    Xtr = np.concatenate(xs)
    Ytr = np.concatenate(ys)
    print 'Xtrain shape', Xtr.shape
    print 'Ytrain shape', Ytr.shape

    # Testing data
    Xte, Yte = load_CIFAR_batch(os.path.join(root, 'test_batch'))
    print 'Xtest shape', Xte.shape
    print 'Ytest shape', Yte.shape

    return Xtr, Ytr, Xte, Yte
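
load_CIFAR_batch is defined elsewhere in this project; a typical implementation for the CIFAR-10 python pickle format is sketched here for context (the (N, 3, 32, 32) layout matches how this file arranges images elsewhere):

import cPickle
import numpy as np

def load_CIFAR_batch(filename):
    # each batch file holds 10000 images as rows of 3072 bytes (1024 R, 1024 G, 1024 B values)
    with open(filename, 'rb') as f:
        datadict = cPickle.load(f)
    X = datadict['data'].reshape(10000, 3, 32, 32).astype('float')
    Y = np.array(datadict['labels'])
    return X, Y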
get_datasets.py (project: metaqnn, author: bowenbaker)
def get_svhn(save_dir=None, root_path=None):
    ''' If root_path is None, we download the data set from internet.

        Either save path or root path must not be None and not both.

        Returns Xtr, Ytr, Xte, Yte as numpy arrays
    '''

    assert((save_dir is not None and root_path is None) or (save_dir is None and root_path is not None))

    if root_path is None:
        new_save_dir = os.path.join(save_dir, 'og_data')
        if not os.path.isdir(new_save_dir):
            os.mkdir(new_save_dir)
        train_mat = os.path.join(new_save_dir, "train_32x32.mat")
        test_mat =  os.path.join(new_save_dir, "test_32x32.mat")
        url = urllib.URLopener()

        print 'Downloading Svhn Train...'
        url.retrieve("http://ufldl.stanford.edu/housenumbers/train_32x32.mat", train_mat)
        print 'Downloading Svhn Test...'
        url.retrieve("http://ufldl.stanford.edu/housenumbers/test_32x32.mat", test_mat)


    root = new_save_dir if not root_path else root_path

    train = io.loadmat(os.path.join(root, 'train_32x32.mat'))
    Xtr = train['X']
    Ytr = train['y']
    del train

    test = io.loadmat(os.path.join(root, 'test_32x32.mat'))
    Xte = test['X']
    Yte = test['y']
    del test

    # the .mat arrays come as (32, 32, 3, N); transpose to (N, 3, 32, 32)
    Xtr = np.transpose(Xtr, (3, 2, 0, 1))
    Xte = np.transpose(Xte, (3, 2, 0, 1))
    # labels run 1..10 with 10 standing for digit zero; shift them to 0..9
    Ytr = Ytr.reshape(Ytr.shape[:1]) - 1
    Yte = Yte.reshape(Yte.shape[:1]) - 1

    print 'Xtrain shape', Xtr.shape
    print 'Ytrain shape', Ytr.shape
    print 'Xtest shape', Xte.shape
    print 'Ytest shape', Yte.shape

    return Xtr, Ytr, Xte, Yte
get_datasets.py (project: metaqnn, author: bowenbaker)
def get_svhn_full(save_dir=None, root_path=None):
    ''' If root_path is None, we download the data set from internet.

        Either save path or root path must not be None and not both.

        Returns Xtr, Ytr, Xte, Yte as numpy arrays
    '''

    assert((save_dir is not None and root_path is None) or (save_dir is None and root_path is not None))

    Xtr_small, Ytr_small, Xte, Yte = get_svhn(save_dir, root_path)

    if root_path is None:
        new_save_dir = os.path.join(save_dir, 'og_data')
        if not os.path.isdir(new_save_dir):
            os.mkdir(new_save_dir)
        extra_mat = os.path.join(new_save_dir, "extra_32x32.mat")
        url = urllib.URLopener()

        print 'Downloading Svhn Extra...'
        url.retrieve("http://ufldl.stanford.edu/housenumbers/extra_32x32.mat", extra_mat)

    root = new_save_dir if not root_path else root_path
    extra = io.loadmat(os.path.join(root, 'extra_32x32.mat'))
    Xtr_extra = extra['X']
    Ytr_extra = extra['y']

    Xtr_extra = np.transpose(Xtr_extra, (3, 2, 0, 1))
    Ytr_extra = Ytr_extra.reshape(Ytr_extra.shape[:1]) - 1

    print 'Xextra shape', Xtr_extra.shape
    print 'Yextra shape', Ytr_extra.shape


    val_x = []
    val_y = []
    train_x = []
    train_y = []
    for i in np.unique(Ytr_small):
        # Get 400 images from X_small
        X_small_label = Xtr_small[Ytr_small == i]
        val_x.append(X_small_label[:400])
        val_y.append([i]*400)
        train_x.append(X_small_label[400:])
        train_y.append([i]*(X_small_label.shape[0] - 400))
        # Get 200 images from X_extra
        X_extra_label = Xtr_extra[Ytr_extra == i]
        val_x.append(X_extra_label[:200])
        val_y.append([i]*200)
        train_x.append(X_extra_label[200:])
        train_y.append([i]*(X_extra_label.shape[0] - 200))

    Xtr = np.concatenate(train_x)
    Ytr = np.concatenate(train_y)
    Xval = np.concatenate(val_x)
    Yval = np.concatenate(val_y)

    return Xtr, Ytr, Xval, Yval, Xte, Yte
main.py (project: flickr_downloader, author: Denisolt)
def download():
    choice = (raw_input('Type "tag" or "album" for corresponding choice. \nDo you want to download images by tag or specific album: '))
    #counter is created in order to label the images when they are downloaded
    counter = 0

    if(choice == 'album'):
        # kept as a string: the int() call in the original made the path concatenation below raise TypeError
        albumID = raw_input('Enter the ID of the album you wish to download: ')
        name = raw_input('Enter the username of the desired user\'s pictures: ')
        # checking if the folder exists, creating a folder and moving into it
        if not os.path.exists(os.path.join(name, albumID)):
            os.makedirs(os.path.join(name, albumID))
        os.chdir(os.path.join(name, albumID))

        print('Downloading...')
        # walk_set function loops through the pictures of a specific album
        for photo in flickr.walk_set(albumID):
            # beautiful soup opens up the direct link to the picture using authors id(name) and photo id, specifying sizes/k will
            # result in the highest quality picture available on flickr
            url = 'https://www.flickr.com/photos/'+ name+ '/' + photo.get('id') + '/sizes/k/'
            webpage = requests.get(url)
            soup = BeautifulSoup(webpage.text, 'html.parser')
            x = soup.findAll('img')
            # we read the html using soup and look for img, after which we look for src link and extract it
            for link in soup.find_all('img'):
                new = (link.get('src'))
                if(new.count(".jpg")) == 1:
                    #the link is downloaded using URLopener() and saved with 'photo + counter'
                    testfile = urllib.URLopener()
                    testfile.retrieve(new, 'photo' + str(counter) + '.jpg' )
                    counter = counter + 1

    elif(choice == 'tag'):
        tag = raw_input('Enter the tags(in format:tagName1,tagName2,tagName3 and etc): ')
        # checking if the folder exists, creating a folder and moving into it
        if not os.path.exists(tag):
            os.makedirs(tag)
        os.chdir(tag)
        # checking the total number of available pictures with the specific tag
        total = int(flickr.photos.search(tags=tag).find('photos').attrib['total'])          
        print('There are ' + str(total) + ' pictures found \nDownloading...')
        # walk_set function loops through the pictures with the tag for more info go to flickrapi python documentation
        for photo in flickr.walk(tag_mode='all', tags=tag):
            author =  photo.get('owner') # return the owner of the picture
            # beautiful soup opens up the direct link to the picture using authors id and photos id, specifying sizes/k will
            # result in the highest quality picture available on flickr
            url = 'https://www.flickr.com/photos/'+ author+ '/' + photo.get('id') + '/sizes/k/'
            webpage = requests.get(url)
            soup = BeautifulSoup(webpage.text, 'html.parser')
            x = soup.findAll('img')
            # we read the html using soup and look for img, after which we look for src link and extract it
            for link in soup.find_all('img'):
                new = (link.get('src'))
                if(new.count(".jpg")) == 1:
                    #the link is downloaded using URLopener() and saved with 'photo + counter'
                    testfile = urllib.URLopener()
                    testfile.retrieve(new, 'photo' + str(counter) + '.jpg' )
                    counter = counter + 1
    else:
        print('An Error appeared in your input. ')
        download()

