tweetdata.py 文件源码

python
阅读 30 收藏 0 点赞 0 评论 0

项目:tweetfeels 作者: uclatommy 项目源码 文件源码
def fetchbin(self, start=None, end=None, binsize=timedelta(seconds=60),
                 empty=False):
        """
        Returns a generator that can be used to iterate over the tweet data
        based on ``binsize``.

        Tweets are counted per time bin from ``self.tweet_dates``; the rows
        themselves are then read from the ``tweets`` table of the sqlite
        database at ``self._db`` and handed out bin by bin.

        :param start: Query start date (exclusive). Defaults to one second
            before ``self.start``.
        :type start: datetime
        :param end: Query end date (inclusive). Defaults to ``self.end``.
        :type end: datetime
        :param binsize: Time duration for each bin for tweet grouping. Only
            whole seconds are used; any fractional part is truncated.
        :type binsize: timedelta
        :param empty: Determines whether empty bins will be yielded.
        :type empty: boolean
        :returns: Yields ``TweetBin(frame, left, right)`` where ``frame`` is
            a dataframe indexed by ``created_at`` (or an empty list for a bin
            without tweets) and ``left``/``right`` are the bin boundaries.
        :rtype: generator
        """
        second = timedelta(seconds=1)
        if start is None:
            start = self.start - second
        if end is None:
            end = self.end
        # The SQL filter below is exclusive on the left, so step back one
        # second to keep the very first tweet inside the query window.
        if start == self.start:
            start = start - second

        # Number of tweets per bin. ``pd.Grouper`` replaces the removed
        # ``pd.TimeGrouper``; the frequency is given as a timedelta so no
        # version-specific frequency alias string is needed.
        bin_seconds = int(binsize / second)
        counts = self.tweet_dates.groupby(
            pd.Grouper(freq=timedelta(seconds=bin_seconds))
            ).size()
        # NOTE(review): the first retained bin may begin before ``start``, so
        # its count can include tweets the SQL query excludes -- confirm
        # whether callers rely on this alignment.
        counts = counts[counts.index > start - binsize]
        if not empty:
            counts = counts[counts != 0]

        conn = sqlite3.connect(self._db, detect_types=sqlite3.PARSE_DECLTYPES)
        try:
            c = conn.cursor()
            try:
                # NOTE(review): no ORDER BY here; rows are consumed in the
                # order sqlite returns them, presumably chronological --
                # TODO confirm.
                c.execute(
                    "SELECT * FROM tweets WHERE created_at > ? AND created_at <= ?",
                    (start, end)
                    )
                for left_edge, count in counts.items():
                    frame = []
                    if count > 0:
                        # Pull exactly as many rows as were counted for
                        # this bin.
                        frame = pd.DataFrame.from_records(
                            data=c.fetchmany(int(count)),
                            columns=self.fields,
                            index='created_at'
                            )
                    left = left_edge.to_pydatetime()
                    right = left + binsize
                    if len(frame) > 0 or empty:
                        yield TweetBin(frame, left, right)
            finally:
                c.close()
        finally:
            # Runs on exhaustion, on an exception, and when the generator is
            # closed or garbage collected before being fully consumed.
            conn.close()
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号