I'd like to pull the XML of a Google Spreadsheet via a python api, so that I can identify the Worksheet ID within the URL for each of the worksheets within a Google Spreadsheet Workbook. For example, the worksheet id for 'sheet2' of this workbook is '1244369280' , since it's url is https://docs.google.com/spreadsheets/d/1yd8qTYjRns4_OT8PbsZzH0zajvzguKS79dq6j--hnTs/edit#gid=1244369280
The reason I'm interested in pulling the XML is because according to this question, the only way to get the Worksheet ID is to stream down the XML of a worksheet, but the example is in Javascript and I need to do this in Python
This is the Javascript Code that I'd like to execute in Python:
Dim worksheetFeed As WorksheetFeed Dim query As WorksheetQuery Dim worksheet As WorksheetEntry Dim output As New MemoryStream Dim xml As String Dim gid As String = String.Empty Try _service = New Spreadsheets.SpreadsheetsService("ServiceName") _service.setUserCredentials(UserId, Password) query = New WorksheetQuery(feedUrl) worksheetFeed = _service.Query(query) worksheet = worksheetFeed.Entries(0) ' Save worksheet feed to memory stream so we can ' get the xml returned from the feed url and look for ' the gid. Gid allows us to download the specific worksheet tab Using output worksheet.SaveToXml(output) End Using xml = Encoding.ASCII.GetString(output.ToArray()) It seems that the best way to get the XML from a Google Spreadsheet is using Gdata, so I've downloaded GData and tried the Google Spreadsheet example with my credentials.
See below
#!/usr/bin/python # # Copyright (C) 2007 Google Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. __author__ = 'api.laurabeth@gmail.com (Laura Beth Lincoln)' try: from xml.etree import ElementTree except ImportError: from elementtree import ElementTree import gdata.spreadsheet.service import gdata.service import atom.service import gdata.spreadsheet import atom import getopt import sys import string class SimpleCRUD: def __init__(self, email, password): self.gd_client = gdata.spreadsheet.service.SpreadsheetsService() self.gd_client.email = 'chris@curalate.com' self.gd_client.password = 'jkjkdioerzumawya' self.gd_client.source = 'Spreadsheets GData Sample' self.gd_client.ProgrammaticLogin() self.curr_key = '' self.curr_wksht_id = '' self.list_feed = None def _PromptForSpreadsheet(self): # Get the list of spreadsheets feed = self.gd_client.GetSpreadsheetsFeed() self._PrintFeed(feed) input = raw_input('\nSelection: ') id_parts = feed.entry[string.atoi(input)].id.text.split('/') self.curr_key = id_parts[len(id_parts) - 1] def _PromptForWorksheet(self): # Get the list of worksheets feed = self.gd_client.GetWorksheetsFeed(self.curr_key) self._PrintFeed(feed) input = raw_input('\nSelection: ') id_parts = feed.entry[string.atoi(input)].id.text.split('/') self.curr_wksht_id = id_parts[len(id_parts) - 1] def _PromptForCellsAction(self): print ('dump\n' 'update {row} {col} {input_value}\n' '\n') input = raw_input('Command: ') command = input.split(' ', 1) if command[0] == 'dump': self._CellsGetAction() elif command[0] == 'update': parsed = command[1].split(' ', 2) if len(parsed) == 3: self._CellsUpdateAction(parsed[0], parsed[1], parsed[2]) else: self._CellsUpdateAction(parsed[0], parsed[1], '') else: self._InvalidCommandError(input) def _PromptForListAction(self): print ('dump\n' 'insert {row_data} (example: insert label=content)\n' 'update {row_index} {row_data}\n' 'delete {row_index}\n' 'Note: No uppercase letters in column names!\n' '\n') input = raw_input('Command: ') command = input.split(' ' , 1) if command[0] == 'dump': self._ListGetAction() elif command[0] == 'insert': self._ListInsertAction(command[1]) elif command[0] == 'update': parsed = command[1].split(' ', 1) self._ListUpdateAction(parsed[0], parsed[1]) elif command[0] == 'delete': self._ListDeleteAction(command[1]) else: self._InvalidCommandError(input) def _CellsGetAction(self): # Get the feed of cells feed = self.gd_client.GetCellsFeed(self.curr_key, self.curr_wksht_id) self._PrintFeed(feed) def _CellsUpdateAction(self, row, col, inputValue): entry = self.gd_client.UpdateCell(row=row, col=col, inputValue=inputValue, key=self.curr_key, wksht_id=self.curr_wksht_id) if isinstance(entry, gdata.spreadsheet.SpreadsheetsCell): print 'Updated!' def _ListGetAction(self): # Get the list feed self.list_feed = self.gd_client.GetListFeed(self.curr_key, self.curr_wksht_id) self._PrintFeed(self.list_feed) def _ListInsertAction(self, row_data): entry = self.gd_client.InsertRow(self._StringToDictionary(row_data), self.curr_key, self.curr_wksht_id) if isinstance(entry, gdata.spreadsheet.SpreadsheetsList): print 'Inserted!' def _ListUpdateAction(self, index, row_data): self.list_feed = self.gd_client.GetListFeed(self.curr_key, self.curr_wksht_id) entry = self.gd_client.UpdateRow( self.list_feed.entry[string.atoi(index)], self._StringToDictionary(row_data)) if isinstance(entry, gdata.spreadsheet.SpreadsheetsList): print 'Updated!' def _ListDeleteAction(self, index): self.list_feed = self.gd_client.GetListFeed(self.curr_key, self.curr_wksht_id) self.gd_client.DeleteRow(self.list_feed.entry[string.atoi(index)]) print 'Deleted!' def _StringToDictionary(self, row_data): dict = {} for param in row_data.split(): temp = param.split('=') dict[temp[0]] = temp[1] return dict def _PrintFeed(self, feed): for i, entry in enumerate(feed.entry): if isinstance(feed, gdata.spreadsheet.SpreadsheetsCellsFeed): print '%s %s\n' % (entry.title.text, entry.content.text) elif isinstance(feed, gdata.spreadsheet.SpreadsheetsListFeed): print '%s %s %s' % (i, entry.title.text, entry.content.text) # Print this row's value for each column (the custom dictionary is # built using the gsx: elements in the entry.) print 'Contents:' for key in entry.custom: print ' %s: %s' % (key, entry.custom[key].text) print '\n', else: print '%s %s\n' % (i, entry.title.text) def _InvalidCommandError(self, input): print 'Invalid input: %s\n' % (input) def Run(self): self._PromptForSpreadsheet() self._PromptForWorksheet() input = raw_input('cells or list? ') if input == 'cells': while True: self._PromptForCellsAction() elif input == 'list': while True: self._PromptForListAction() def main(): # parse command line options try: opts, args = getopt.getopt(sys.argv[1:], "", ["user=", "pw="]) except getopt.error, msg: print 'python spreadsheetExample.py --user [username] --pw [password] ' sys.exit(2) user = 'fake@gmail.com' pw = 'fakepassword' key = '' # Process options for o, a in opts: if o == "--user": user = a elif o == "--pw": pw = a if user == '' or pw == '': print 'python spreadsheetExample.py --user [username] --pw [password] ' sys.exit(2) sample = SimpleCRUD(user, pw) sample.Run() if __name__ == '__main__': main() However this returns the following error:
Traceback (most recent call last): File "/Users/Chris/Desktop/gdata_test.py", line 200, in <module> main() File "/Users/Chris/Desktop/gdata_test.py", line 196, in main sample.Run() File "/Users/Chris/Desktop/gdata_test.py", line 162, in Run self._PromptForSpreadsheet() File "/Users/Chris/Desktop/gdata_test.py", line 49, in _PromptForSpreadsheet feed = self.gd_client.GetSpreadsheetsFeed() File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/gdata/spreadsheet/service.py", line 99, in GetSpreadsheetsFeed converter=gdata.spreadsheet.SpreadsheetsSpreadsheetsFeedFromString) File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/gdata/service.py", line 1074, in Get return converter(result_body) File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/gdata/spreadsheet/__init__.py", line 395, in SpreadsheetsSpreadsheetsFeedFromString xml_string) File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/atom/__init__.py", line 93, in optional_warn_function return f(*args, **kwargs) File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/atom/__init__.py", line 127, in CreateClassFromXMLString tree = ElementTree.fromstring(xml_string) File "<string>", line 125, in XML cElementTree.ParseError: no element found: line 1, column 0 [Finished in 0.3s with exit code 1] [shell_cmd: python -u "/Users/Chris/Desktop/gdata_test.py"] [dir: /Users/Chris/Desktop] [path: /usr/bin:/bin:/usr/sbin:/sbin] I should also mention that I've been using Gspread as a method to interact with Google Spreadsheets, but when I run the below code, I get the gid, but I need to have the worksheet id.
gc = gspread.authorize(credentials) sh = gc.open_by_url('google_spreadsheet_url') sh.get_id_fields() >> {'spreadsheet_id': '1BgCEn-3Nor7UxOEPwD-qv8qXe7CaveJBrn9_Lcpo4W4','worksheet_id': 'oqitk0d'}
No comments:
Post a Comment